From 92b4418c8c03a7737764af6c7f19549297ded977 Mon Sep 17 00:00:00 2001 From: Kazuki Maeda Date: Mon, 10 Jul 2023 17:27:55 +0900 Subject: [PATCH] Datadog logs loader (#7356) ### Description Created a Loader to get a list of specific logs from Datadog Logs. ### Dependencies `datadog_api_client` is required. ### Twitter handle [kzk_maeda](https://twitter.com/kzk_maeda) --------- Co-authored-by: Bagatur --- .../ecosystem/integrations/datadog_logs.mdx | 19 +++ .../integrations/datadog_logs.ipynb | 96 ++++++++++++ langchain/document_loaders/__init__.py | 2 + langchain/document_loaders/datadog_logs.py | 137 ++++++++++++++++++ 4 files changed, 254 insertions(+) create mode 100644 docs/extras/ecosystem/integrations/datadog_logs.mdx create mode 100644 docs/extras/modules/data_connection/document_loaders/integrations/datadog_logs.ipynb create mode 100644 langchain/document_loaders/datadog_logs.py diff --git a/docs/extras/ecosystem/integrations/datadog_logs.mdx b/docs/extras/ecosystem/integrations/datadog_logs.mdx new file mode 100644 index 00000000000..4da581b7c6b --- /dev/null +++ b/docs/extras/ecosystem/integrations/datadog_logs.mdx @@ -0,0 +1,19 @@ +# Datadog Logs + +>[Datadog](https://www.datadoghq.com/) is a monitoring and analytics platform for cloud-scale applications. + +## Installation and Setup + +```bash +pip install datadog_api_client +``` + +We must initialize the loader with the Datadog API key and APP key, and we need to set up the query to extract the desired logs. + +## Document Loader + +See a [usage example](/docs/modules/data_connection/document_loaders/integrations/datadog_logs.html). 
+ +```python +from langchain.document_loaders import DatadogLogsLoader +``` diff --git a/docs/extras/modules/data_connection/document_loaders/integrations/datadog_logs.ipynb b/docs/extras/modules/data_connection/document_loaders/integrations/datadog_logs.ipynb new file mode 100644 index 00000000000..86fd433b9b8 --- /dev/null +++ b/docs/extras/modules/data_connection/document_loaders/integrations/datadog_logs.ipynb @@ -0,0 +1,96 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Datadog Logs\n", + "\n", + ">[Datadog](https://www.datadoghq.com/) is a monitoring and analytics platform for cloud-scale applications.\n", + "\n", + "This loader fetches the logs from your applications in Datadog using the `datadog_api_client` Python package. You must initialize the loader with your `Datadog API key` and `APP key`, and you need to pass in the query to extract the desired logs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.document_loaders import DatadogLogsLoader" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#!pip install datadog-api-client" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "query = \"service:agent status:error\"\n", + "\n", + "loader = DatadogLogsLoader(\n", + " query=query,\n", + " api_key=DD_API_KEY,\n", + " app_key=DD_APP_KEY,\n", + " from_time=1688732708951, # Optional, timestamp in milliseconds\n", + " to_time=1688736308951, # Optional, timestamp in milliseconds\n", + " limit=100, # Optional, default is 100\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='message: grep: /etc/datadog-agent/system-probe.yaml: No such file or directory', metadata={'id': 
'AgAAAYkwpLImvkjRpQAAAAAAAAAYAAAAAEFZa3dwTUFsQUFEWmZfLU5QdElnM3dBWQAAACQAAAAAMDE4OTMwYTQtYzk3OS00MmJjLTlhNDAtOTY4N2EwY2I5ZDdk', 'status': 'error', 'service': 'agent', 'tags': ['accessible-from-goog-gke-node', 'allow-external-ingress-high-ports', 'allow-external-ingress-http', 'allow-external-ingress-https', 'container_id:c7d8ecd27b5b3cfdf3b0df04b8965af6f233f56b7c3c2ffabfab5e3b6ccbd6a5', 'container_name:lab_datadog_1', 'datadog.pipelines:false', 'datadog.submission_auth:private_api_key', 'docker_image:datadog/agent:7.41.1', 'env:dd101-dev', 'hostname:lab-host', 'image_name:datadog/agent', 'image_tag:7.41.1', 'instance-id:7497601202021312403', 'instance-type:custom-1-4096', 'instruqt_aws_accounts:', 'instruqt_azure_subscriptions:', 'instruqt_gcp_projects:', 'internal-hostname:lab-host.d4rjybavkary.svc.cluster.local', 'numeric_project_id:3390740675', 'p-d4rjybavkary', 'project:instruqt-prod', 'service:agent', 'short_image:agent', 'source:agent', 'zone:europe-west1-b'], 'timestamp': datetime.datetime(2023, 7, 7, 13, 57, 27, 206000, tzinfo=tzutc())}),\n", + " Document(page_content='message: grep: /etc/datadog-agent/system-probe.yaml: No such file or directory', metadata={'id': 'AgAAAYkwpLImvkjRpgAAAAAAAAAYAAAAAEFZa3dwTUFsQUFEWmZfLU5QdElnM3dBWgAAACQAAAAAMDE4OTMwYTQtYzk3OS00MmJjLTlhNDAtOTY4N2EwY2I5ZDdk', 'status': 'error', 'service': 'agent', 'tags': ['accessible-from-goog-gke-node', 'allow-external-ingress-high-ports', 'allow-external-ingress-http', 'allow-external-ingress-https', 'container_id:c7d8ecd27b5b3cfdf3b0df04b8965af6f233f56b7c3c2ffabfab5e3b6ccbd6a5', 'container_name:lab_datadog_1', 'datadog.pipelines:false', 'datadog.submission_auth:private_api_key', 'docker_image:datadog/agent:7.41.1', 'env:dd101-dev', 'hostname:lab-host', 'image_name:datadog/agent', 'image_tag:7.41.1', 'instance-id:7497601202021312403', 'instance-type:custom-1-4096', 'instruqt_aws_accounts:', 'instruqt_azure_subscriptions:', 'instruqt_gcp_projects:', 
'internal-hostname:lab-host.d4rjybavkary.svc.cluster.local', 'numeric_project_id:3390740675', 'p-d4rjybavkary', 'project:instruqt-prod', 'service:agent', 'short_image:agent', 'source:agent', 'zone:europe-west1-b'], 'timestamp': datetime.datetime(2023, 7, 7, 13, 57, 27, 206000, tzinfo=tzutc())})]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "documents = loader.load()\n", + "documents" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.11" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/langchain/document_loaders/__init__.py b/langchain/document_loaders/__init__.py index bae2151032e..da9a19e0c9c 100644 --- a/langchain/document_loaders/__init__.py +++ b/langchain/document_loaders/__init__.py @@ -30,6 +30,7 @@ from langchain.document_loaders.confluence import ConfluenceLoader from langchain.document_loaders.conllu import CoNLLULoader from langchain.document_loaders.csv_loader import CSVLoader, UnstructuredCSVLoader from langchain.document_loaders.cube_semantic import CubeSemanticLoader +from langchain.document_loaders.datadog_logs import DatadogLogsLoader from langchain.document_loaders.dataframe import DataFrameLoader from langchain.document_loaders.diffbot import DiffbotLoader from langchain.document_loaders.directory import DirectoryLoader @@ -179,6 +180,7 @@ __all__ = [ "CollegeConfidentialLoader", "ConfluenceLoader", "CubeSemanticLoader", + "DatadogLogsLoader", "DataFrameLoader", "DiffbotLoader", "DirectoryLoader", diff --git a/langchain/document_loaders/datadog_logs.py b/langchain/document_loaders/datadog_logs.py new file mode 100644 index 
00000000000..613288a99ea
--- /dev/null
+++ b/langchain/document_loaders/datadog_logs.py
@@ -0,0 +1,160 @@
+"""Load Datadog logs."""
+from datetime import datetime, timedelta
+from typing import List, Optional, Union
+
+from langchain.docstore.document import Document
+from langchain.document_loaders.base import BaseLoader
+
+
+class DatadogLogsLoader(BaseLoader):
+    """Loads a query result from Datadog into a list of documents.
+
+    Logs are written into the `page_content` and into the `metadata`:
+    the message and custom attributes become the `page_content`, while the
+    id, status, service, tags and timestamp become the `metadata`.
+    """
+
+    def __init__(
+        self,
+        query: str,
+        api_key: str,
+        app_key: str,
+        from_time: Optional[Union[int, str]] = None,
+        to_time: Optional[Union[int, str]] = None,
+        limit: int = 100,
+    ) -> None:
+        """Initialize Datadog document loader.
+
+        Requirements:
+            - Must have datadog_api_client installed.
+              Install with `pip install datadog_api_client`.
+
+        Args:
+            query: The query to run in Datadog.
+            api_key: The Datadog API key.
+            app_key: The Datadog APP key.
+            from_time: Optional. The start of the time range to query.
+                Supports date math strings and regular timestamps
+                (milliseconds) like 1688732708951.
+                Defaults to 20 minutes ago.
+            to_time: Optional. The end of the time range to query.
+                Supports date math strings and regular timestamps
+                (milliseconds) like 1688732708951.
+                Defaults to now.
+            limit: The maximum number of logs to return.
+                Defaults to 100.
+
+        Raises:
+            ImportError: If datadog_api_client is not installed.
+        """
+        try:
+            from datadog_api_client import Configuration
+        except ImportError as ex:
+            raise ImportError(
+                "Could not import datadog_api_client python package. "
+                "Please install it with `pip install datadog_api_client`."
+            ) from ex
+
+        self.query = query
+        configuration = Configuration()
+        configuration.api_key["apiKeyAuth"] = api_key
+        configuration.api_key["appKeyAuth"] = app_key
+        self.configuration = configuration
+        self.from_time = from_time
+        self.to_time = to_time
+        self.limit = limit
+
+    def parse_log(self, log: dict) -> Document:
+        """Create a Document object from a single Datadog log item.
+
+        Args:
+            log: One log entry of a Datadog response, as a plain dict.
+
+        Returns:
+            A Document whose `page_content` lists the log's custom attributes
+            and its message as comma-separated `key: value` pairs, and whose
+            `metadata` holds `id`, `status`, `service`, `tags` and `timestamp`.
+        """
+        # `or {}` also covers keys that are present but set to None, which a
+        # `.get(key, {})` default would let through.
+        attributes = log.get("attributes") or {}
+        metadata = {
+            "id": log.get("id", ""),
+            "status": attributes.get("status"),
+            "service": attributes.get("service", ""),
+            "tags": attributes.get("tags", []),
+            "timestamp": attributes.get("timestamp", ""),
+        }
+
+        message = attributes.get("message", "")
+        inside_attributes = attributes.get("attributes") or {}
+        # The log message takes precedence over a custom attribute that is
+        # also named "message".
+        content_dict = {**inside_attributes, "message": message}
+        content = ", ".join(f"{k}: {v}" for k, v in content_dict.items())
+        return Document(page_content=content, metadata=metadata)
+
+    def load(self) -> List[Document]:
+        """Get logs from Datadog.
+
+        Returns:
+            A list of Document objects, requested in ascending timestamp
+            order. An empty list is returned when the response has no data.
+            - page_content
+            - metadata
+                - id
+                - service
+                - status
+                - tags
+                - timestamp
+
+        Raises:
+            ImportError: If datadog_api_client is not installed.
+        """
+        try:
+            from datadog_api_client import ApiClient
+            from datadog_api_client.v2.api.logs_api import LogsApi
+            from datadog_api_client.v2.model.logs_list_request import LogsListRequest
+            from datadog_api_client.v2.model.logs_list_request_page import (
+                LogsListRequestPage,
+            )
+            from datadog_api_client.v2.model.logs_query_filter import LogsQueryFilter
+            from datadog_api_client.v2.model.logs_sort import LogsSort
+        except ImportError as ex:
+            raise ImportError(
+                "Could not import datadog_api_client python package. "
+                "Please install it with `pip install datadog_api_client`."
+            ) from ex
+
+        now = datetime.now()
+        twenty_minutes_before = now - timedelta(minutes=20)
+        now_timestamp = int(now.timestamp() * 1000)
+        twenty_minutes_before_timestamp = int(twenty_minutes_before.timestamp() * 1000)
+        _from = (
+            self.from_time
+            if self.from_time is not None
+            else twenty_minutes_before_timestamp
+        )
+        to = self.to_time if self.to_time is not None else now_timestamp
+
+        body = LogsListRequest(
+            filter=LogsQueryFilter(
+                query=self.query,
+                # Both bounds are sent as strings so that integer timestamps
+                # and date math expressions are handled the same way.
+                _from=str(_from),
+                to=str(to),
+            ),
+            sort=LogsSort.TIMESTAMP_ASCENDING,
+            page=LogsListRequestPage(
+                limit=self.limit,
+            ),
+        )
+
+        with ApiClient(configuration=self.configuration) as api_client:
+            api_instance = LogsApi(api_client)
+            response = api_instance.list_logs(body=body).to_dict()
+
+        # A response without a "data" entry (or with a null one) is treated as
+        # no logs, so an empty list is returned instead of raising KeyError.
+        return [self.parse_log(row) for row in response.get("data") or []]