From d593833e4d5fd713d5281229a9cfb53ccaaf872e Mon Sep 17 00:00:00 2001 From: Constantin Musca Date: Thu, 20 Jul 2023 17:03:20 +0300 Subject: [PATCH] Add Golden Query Tool (#7930) **Description:** Golden Query is a wrapper on top of the [Golden Query API](https://docs.golden.com/reference/query-api) which enables programmatic access to query results on entities across Golden's Knowledge Base. For more information about Golden API, please see the [Golden API Getting Started](https://docs.golden.com/reference/getting-started) page. **Issue:** None **Dependencies:** requests(already present in project) **Tag maintainer:** @hinthornw Signed-off-by: Constantin Musca --- .../ecosystem/integrations/golden_query.mdx | 36 +++++ .../tools/integrations/golden_query.ipynb | 142 ++++++++++++++++++ langchain/__init__.py | 2 + langchain/agents/load_tools.py | 7 + langchain/tools/golden_query/__init__.py | 8 + langchain/tools/golden_query/tool.py | 44 ++++++ langchain/utilities/__init__.py | 2 + langchain/utilities/golden_query.py | 67 +++++++++ .../utilities/test_golden_query_api.py | 11 ++ 9 files changed, 319 insertions(+) create mode 100644 docs/extras/ecosystem/integrations/golden_query.mdx create mode 100644 docs/extras/modules/agents/tools/integrations/golden_query.ipynb create mode 100644 langchain/tools/golden_query/__init__.py create mode 100644 langchain/tools/golden_query/tool.py create mode 100644 langchain/utilities/golden_query.py create mode 100644 tests/integration_tests/utilities/test_golden_query_api.py diff --git a/docs/extras/ecosystem/integrations/golden_query.mdx b/docs/extras/ecosystem/integrations/golden_query.mdx new file mode 100644 index 00000000000..29088371610 --- /dev/null +++ b/docs/extras/ecosystem/integrations/golden_query.mdx @@ -0,0 +1,36 @@ +# Golden Query + +>Golden Query is a wrapper on top of the [Golden Query API](https://docs.golden.com/reference/query-api) which enables programmatic access to query results on entities across Golden's 
Knowledge Base. +>See the [Golden Query API docs](https://docs.golden.com/reference/query-api) for more information. + +This page covers how to use `Golden Query` within LangChain. + +## Installation and Setup +- Go to the [Golden API docs](https://docs.golden.com/) to get an overview of the Golden API. +- Create a Golden account on the [Golden Website](https://golden.com) if you don't have one. +- Get your API key from the [Golden API Settings](https://golden.com/settings/api) page. +- Save your API key in the `GOLDEN_API_KEY` environment variable. + + +## Wrappers + +### Utility + +The `GoldenQueryAPIWrapper` utility wraps this API. To import this utility: + +```python +from langchain.utilities.golden_query import GoldenQueryAPIWrapper +``` + +For a more detailed walkthrough of this wrapper, see [this notebook](/docs/modules/agents/tools/integrations/golden_query.html). + +### Tool + +You can also easily load this wrapper as a Tool (to use with an Agent). +You can do this with: +```python +from langchain.agents import load_tools +tools = load_tools(["golden-query"]) +``` + +For more information on tools, see [this page](/docs/modules/agents/tools/). 
diff --git a/docs/extras/modules/agents/tools/integrations/golden_query.ipynb b/docs/extras/modules/agents/tools/integrations/golden_query.ipynb new file mode 100644 index 00000000000..b9ff485576c --- /dev/null +++ b/docs/extras/modules/agents/tools/integrations/golden_query.ipynb @@ -0,0 +1,142 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "245a954a", + "metadata": {}, + "source": [ + "# Golden Query\n", + "\n", + "This notebook goes over how to use the golden-query tool.\n", + "\n", + "- Go to the [Golden API docs](https://docs.golden.com/) to get an overview of the Golden API.\n", + "- Create a Golden account on the [Golden Website](https://golden.com) if you don't have one.\n", + "- Get your API key from the [Golden API Settings](https://golden.com/settings/api) page.\n", + "- Save your API key in the `GOLDEN_API_KEY` environment variable." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "34bb5968", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"GOLDEN_API_KEY\"] = \"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ac4910f8", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.utilities.golden_query import GoldenQueryAPIWrapper" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "84b8f773", + "metadata": {}, + "outputs": [], + "source": [ + "golden_query = GoldenQueryAPIWrapper()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "068991a6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'results': [{'id': 4673886,\n", + " 'latestVersionId': 60276991,\n", + " 'properties': [{'predicateId': 'name',\n", + " 'instances': [{'value': 'Samsung', 'citations': []}]}]},\n", + " {'id': 7008,\n", + " 'latestVersionId': 61087416,\n", + " 'properties': [{'predicateId': 'name',\n", + " 'instances': [{'value': 'Intel', 'citations': []}]}]},\n", + " {'id': 24193,\n", + " 'latestVersionId': 60274482,\n", 
+ " 'properties': [{'predicateId': 'name',\n", + " 'instances': [{'value': 'Texas Instruments', 'citations': []}]}]},\n", + " {'id': 1142,\n", + " 'latestVersionId': 61406205,\n", + " 'properties': [{'predicateId': 'name',\n", + " 'instances': [{'value': 'Advanced Micro Devices', 'citations': []}]}]},\n", + " {'id': 193948,\n", + " 'latestVersionId': 58326582,\n", + " 'properties': [{'predicateId': 'name',\n", + " 'instances': [{'value': 'Freescale Semiconductor', 'citations': []}]}]},\n", + " {'id': 91316,\n", + " 'latestVersionId': 60387380,\n", + " 'properties': [{'predicateId': 'name',\n", + " 'instances': [{'value': 'Agilent Technologies', 'citations': []}]}]},\n", + " {'id': 90014,\n", + " 'latestVersionId': 60388078,\n", + " 'properties': [{'predicateId': 'name',\n", + " 'instances': [{'value': 'Novartis', 'citations': []}]}]},\n", + " {'id': 237458,\n", + " 'latestVersionId': 61406160,\n", + " 'properties': [{'predicateId': 'name',\n", + " 'instances': [{'value': 'Analog Devices', 'citations': []}]}]},\n", + " {'id': 3941943,\n", + " 'latestVersionId': 60382250,\n", + " 'properties': [{'predicateId': 'name',\n", + " 'instances': [{'value': 'AbbVie Inc.', 'citations': []}]}]},\n", + " {'id': 4178762,\n", + " 'latestVersionId': 60542667,\n", + " 'properties': [{'predicateId': 'name',\n", + " 'instances': [{'value': 'IBM', 'citations': []}]}]}],\n", + " 'next': 'https://golden.com/api/v2/public/queries/59044/results/?cursor=eyJwb3NpdGlvbiI6IFsxNzYxNiwgIklCTS04M1lQM1oiXX0%3D&pageSize=10',\n", + " 'previous': None}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import json\n", + "\n", + "json.loads(golden_query.run(\"companies in nanotech\"))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "vscode": { + "interpreter": { + "hash": "53f3bc57609c7a84333bb558594977aa5b4026b1d6070b93987956689e367341" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/langchain/__init__.py b/langchain/__init__.py index b0cfdb48047..0fe0260aa32 100644 --- a/langchain/__init__.py +++ b/langchain/__init__.py @@ -45,6 +45,7 @@ from langchain.prompts import ( from langchain.schema.prompt_template import BasePromptTemplate from langchain.sql_database import SQLDatabase from langchain.utilities.arxiv import ArxivAPIWrapper +from langchain.utilities.golden_query import GoldenQueryAPIWrapper from langchain.utilities.google_search import GoogleSearchAPIWrapper from langchain.utilities.google_serper import GoogleSerperAPIWrapper from langchain.utilities.powerbi import PowerBIDataset @@ -74,6 +75,7 @@ __all__ = [ "LLMCheckerChain", "LLMMathChain", "ArxivAPIWrapper", + "GoldenQueryAPIWrapper", "SelfAskWithSearchChain", "SerpAPIWrapper", "SerpAPIChain", diff --git a/langchain/agents/load_tools.py b/langchain/agents/load_tools.py index 0cdd80a901c..c93ba947be2 100644 --- a/langchain/agents/load_tools.py +++ b/langchain/agents/load_tools.py @@ -14,6 +14,7 @@ from langchain.chains.llm_math.base import LLMMathChain from langchain.chains.pal.base import PALChain from langchain.requests import TextRequestsWrapper from langchain.tools.arxiv.tool import ArxivQueryRun +from langchain.tools.golden_query.tool import GoldenQueryRun from langchain.tools.pubmed.tool import PubmedQueryRun from langchain.tools.base import BaseTool from langchain.tools.bing_search.tool import BingSearchRun @@ -41,6 +42,7 @@ from langchain.tools.openweathermap.tool import OpenWeatherMapQueryRun from langchain.tools.dataforseo_api_search import DataForSeoAPISearchRun from langchain.tools.dataforseo_api_search import DataForSeoAPISearchResults from langchain.utilities import 
ArxivAPIWrapper +from langchain.utilities import GoldenQueryAPIWrapper from langchain.utilities import PubMedAPIWrapper from langchain.utilities.bing_search import BingSearchAPIWrapper from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper @@ -209,6 +211,10 @@ def _get_arxiv(**kwargs: Any) -> BaseTool: return ArxivQueryRun(api_wrapper=ArxivAPIWrapper(**kwargs)) +def _get_golden_query(**kwargs: Any) -> BaseTool: + return GoldenQueryRun(api_wrapper=GoldenQueryAPIWrapper(**kwargs)) + + def _get_pupmed(**kwargs: Any) -> BaseTool: return PubmedQueryRun(api_wrapper=PubMedAPIWrapper(**kwargs)) @@ -325,6 +331,7 @@ _EXTRA_OPTIONAL_TOOLS: Dict[str, Tuple[Callable[[KwArg(Any)], BaseTool], List[st _get_arxiv, ["top_k_results", "load_max_docs", "load_all_available_meta"], ), + "golden-query": (_get_golden_query, ["golden_api_key"]), "pupmed": ( _get_pupmed, ["top_k_results", "load_max_docs", "load_all_available_meta"], diff --git a/langchain/tools/golden_query/__init__.py b/langchain/tools/golden_query/__init__.py new file mode 100644 index 00000000000..d4cdf07ef71 --- /dev/null +++ b/langchain/tools/golden_query/__init__.py @@ -0,0 +1,8 @@ +"""Golden API toolkit.""" + + +from langchain.tools.golden_query.tool import GoldenQueryRun + +__all__ = [ + "GoldenQueryRun", +] diff --git a/langchain/tools/golden_query/tool.py b/langchain/tools/golden_query/tool.py new file mode 100644 index 00000000000..de14226d7f6 --- /dev/null +++ b/langchain/tools/golden_query/tool.py @@ -0,0 +1,44 @@ +"""Tool for the Golden API.""" + +from typing import Optional + +from langchain.callbacks.manager import ( + AsyncCallbackManagerForToolRun, + CallbackManagerForToolRun, +) +from langchain.tools.base import BaseTool +from langchain.utilities.golden_query import GoldenQueryAPIWrapper + + +class GoldenQueryRun(BaseTool): + """Tool that adds the capability to query using the Golden API and get back JSON.""" + + name = "Golden Query" + description = ( + "A wrapper around Golden Query 
API." + " Useful for getting entities that match" + " a natural language query from Golden's Knowledge Base." + "\nExample queries:" + "\n- companies in nanotech" + "\n- list of cloud providers starting in 2019" + "\nInput should be the natural language query." + "\nOutput is a paginated list of results or an error object" + " in JSON format." + ) + api_wrapper: GoldenQueryAPIWrapper + + def _run( + self, + query: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Run the query through the API wrapper and return its output.""" + return self.api_wrapper.run(query) + + async def _arun( + self, + query: str, + run_manager: Optional[AsyncCallbackManagerForToolRun] = None, + ) -> str: + """Async execution is not supported; always raises NotImplementedError.""" + raise NotImplementedError("Golden does not support async") diff --git a/langchain/utilities/__init__.py b/langchain/utilities/__init__.py index 1f72e024cea..38863dacef7 100644 --- a/langchain/utilities/__init__.py +++ b/langchain/utilities/__init__.py @@ -8,6 +8,7 @@ from langchain.utilities.bibtex import BibtexparserWrapper from langchain.utilities.bing_search import BingSearchAPIWrapper from langchain.utilities.brave_search import BraveSearchWrapper from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper +from langchain.utilities.golden_query import GoldenQueryAPIWrapper from langchain.utilities.google_places_api import GooglePlacesAPIWrapper from langchain.utilities.google_search import GoogleSearchAPIWrapper from langchain.utilities.google_serper import GoogleSerperAPIWrapper @@ -31,6 +32,7 @@ from langchain.utilities.zapier import ZapierNLAWrapper __all__ = [ "ApifyWrapper", "ArxivAPIWrapper", + "GoldenQueryAPIWrapper", "BashProcess", "BibtexparserWrapper", "BingSearchAPIWrapper", diff --git a/langchain/utilities/golden_query.py b/langchain/utilities/golden_query.py new file mode 100644 index 00000000000..df7e505faab --- /dev/null +++ b/langchain/utilities/golden_query.py @@ -0,0 +1,67 @@ +"""Util that calls Golden.""" +import 
json +from typing import Dict, Optional + +import requests +from pydantic import BaseModel, Extra, root_validator + +from langchain.utils import get_from_dict_or_env + +GOLDEN_BASE_URL = "https://golden.com" +GOLDEN_TIMEOUT = 5000 + + +class GoldenQueryAPIWrapper(BaseModel): + """Wrapper for Golden. + + Docs for using: + + 1. Go to https://golden.com and sign up for an account + 2. Get your API Key from https://golden.com/settings/api + 3. Save your API Key into GOLDEN_API_KEY env variable + NOTE(review): GOLDEN_TIMEOUT=5000 reaches requests as seconds (~83 min); verify. + """ + + golden_api_key: Optional[str] = None + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Load the API key from the given values or the GOLDEN_API_KEY env var.""" + golden_api_key = get_from_dict_or_env( + values, "golden_api_key", "GOLDEN_API_KEY" + ) + values["golden_api_key"] = golden_api_key + + return values + + def run(self, query: str) -> str: + """Run query through Golden Query API and return the raw response text.""" + + headers = {"apikey": self.golden_api_key or ""} + + response = requests.post( + f"{GOLDEN_BASE_URL}/api/v2/public/queries/", + json={"prompt": query}, + headers=headers, + timeout=GOLDEN_TIMEOUT, + ) + if response.status_code != 201: + return response.text + + content = json.loads(response.content) + query_id = content["id"] + + response = requests.get( + ( + f"{GOLDEN_BASE_URL}/api/v2/public/queries/{query_id}/results/" + "?pageSize=10" + ), + headers=headers, + timeout=GOLDEN_TIMEOUT, + ) + return response.text diff --git a/tests/integration_tests/utilities/test_golden_query_api.py b/tests/integration_tests/utilities/test_golden_query_api.py new file mode 100644 index 00000000000..da4bea2503a --- /dev/null +++ b/tests/integration_tests/utilities/test_golden_query_api.py @@ -0,0 +1,11 @@ +"""Integration test for Golden API Wrapper.""" +import json + +from langchain.utilities.golden_query import GoldenQueryAPIWrapper + + +def 
test_call() -> None: + """Test that a query returns a non-empty "results" list.""" + search = GoldenQueryAPIWrapper() + output = json.loads(search.run("companies in nanotech")) + assert len(output.get("results", [])) > 0