SearchApi integration (#11023)

Based on customers' requests for a native langchain integration,
SearchApi is ready to invest in the AI and LLM space, especially in
open-source development.

- This is our initial PR, and we plan to improve it later based on
customers' and langchain users' feedback. The most likely changes will
affect how the final results string is built.
- We are creating similar native integration in Python and JavaScript.
- The next plan is to integrate into Java, Ruby, Go, and others.
- Feel free to assign @SebastjanPrachovskij as a main reviewer for any
SearchApi-related searches. We will be glad to help and support
langchain development.
This commit is contained in:
Donatas Remeika
2023-09-29 04:08:37 +03:00
committed by GitHub
parent 8cd18a48e4
commit a4e0cf6300
10 changed files with 1079 additions and 1 deletions

View File

@@ -22,6 +22,7 @@ from langchain.tools.ddg_search.tool import DuckDuckGoSearchRun
from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun
from langchain.tools.metaphor_search.tool import MetaphorSearchResults
from langchain.tools.google_serper.tool import GoogleSerperResults, GoogleSerperRun
from langchain.tools.searchapi.tool import SearchAPIResults, SearchAPIRun
from langchain.tools.graphql.tool import BaseGraphQLTool
from langchain.tools.human.tool import HumanInputRun
from langchain.tools.python.tool import PythonREPLTool
@@ -52,6 +53,7 @@ from langchain.utilities.google_serper import GoogleSerperAPIWrapper
from langchain.utilities.metaphor_search import MetaphorSearchAPIWrapper
from langchain.utilities.awslambda import LambdaWrapper
from langchain.utilities.graphql import GraphQLAPIWrapper
from langchain.utilities.searchapi import SearchApiAPIWrapper
from langchain.utilities.searx_search import SearxSearchWrapper
from langchain.utilities.serpapi import SerpAPIWrapper
from langchain.utilities.twilio import TwilioAPIWrapper
@@ -214,6 +216,14 @@ def _get_google_search_results_json(**kwargs: Any) -> BaseTool:
return GoogleSearchResults(api_wrapper=GoogleSearchAPIWrapper(**kwargs))
def _get_searchapi(**kwargs: Any) -> BaseTool:
    """Create a ``SearchAPIRun`` tool backed by a new ``SearchApiAPIWrapper``.

    Args:
        **kwargs: Passed through unchanged to ``SearchApiAPIWrapper``.

    Returns:
        The ``SearchAPIRun`` tool wired to the freshly built wrapper.
    """
    wrapper = SearchApiAPIWrapper(**kwargs)
    return SearchAPIRun(api_wrapper=wrapper)
def _get_searchapi_results_json(**kwargs: Any) -> BaseTool:
    """Create a ``SearchAPIResults`` tool backed by a new ``SearchApiAPIWrapper``.

    Args:
        **kwargs: Passed through unchanged to ``SearchApiAPIWrapper``.

    Returns:
        The ``SearchAPIResults`` tool wired to the freshly built wrapper.
    """
    wrapper = SearchApiAPIWrapper(**kwargs)
    return SearchAPIResults(api_wrapper=wrapper)
def _get_serpapi(**kwargs: Any) -> BaseTool:
return Tool(
name="Search",
@@ -298,7 +308,6 @@ _EXTRA_LLM_TOOLS: Dict[
"tmdb-api": (_get_tmdb_api, ["tmdb_bearer_token"]),
"podcast-api": (_get_podcast_api, ["listen_api_key"]),
}
_EXTRA_OPTIONAL_TOOLS: Dict[str, Tuple[Callable[[KwArg(Any)], BaseTool], List[str]]] = {
"wolfram-alpha": (_get_wolfram_alpha, ["wolfram_alpha_appid"]),
"google-search": (_get_google_search, ["google_api_key", "google_cse_id"]),
@@ -318,6 +327,11 @@ _EXTRA_OPTIONAL_TOOLS: Dict[str, Tuple[Callable[[KwArg(Any)], BaseTool], List[st
_get_google_serper_results_json,
["serper_api_key", "aiosession"],
),
"searchapi": (_get_searchapi, ["searchapi_api_key", "aiosession"]),
"searchapi-results-json": (
_get_searchapi_results_json,
["searchapi_api_key", "aiosession"],
),
"serpapi": (_get_serpapi, ["serpapi_api_key", "aiosession"]),
"dalle-image-generator": (_get_dalle_image_generator, ["openai_api_key"]),
"twilio": (_get_twilio, ["account_sid", "auth_token", "from_number"]),

View File

@@ -0,0 +1,6 @@
"""SearchApi.io API Toolkit.

Tool for the SearchApi.io Google SERP API.
"""
# The docstring must be the first statement in the module to become
# ``__doc__``; string literals placed after an import are discarded.
from langchain.tools.searchapi.tool import SearchAPIResults, SearchAPIRun

__all__ = ["SearchAPIResults", "SearchAPIRun"]

View File

@@ -0,0 +1,68 @@
"""Tool for the SearchApi.io search API."""
from typing import Optional
from langchain.callbacks.manager import (
AsyncCallbackManagerForToolRun,
CallbackManagerForToolRun,
)
from langchain.pydantic_v1 import Field
from langchain.tools.base import BaseTool
from langchain.utilities.searchapi import SearchApiAPIWrapper
class SearchAPIRun(BaseTool):
    """Tool that queries the SearchApi.io search API.

    Both entry points hand the query to ``api_wrapper`` and return the string
    it produces without further processing.
    """

    name: str = "searchapi"
    # Adjacent string literals are joined verbatim, so every sentence but the
    # last ends with an explicit space; without it the sentences run together.
    description: str = (
        "Google search API provided by SearchApi.io. "
        "This tool is handy when you need to answer questions about current events. "
        "Input should be a search query."
    )
    api_wrapper: SearchApiAPIWrapper

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool.

        Args:
            query: Search query forwarded to ``api_wrapper.run``.
            run_manager: Not used by this implementation.

        Returns:
            The string returned by ``api_wrapper.run``.
        """
        return self.api_wrapper.run(query)

    async def _arun(
        self,
        query: str,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool asynchronously.

        Args:
            query: Search query forwarded to ``api_wrapper.arun``.
            run_manager: Not used by this implementation.

        Returns:
            The string returned by ``api_wrapper.arun``.
        """
        return await self.api_wrapper.arun(query)
class SearchAPIResults(BaseTool):
    """Tool that queries the SearchApi.io search API and returns JSON.

    Both entry points fetch the raw result mapping from ``api_wrapper`` and
    render it with ``str()``.
    """

    name: str = "searchapi_results_json"
    # Adjacent string literals are joined verbatim, so every fragment but the
    # last ends with an explicit space; without it the sentences run together.
    description: str = (
        "Google search API provided by SearchApi.io. "
        "This tool is handy when you need to answer questions about current events. "
        "The input should be a search query and the output is a JSON object "
        "with the query results."
    )
    # A wrapper is constructed on demand when the caller does not supply one.
    api_wrapper: SearchApiAPIWrapper = Field(default_factory=SearchApiAPIWrapper)

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool.

        Args:
            query: Search query forwarded to ``api_wrapper.results``.
            run_manager: Not used by this implementation.

        Returns:
            ``str()`` of the value returned by ``api_wrapper.results``.
        """
        return str(self.api_wrapper.results(query))

    async def _arun(
        self,
        query: str,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool asynchronously.

        Args:
            query: Search query forwarded to ``api_wrapper.aresults``.
            run_manager: Not used by this implementation.

        Returns:
            ``str()`` of the value returned by ``api_wrapper.aresults``.
        """
        # Same rendering as the synchronous path, spelled the same way.
        return str(await self.api_wrapper.aresults(query))

View File

@@ -27,6 +27,7 @@ from langchain.utilities.pubmed import PubMedAPIWrapper
from langchain.utilities.python import PythonREPL
from langchain.utilities.requests import Requests, RequestsWrapper, TextRequestsWrapper
from langchain.utilities.scenexplain import SceneXplainAPIWrapper
from langchain.utilities.searchapi import SearchApiAPIWrapper
from langchain.utilities.searx_search import SearxSearchWrapper
from langchain.utilities.serpapi import SerpAPIWrapper
from langchain.utilities.spark_sql import SparkSQL
@@ -64,6 +65,7 @@ __all__ = [
"RequestsWrapper",
"SQLDatabase",
"SceneXplainAPIWrapper",
"SearchApiAPIWrapper",
"SearxSearchWrapper",
"SerpAPIWrapper",
"SparkSQL",

View File

@@ -0,0 +1,139 @@
from typing import Any, Dict, Optional
import aiohttp
import requests
from langchain.pydantic_v1 import BaseModel, root_validator
from langchain.utils import get_from_dict_or_env
class SearchApiAPIWrapper(BaseModel):
    """
    Wrapper around SearchApi API.

    To use, you should have the environment variable ``SEARCHAPI_API_KEY``
    set with your API key, or pass `searchapi_api_key`
    as a named parameter to the constructor.

    Example:
        .. code-block:: python

            from langchain.utilities import SearchApiAPIWrapper
            searchapi = SearchApiAPIWrapper()
    """

    # Use "google" engine by default.
    # Full list of supported ones can be found in https://www.searchapi.io docs
    engine: str = "google"
    searchapi_api_key: Optional[str] = None
    # Optional session used for async requests; when it is not set, a
    # temporary session is opened for each request and closed afterwards.
    aiosession: Optional[aiohttp.ClientSession] = None

    class Config:
        """Configuration for this pydantic object."""

        arbitrary_types_allowed = True

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that API key exists in environment.

        Looks the key up with ``get_from_dict_or_env`` (field name
        ``searchapi_api_key``, environment variable ``SEARCHAPI_API_KEY``)
        and stores the result back into ``values``.
        """
        values["searchapi_api_key"] = get_from_dict_or_env(
            values, "searchapi_api_key", "SEARCHAPI_API_KEY"
        )
        return values

    def run(self, query: str, **kwargs: Any) -> str:
        """Run a search and condense the response into one string.

        Args:
            query: Search query.
            **kwargs: Extra request parameters; see ``_prepare_request``.

        Returns:
            The text chosen by ``_result_as_string``.
        """
        return self._result_as_string(self.results(query, **kwargs))

    async def arun(self, query: str, **kwargs: Any) -> str:
        """Asynchronous counterpart of ``run``."""
        return self._result_as_string(await self.aresults(query, **kwargs))

    def results(self, query: str, **kwargs: Any) -> dict:
        """Run a search and return the decoded JSON response.

        Args:
            query: Search query.
            **kwargs: Extra request parameters; see ``_prepare_request``.
        """
        return self._search_api_results(query, **kwargs)

    async def aresults(self, query: str, **kwargs: Any) -> dict:
        """Asynchronous counterpart of ``results``."""
        return await self._async_search_api_results(query, **kwargs)

    def _prepare_request(self, query: str, **kwargs: Any) -> dict:
        """Build the URL, headers and query parameters for one search request.

        Args:
            query: Search query, sent as the ``q`` parameter.
            **kwargs: Extra query parameters. Entries whose value is ``None``
                are dropped; the rest are merged after ``engine`` and ``q``
                and therefore override them on a key clash.

        Returns:
            A dict with the keys ``url``, ``headers`` and ``params``.
        """
        extra_params = {
            key: value for key, value in kwargs.items() if value is not None
        }
        return {
            "url": "https://www.searchapi.io/api/v1/search",
            "headers": {
                "Authorization": f"Bearer {self.searchapi_api_key}",
            },
            "params": {
                "engine": self.engine,
                "q": query,
                **extra_params,
            },
        }

    def _search_api_results(self, query: str, **kwargs: Any) -> dict:
        """Send the request with ``requests`` and return the decoded JSON body.

        ``response.raise_for_status()`` is called before decoding, so an
        error status surfaces as an exception instead of a result dict.
        """
        request_details = self._prepare_request(query, **kwargs)
        response = requests.get(
            url=request_details["url"],
            params=request_details["params"],
            headers=request_details["headers"],
        )
        response.raise_for_status()
        return response.json()

    @staticmethod
    async def _fetch_json(
        session: aiohttp.ClientSession, request_details: dict
    ) -> dict:
        """GET the prepared request on ``session`` and decode the JSON body."""
        async with session.get(
            url=request_details["url"],
            headers=request_details["headers"],
            params=request_details["params"],
            raise_for_status=True,
        ) as response:
            return await response.json()

    async def _async_search_api_results(self, query: str, **kwargs: Any) -> dict:
        """Use aiohttp to send request to SearchApi API and return results async.

        Uses ``self.aiosession`` when one is configured; otherwise opens a
        temporary ``aiohttp.ClientSession`` for this single request.
        """
        request_details = self._prepare_request(query, **kwargs)
        if self.aiosession is not None:
            return await self._fetch_json(self.aiosession, request_details)
        async with aiohttp.ClientSession() as session:
            return await self._fetch_json(session, request_details)

    @staticmethod
    def _result_as_string(result: dict) -> str:
        """Pick the most direct textual answer out of a search response.

        Sources are tried in this order and the first one that yields
        non-empty text wins:

        1. ``answer_box["answer"]``
        2. ``answer_box["snippet"]``
        3. ``knowledge_graph["description"]``
        4. ``snippet`` of every ``organic_results`` entry, one per line
        5. ``description`` of every ``jobs`` entry, one per line
        6. title and link of every ``videos`` entry, one per line
        7. title and original link of every ``images`` entry, one per line

        A section that is present but lacks the needed keys (for example a
        knowledge graph without a description, or a video without a link) is
        skipped instead of raising ``KeyError``.

        Args:
            result: Decoded response mapping.

        Returns:
            The selected text, or ``"No good search result found"`` when no
            source yields any.
        """

        def section(name: str) -> list:
            # Treat a missing or null section as empty.
            return result.get(name) or []

        answer_box = result.get("answer_box") or {}
        for key in ("answer", "snippet"):
            if answer_box.get(key):
                return answer_box[key]

        description = (result.get("knowledge_graph") or {}).get("description")
        if description:
            return description

        snippets = [
            r["snippet"] for r in section("organic_results") if "snippet" in r
        ]
        if snippets:
            return "\n".join(snippets)

        jobs = [r["description"] for r in section("jobs") if "description" in r]
        if jobs:
            return "\n".join(jobs)

        # Every key the format string reads is checked up front, so a
        # partially filled entry is dropped rather than aborting the call.
        videos = [
            f"""Title: "{r["title"]}" Link: {r["link"]}"""
            for r in section("videos")
            if "title" in r and "link" in r
        ]
        if videos:
            return "\n".join(videos)

        images = [
            f"""Title: "{r["title"]}" Link: {r["original"]["link"]}"""
            for r in section("images")
            if "title" in r and "link" in (r.get("original") or {})
        ]
        if images:
            return "\n".join(images)

        return "No good search result found"

View File

@@ -2,6 +2,12 @@
# your api key from https://platform.openai.com/account/api-keys
OPENAI_API_KEY=
# searchapi
# your api key from https://www.searchapi.io/
SEARCHAPI_API_KEY=your_searchapi_api_key_here
# pinecone
# your api key from left menu "API Keys" in https://app.pinecone.io
PINECONE_API_KEY=your_pinecone_api_key_here

View File

@@ -0,0 +1,64 @@
"""Integration tests for SearchApi"""
import pytest
from langchain.utilities.searchapi import SearchApiAPIWrapper
def test_call() -> None:
    """Check that ``run`` mentions the expected city for a factual query."""
    wrapper = SearchApiAPIWrapper()
    answer = wrapper.run("What is the capital of Lithuania?")
    assert "Vilnius" in answer
def test_results() -> None:
    """Check that each section of the raw ``results`` payload names the city."""
    payload = SearchApiAPIWrapper().results("What is the capital of Lithuania?")
    expected = "Vilnius"
    assert expected in payload["answer_box"]["answer"]
    assert expected in payload["answer_box"]["snippet"]
    assert expected in payload["knowledge_graph"]["description"]
    assert expected in payload["organic_results"][0]["snippet"]
def test_results_with_custom_params() -> None:
    """Check that extra keyword parameters reach the API and affect the result."""
    custom_params = {
        "hl": "es",
        "gl": "es",
        "google_domain": "google.es",
        "location": "Madrid, Spain",
    }
    payload = SearchApiAPIWrapper().results("cafeteria", **custom_params)
    assert "Madrid" in payload["search_information"]["detected_location"]
def test_scholar_call() -> None:
    """Check that ``run`` returns the expected text with the scholar engine."""
    scholar = SearchApiAPIWrapper(engine="google_scholar")
    answer = scholar.run("large language models")
    assert "state of large language models and their applications" in answer
def test_jobs_call() -> None:
    """Check that ``run`` returns the expected text with the jobs engine."""
    jobs_search = SearchApiAPIWrapper(engine="google_jobs")
    answer = jobs_search.run("AI")
    assert "years of experience" in answer
@pytest.mark.asyncio
async def test_async_call() -> None:
    """Check that ``arun`` returns text containing the expected full name."""
    wrapper = SearchApiAPIWrapper()
    answer = await wrapper.arun("What is Obama's full name?")
    assert "Barack Hussein Obama II" in answer
@pytest.mark.asyncio
async def test_async_results() -> None:
    """Check that ``aresults`` exposes the full name in the knowledge graph."""
    wrapper = SearchApiAPIWrapper()
    payload = await wrapper.aresults("What is Obama's full name?")
    knowledge_graph = payload["knowledge_graph"]
    assert "Barack Hussein Obama II" in knowledge_graph["description"]