mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-15 14:36:54 +00:00
SearchApi integration (#11023)
Based on customers' requests for a native langchain integration, SearchApi is ready to invest in the AI and LLM space, especially in open-source development. - This is our initial PR; later we want to improve it based on feedback from customers and langchain users. Most likely, changes will affect how the final results string is built. - We are creating similar native integrations in Python and JavaScript. - The next plan is to integrate with Java, Ruby, Go, and others. - Feel free to assign @SebastjanPrachovskij as the main reviewer for any SearchApi-related searches. We will be glad to help and support langchain development.
This commit is contained in:
@@ -22,6 +22,7 @@ from langchain.tools.ddg_search.tool import DuckDuckGoSearchRun
|
||||
from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun
|
||||
from langchain.tools.metaphor_search.tool import MetaphorSearchResults
|
||||
from langchain.tools.google_serper.tool import GoogleSerperResults, GoogleSerperRun
|
||||
from langchain.tools.searchapi.tool import SearchAPIResults, SearchAPIRun
|
||||
from langchain.tools.graphql.tool import BaseGraphQLTool
|
||||
from langchain.tools.human.tool import HumanInputRun
|
||||
from langchain.tools.python.tool import PythonREPLTool
|
||||
@@ -52,6 +53,7 @@ from langchain.utilities.google_serper import GoogleSerperAPIWrapper
|
||||
from langchain.utilities.metaphor_search import MetaphorSearchAPIWrapper
|
||||
from langchain.utilities.awslambda import LambdaWrapper
|
||||
from langchain.utilities.graphql import GraphQLAPIWrapper
|
||||
from langchain.utilities.searchapi import SearchApiAPIWrapper
|
||||
from langchain.utilities.searx_search import SearxSearchWrapper
|
||||
from langchain.utilities.serpapi import SerpAPIWrapper
|
||||
from langchain.utilities.twilio import TwilioAPIWrapper
|
||||
@@ -214,6 +216,14 @@ def _get_google_search_results_json(**kwargs: Any) -> BaseTool:
|
||||
return GoogleSearchResults(api_wrapper=GoogleSearchAPIWrapper(**kwargs))
|
||||
|
||||
|
||||
def _get_searchapi(**kwargs: Any) -> BaseTool:
    """Build the SearchApi text-answer tool.

    All keyword arguments are forwarded unchanged to ``SearchApiAPIWrapper``;
    the resulting wrapper is then handed to ``SearchAPIRun``.
    """
    wrapper = SearchApiAPIWrapper(**kwargs)
    return SearchAPIRun(api_wrapper=wrapper)
|
||||
|
||||
|
||||
def _get_searchapi_results_json(**kwargs: Any) -> BaseTool:
    """Build the SearchApi raw-results tool.

    All keyword arguments are forwarded unchanged to ``SearchApiAPIWrapper``;
    the resulting wrapper is then handed to ``SearchAPIResults``.
    """
    wrapper = SearchApiAPIWrapper(**kwargs)
    return SearchAPIResults(api_wrapper=wrapper)
|
||||
|
||||
|
||||
def _get_serpapi(**kwargs: Any) -> BaseTool:
|
||||
return Tool(
|
||||
name="Search",
|
||||
@@ -298,7 +308,6 @@ _EXTRA_LLM_TOOLS: Dict[
|
||||
"tmdb-api": (_get_tmdb_api, ["tmdb_bearer_token"]),
|
||||
"podcast-api": (_get_podcast_api, ["listen_api_key"]),
|
||||
}
|
||||
|
||||
_EXTRA_OPTIONAL_TOOLS: Dict[str, Tuple[Callable[[KwArg(Any)], BaseTool], List[str]]] = {
|
||||
"wolfram-alpha": (_get_wolfram_alpha, ["wolfram_alpha_appid"]),
|
||||
"google-search": (_get_google_search, ["google_api_key", "google_cse_id"]),
|
||||
@@ -318,6 +327,11 @@ _EXTRA_OPTIONAL_TOOLS: Dict[str, Tuple[Callable[[KwArg(Any)], BaseTool], List[st
|
||||
_get_google_serper_results_json,
|
||||
["serper_api_key", "aiosession"],
|
||||
),
|
||||
"searchapi": (_get_searchapi, ["searchapi_api_key", "aiosession"]),
|
||||
"searchapi-results-json": (
|
||||
_get_searchapi_results_json,
|
||||
["searchapi_api_key", "aiosession"],
|
||||
),
|
||||
"serpapi": (_get_serpapi, ["serpapi_api_key", "aiosession"]),
|
||||
"dalle-image-generator": (_get_dalle_image_generator, ["openai_api_key"]),
|
||||
"twilio": (_get_twilio, ["account_sid", "auth_token", "from_number"]),
|
||||
|
6
libs/langchain/langchain/tools/searchapi/__init__.py
Normal file
6
libs/langchain/langchain/tools/searchapi/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
"""SearchApi.io API Toolkit.

Tool for the SearchApi.io Google SERP API.
"""

# The docstring must be the first statement of the module to become its
# ``__doc__``; it previously sat after the import as two no-op string
# expressions.
from langchain.tools.searchapi.tool import SearchAPIResults, SearchAPIRun

__all__ = ["SearchAPIResults", "SearchAPIRun"]
|
68
libs/langchain/langchain/tools/searchapi/tool.py
Normal file
68
libs/langchain/langchain/tools/searchapi/tool.py
Normal file
@@ -0,0 +1,68 @@
|
||||
"""Tool for the SearchApi.io search API."""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from langchain.callbacks.manager import (
|
||||
AsyncCallbackManagerForToolRun,
|
||||
CallbackManagerForToolRun,
|
||||
)
|
||||
from langchain.pydantic_v1 import Field
|
||||
from langchain.tools.base import BaseTool
|
||||
from langchain.utilities.searchapi import SearchApiAPIWrapper
|
||||
|
||||
|
||||
class SearchAPIRun(BaseTool):
    """Tool that queries the SearchApi.io search API.

    The query is delegated to ``api_wrapper.run`` (or ``arun`` for the async
    path) and that call's string result is returned as-is.
    """

    name: str = "searchapi"
    # Adjacent string literals are concatenated verbatim, so each sentence
    # carries its own trailing space to keep the sentences from running
    # together.
    description: str = (
        "Google search API provided by SearchApi.io. "
        "This tool is handy when you need to answer questions about current events. "
        "Input should be a search query."
    )
    # Wrapper that performs the actual HTTP request; must be supplied by the
    # caller because no default is declared here.
    api_wrapper: SearchApiAPIWrapper

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool.

        Args:
            query: The search query to send.
            run_manager: Accepted for interface compatibility; not used here.

        Returns:
            The string produced by ``api_wrapper.run(query)``.
        """
        return self.api_wrapper.run(query)

    async def _arun(
        self,
        query: str,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool asynchronously.

        Args:
            query: The search query to send.
            run_manager: Accepted for interface compatibility; not used here.

        Returns:
            The string produced by awaiting ``api_wrapper.arun(query)``.
        """
        return await self.api_wrapper.arun(query)
|
||||
|
||||
|
||||
class SearchAPIResults(BaseTool):
    """Tool that queries the SearchApi.io search API and returns JSON.

    The raw result object from ``api_wrapper.results`` (or ``aresults`` for
    the async path) is converted with ``str()`` before being returned.
    """

    name: str = "searchapi_results_json"
    # Adjacent string literals are concatenated verbatim, so each sentence
    # carries its own trailing space to keep the sentences from running
    # together.
    description: str = (
        "Google search API provided by SearchApi.io. "
        "This tool is handy when you need to answer questions about current events. "
        "The input should be a search query and the output is a JSON object "
        "with the query results."
    )
    # Built with no arguments when the caller does not pass a wrapper.
    api_wrapper: SearchApiAPIWrapper = Field(default_factory=SearchApiAPIWrapper)

    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool.

        Args:
            query: The search query to send.
            run_manager: Accepted for interface compatibility; not used here.

        Returns:
            ``str()`` of the object returned by ``api_wrapper.results(query)``.
        """
        return str(self.api_wrapper.results(query))

    async def _arun(
        self,
        query: str,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Use the tool asynchronously.

        Args:
            query: The search query to send.
            run_manager: Accepted for interface compatibility; not used here.

        Returns:
            ``str()`` of the object returned by awaiting
            ``api_wrapper.aresults(query)``.
        """
        # Same conversion as the sync path (str() rather than a direct
        # __str__() call) so both methods read alike.
        return str(await self.api_wrapper.aresults(query))
|
@@ -27,6 +27,7 @@ from langchain.utilities.pubmed import PubMedAPIWrapper
|
||||
from langchain.utilities.python import PythonREPL
|
||||
from langchain.utilities.requests import Requests, RequestsWrapper, TextRequestsWrapper
|
||||
from langchain.utilities.scenexplain import SceneXplainAPIWrapper
|
||||
from langchain.utilities.searchapi import SearchApiAPIWrapper
|
||||
from langchain.utilities.searx_search import SearxSearchWrapper
|
||||
from langchain.utilities.serpapi import SerpAPIWrapper
|
||||
from langchain.utilities.spark_sql import SparkSQL
|
||||
@@ -64,6 +65,7 @@ __all__ = [
|
||||
"RequestsWrapper",
|
||||
"SQLDatabase",
|
||||
"SceneXplainAPIWrapper",
|
||||
"SearchApiAPIWrapper",
|
||||
"SearxSearchWrapper",
|
||||
"SerpAPIWrapper",
|
||||
"SparkSQL",
|
||||
|
139
libs/langchain/langchain/utilities/searchapi.py
Normal file
139
libs/langchain/langchain/utilities/searchapi.py
Normal file
@@ -0,0 +1,139 @@
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import aiohttp
|
||||
import requests
|
||||
|
||||
from langchain.pydantic_v1 import BaseModel, root_validator
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
|
||||
|
||||
class SearchApiAPIWrapper(BaseModel):
    """
    Wrapper around SearchApi API.

    To use, you should have the environment variable ``SEARCHAPI_API_KEY``
    set with your API key, or pass `searchapi_api_key`
    as a named parameter to the constructor.

    Example:
        .. code-block:: python

            from langchain.utilities import SearchApiAPIWrapper
            searchapi = SearchApiAPIWrapper()
    """

    # Use "google" engine by default.
    # Full list of supported ones can be found in https://www.searchapi.io docs
    engine: str = "google"
    # API key sent as a Bearer token; populated by ``validate_environment``.
    searchapi_api_key: Optional[str] = None
    # Optional session reused for async requests; this class never closes it.
    aiosession: Optional[aiohttp.ClientSession] = None

    class Config:
        """Configuration for this pydantic object."""

        arbitrary_types_allowed = True

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that API key exists in environment.

        Looks the key up through ``get_from_dict_or_env`` using the
        ``searchapi_api_key`` field and the ``SEARCHAPI_API_KEY`` environment
        variable, then stores the resolved value back into ``values``.
        NOTE(review): presumably the helper raises when neither source is
        set; confirm against its implementation.
        """
        searchapi_api_key = get_from_dict_or_env(
            values, "searchapi_api_key", "SEARCHAPI_API_KEY"
        )
        values["searchapi_api_key"] = searchapi_api_key
        return values

    def run(self, query: str, **kwargs: Any) -> str:
        """Run a search and condense the response into a single string.

        Args:
            query: The search query.
            **kwargs: Extra request parameters passed on to ``results``.

        Returns:
            The text chosen by ``_result_as_string``.
        """
        results = self.results(query, **kwargs)
        return self._result_as_string(results)

    async def arun(self, query: str, **kwargs: Any) -> str:
        """Async counterpart of ``run``.

        Args:
            query: The search query.
            **kwargs: Extra request parameters passed on to ``aresults``.

        Returns:
            The text chosen by ``_result_as_string``.
        """
        results = await self.aresults(query, **kwargs)
        return self._result_as_string(results)

    def results(self, query: str, **kwargs: Any) -> dict:
        """Run a search and return the decoded JSON response unchanged."""
        results = self._search_api_results(query, **kwargs)
        return results

    async def aresults(self, query: str, **kwargs: Any) -> dict:
        """Async counterpart of ``results``."""
        results = await self._async_search_api_results(query, **kwargs)
        return results

    def _prepare_request(self, query: str, **kwargs: Any) -> dict:
        """Assemble the URL, headers and query parameters for one request.

        Keyword arguments whose value is ``None`` are dropped. The remaining
        ones are merged after ``engine`` and ``q``, so a caller-supplied
        ``engine`` or ``q`` keyword replaces the default entry.
        """
        return {
            "url": "https://www.searchapi.io/api/v1/search",
            "headers": {
                "Authorization": f"Bearer {self.searchapi_api_key}",
            },
            "params": {
                "engine": self.engine,
                "q": query,
                **{key: value for key, value in kwargs.items() if value is not None},
            },
        }

    def _search_api_results(self, query: str, **kwargs: Any) -> dict:
        """Send the request with ``requests`` and return the parsed JSON.

        ``raise_for_status`` is called on the response before decoding.
        """
        request_details = self._prepare_request(query, **kwargs)
        # NOTE(review): no timeout is passed, so this call can wait
        # indefinitely on an unresponsive server.
        response = requests.get(
            url=request_details["url"],
            params=request_details["params"],
            headers=request_details["headers"],
        )
        response.raise_for_status()
        return response.json()

    async def _async_search_api_results(self, query: str, **kwargs: Any) -> dict:
        """Use aiohttp to send request to SearchApi API and return results async."""
        request_details = self._prepare_request(query, **kwargs)
        if not self.aiosession:
            # No shared session configured: open one for this request only;
            # the ``async with`` closes it afterwards.
            async with aiohttp.ClientSession() as session:
                async with session.get(
                    url=request_details["url"],
                    headers=request_details["headers"],
                    params=request_details["params"],
                    raise_for_status=True,
                ) as response:
                    results = await response.json()
        else:
            # Reuse the configured session; only the response is released.
            async with self.aiosession.get(
                url=request_details["url"],
                headers=request_details["headers"],
                params=request_details["params"],
                raise_for_status=True,
            ) as response:
                results = await response.json()
        return results

    @staticmethod
    def _result_as_string(result: dict) -> str:
        """Pick the most direct textual answer out of a response dict.

        Sections are tried in this order: ``answer_box`` answer,
        ``answer_box`` snippet, ``knowledge_graph`` description,
        ``organic_results`` snippets, ``jobs`` descriptions, ``videos`` and
        ``images`` (formatted as title/link lines). A section that is absent,
        or present without the fields needed here, is skipped instead of
        raising ``KeyError`` or yielding an empty string.

        Args:
            result: Decoded JSON response.

        Returns:
            The selected text, or ``"No good search result found"`` when no
            section provides usable content.
        """
        answer_box = result.get("answer_box") or {}
        if "answer" in answer_box:
            return answer_box["answer"]
        if "snippet" in answer_box:
            return answer_box["snippet"]

        knowledge_graph = result.get("knowledge_graph") or {}
        if "description" in knowledge_graph:
            return knowledge_graph["description"]

        snippets = [
            r["snippet"] for r in result.get("organic_results") or [] if "snippet" in r
        ]
        if snippets:
            return "\n".join(snippets)

        jobs = [
            r["description"] for r in result.get("jobs") or [] if "description" in r
        ]
        if jobs:
            return "\n".join(jobs)

        # Both fields are interpolated, so both must be present.
        videos = [
            f"""Title: "{r["title"]}" Link: {r["link"]}"""
            for r in result.get("videos") or []
            if "title" in r and "link" in r
        ]
        if videos:
            return "\n".join(videos)

        images = [
            f"""Title: "{r["title"]}" Link: {r["original"]["link"]}"""
            for r in result.get("images") or []
            if "title" in r and "link" in (r.get("original") or {})
        ]
        if images:
            return "\n".join(images)

        return "No good search result found"
|
@@ -2,6 +2,12 @@
|
||||
# your api key from https://platform.openai.com/account/api-keys
|
||||
OPENAI_API_KEY=
|
||||
|
||||
|
||||
# searchapi
|
||||
# your api key from https://www.searchapi.io/
|
||||
SEARCHAPI_API_KEY=your_searchapi_api_key_here
|
||||
|
||||
|
||||
# pinecone
|
||||
# your api key from left menu "API Keys" in https://app.pinecone.io
|
||||
PINECONE_API_KEY=your_pinecone_api_key_here
|
||||
|
@@ -0,0 +1,64 @@
|
||||
"""Integration tests for SearchApi"""
|
||||
import pytest
|
||||
|
||||
from langchain.utilities.searchapi import SearchApiAPIWrapper
|
||||
|
||||
|
||||
def test_call() -> None:
    """Run a plain query and check the text answer mentions Vilnius."""
    wrapper = SearchApiAPIWrapper()
    answer = wrapper.run("What is the capital of Lithuania?")
    assert "Vilnius" in answer
|
||||
|
||||
|
||||
def test_results() -> None:
    """Fetch the raw result dict and check several sections mention Vilnius."""
    wrapper = SearchApiAPIWrapper()
    payload = wrapper.results("What is the capital of Lithuania?")
    answer_box = payload["answer_box"]
    assert "Vilnius" in answer_box["answer"]
    assert "Vilnius" in answer_box["snippet"]
    assert "Vilnius" in payload["knowledge_graph"]["description"]
    first_organic = payload["organic_results"][0]
    assert "Vilnius" in first_organic["snippet"]
|
||||
|
||||
|
||||
def test_results_with_custom_params() -> None:
    """Pass extra request parameters and check the detected location."""
    extra_params = {
        "hl": "es",
        "gl": "es",
        "google_domain": "google.es",
        "location": "Madrid, Spain",
    }
    payload = SearchApiAPIWrapper().results("cafeteria", **extra_params)
    detected = payload["search_information"]["detected_location"]
    assert "Madrid" in detected
|
||||
|
||||
|
||||
def test_scholar_call() -> None:
    """Query the ``google_scholar`` engine and check the text answer."""
    scholar = SearchApiAPIWrapper(engine="google_scholar")
    answer = scholar.run("large language models")
    expected_fragment = "state of large language models and their applications"
    assert expected_fragment in answer
|
||||
|
||||
|
||||
def test_jobs_call() -> None:
    """Query the ``google_jobs`` engine and check the text answer."""
    jobs_search = SearchApiAPIWrapper(engine="google_jobs")
    answer = jobs_search.run("AI")
    assert "years of experience" in answer
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_async_call() -> None:
    """Run a query through the async path and check the text answer."""
    wrapper = SearchApiAPIWrapper()
    answer = await wrapper.arun("What is Obama's full name?")
    assert "Barack Hussein Obama II" in answer
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_async_results() -> None:
    """Fetch raw results through the async path and check the knowledge graph."""
    wrapper = SearchApiAPIWrapper()
    payload = await wrapper.aresults("What is Obama's full name?")
    description = payload["knowledge_graph"]["description"]
    assert "Barack Hussein Obama II" in description
|
Reference in New Issue
Block a user