mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-04 04:07:54 +00:00
searx: implement async + helper tool providing json results (#2129)
- implemented `arun` and `aresults`. Reuses aiosession if available. - helper tools `SearxSearchRun` and `SearxSearchResults` - update doc Co-authored-by: blob42 <spike@w530>
This commit is contained in:
parent
ccee1aedd2
commit
031e32f331
@ -47,12 +47,24 @@ s.run("what is a large language model?")
|
||||
|
||||
### Tool
|
||||
|
||||
You can also easily load this wrapper as a Tool (to use with an Agent).
|
||||
You can also load this wrapper as a Tool (to use with an Agent).
|
||||
|
||||
You can do this with:
|
||||
|
||||
```python
|
||||
from langchain.agents import load_tools
|
||||
tools = load_tools(["searx-search"], searx_host="http://localhost:8888")
|
||||
tools = load_tools(["searx-search"],
|
||||
searx_host="http://localhost:8888",
|
||||
engines=["github"])
|
||||
```
|
||||
|
||||
Note that we could _optionally_ pass custom engines to use.
|
||||
|
||||
If you want to obtain results with metadata as *json* you can use:
|
||||
```python
|
||||
tools = load_tools(["searx-search-results-json"],
|
||||
searx_host="http://localhost:8888",
|
||||
num_results=5)
|
||||
```
|
||||
|
||||
For more information on tools, see [this page](../modules/agents/tools/getting_started.md)
|
||||
|
@ -13,6 +13,7 @@ from langchain.requests import RequestsWrapper
|
||||
from langchain.tools.base import BaseTool
|
||||
from langchain.tools.bing_search.tool import BingSearchRun
|
||||
from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun
|
||||
from langchain.tools.searx_search.tool import SearxSearchResults, SearxSearchRun
|
||||
from langchain.tools.human.tool import HumanInputRun
|
||||
from langchain.tools.python.tool import PythonREPLTool
|
||||
from langchain.tools.requests.tool import RequestsGetTool
|
||||
@ -167,11 +168,12 @@ def _get_serpapi(**kwargs: Any) -> BaseTool:
|
||||
|
||||
|
||||
def _get_searx_search(**kwargs: Any) -> BaseTool:
|
||||
return Tool(
|
||||
name="SearX Search",
|
||||
description="A meta search engine. Useful for when you need to answer questions about current events. Input should be a search query.",
|
||||
func=SearxSearchWrapper(**kwargs).run,
|
||||
)
|
||||
return SearxSearchRun(wrapper=SearxSearchWrapper(**kwargs))
|
||||
|
||||
|
||||
def _get_searx_search_results_json(**kwargs: Any) -> BaseTool:
    """Build the Searx tool that returns result metadata.

    ``num_results`` is a setting of the tool, not of the wrapper, so it is
    stripped from the keyword arguments used to build the wrapper. The tool
    itself still receives the full, unfiltered ``kwargs``.
    """
    wrapper_kwargs = dict(kwargs)
    wrapper_kwargs.pop("num_results", None)
    searx_wrapper = SearxSearchWrapper(**wrapper_kwargs)
    return SearxSearchResults(wrapper=searx_wrapper, **kwargs)
|
||||
|
||||
|
||||
def _get_bing_search(**kwargs: Any) -> BaseTool:
|
||||
@ -195,10 +197,14 @@ _EXTRA_OPTIONAL_TOOLS = {
|
||||
_get_google_search_results_json,
|
||||
["google_api_key", "google_cse_id", "num_results"],
|
||||
),
|
||||
"searx-search-results-json": (
|
||||
_get_searx_search_results_json,
|
||||
["searx_host", "engines", "num_results", "aiosession"],
|
||||
),
|
||||
"bing-search": (_get_bing_search, ["bing_subscription_key", "bing_search_url"]),
|
||||
"google-serper": (_get_google_serper, ["serper_api_key"]),
|
||||
"serpapi": (_get_serpapi, ["serpapi_api_key", "aiosession"]),
|
||||
"searx-search": (_get_searx_search, ["searx_host"]),
|
||||
"searx-search": (_get_searx_search, ["searx_host", "engines", "aiosession"]),
|
||||
"wikipedia": (_get_wikipedia, ["top_k_results"]),
|
||||
"human": (_get_human_tool, ["prompt_func", "input_func"]),
|
||||
}
|
||||
|
0
langchain/tools/searx_search/__init__.py
Normal file
0
langchain/tools/searx_search/__init__.py
Normal file
51
langchain/tools/searx_search/tool.py
Normal file
51
langchain/tools/searx_search/tool.py
Normal file
@ -0,0 +1,51 @@
|
||||
"""Tool for the SearxNG search API."""
|
||||
from pydantic import Extra
|
||||
|
||||
from langchain.tools.base import BaseTool
|
||||
from langchain.utilities.searx_search import SearxSearchWrapper
|
||||
|
||||
|
||||
class SearxSearchRun(BaseTool):
    """Tool that adds the capability to query a Searx instance."""

    name = "Searx Search"
    # Adjacent string literals are concatenated verbatim, so each sentence
    # needs its own trailing space to keep the description readable.
    description = (
        "A meta search engine. "
        "Useful for when you need to answer questions about current events. "
        "Input should be a search query."
    )
    # Wrapper that performs the actual requests against the Searx instance.
    wrapper: SearxSearchWrapper

    def _run(self, query: str) -> str:
        """Run the query synchronously through the wrapper.

        Args:
            query: The search query.

        Returns:
            Whatever ``wrapper.run`` returns for the query.
        """
        return self.wrapper.run(query)

    async def _arun(self, query: str) -> str:
        """Run the query asynchronously through the wrapper.

        Args:
            query: The search query.

        Returns:
            Whatever ``wrapper.arun`` returns for the query.
        """
        return await self.wrapper.arun(query)
|
||||
|
||||
|
||||
class SearxSearchResults(BaseTool):
    """Tool that has capability to query a Searx instance and get back json."""

    # NOTE(review): this is the same name SearxSearchRun uses; confirm that
    # both tools are never registered with the same agent.
    name = "Searx Search"
    # Adjacent string literals are concatenated verbatim, so each sentence
    # needs its own trailing space to keep the description readable.
    description = (
        "A meta search engine. "
        "Useful for when you need to answer questions about current events. "
        "Input should be a search query. Output is a JSON array of the query results"
    )
    # Wrapper that performs the actual requests against the Searx instance.
    wrapper: SearxSearchWrapper
    # Maximum number of results requested from the wrapper per query.
    num_results: int = 4

    class Config:
        """Pydantic config."""

        # Unknown keyword arguments are accepted instead of being rejected.
        extra = Extra.allow

    def _run(self, query: str) -> str:
        """Run the query synchronously and return the results as a string.

        Args:
            query: The search query.

        Returns:
            ``str()`` of the value returned by ``wrapper.results``.
        """
        return str(self.wrapper.results(query, self.num_results))

    async def _arun(self, query: str) -> str:
        """Run the query asynchronously and return the results as a string.

        Args:
            query: The search query.

        Returns:
            ``str()`` of the value returned by ``wrapper.aresults``.
        """
        return str(await self.wrapper.aresults(query, self.num_results))
|
@ -15,7 +15,7 @@ Quick Start
|
||||
-----------
|
||||
|
||||
|
||||
In order to use this tool you need to provide the searx host. This can be done
|
||||
In order to use this utility you need to provide the searx host. This can be done
|
||||
by passing the named parameter :attr:`searx_host <SearxSearchWrapper.searx_host>`
|
||||
or exporting the environment variable SEARX_HOST.
|
||||
Note: this is the only required parameter.
|
||||
@ -129,6 +129,7 @@ For a list of public SearxNG instances see https://searx.space/
|
||||
import json
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import aiohttp
|
||||
import requests
|
||||
from pydantic import BaseModel, Extra, Field, PrivateAttr, root_validator, validator
|
||||
|
||||
@ -204,6 +205,13 @@ class SearxSearchWrapper(BaseModel):
|
||||
engines: Optional[List[str]] = []
|
||||
query_suffix: Optional[str] = ""
|
||||
k: int = 10
|
||||
aiosession: Optional[Any] = None
|
||||
|
||||
class Config:
|
||||
"""Configuration for this pydantic object."""
|
||||
|
||||
extra = Extra.forbid
|
||||
arbitrary_types_allowed = True
|
||||
|
||||
@validator("unsecure")
|
||||
def disable_ssl_warnings(cls, v: bool) -> bool:
|
||||
@ -244,11 +252,6 @@ class SearxSearchWrapper(BaseModel):
|
||||
|
||||
return values
|
||||
|
||||
class Config:
|
||||
"""Configuration for this pydantic object."""
|
||||
|
||||
extra = Extra.forbid
|
||||
|
||||
def _searx_api_query(self, params: dict) -> SearxResults:
|
||||
"""Actual request to searx API."""
|
||||
raw_result = requests.get(
|
||||
@ -264,6 +267,33 @@ class SearxSearchWrapper(BaseModel):
|
||||
self._result = res
|
||||
return res
|
||||
|
||||
async def _asearx_api_query(self, params: dict) -> SearxResults:
    """Send the request to the searx API asynchronously.

    ``self.aiosession`` is reused when it is set; otherwise a temporary
    ``aiohttp.ClientSession`` is opened for this single request.

    Args:
        params: Query parameters sent with the GET request.

    Returns:
        The response body wrapped in ``SearxResults``. The same object is
        also stored on ``self._result``.

    Raises:
        ValueError: If the response is not OK.
    """
    request_kwargs: Dict[str, Any] = {"headers": self.headers, "params": params}
    if self.unsecure:
        # aiohttp turns certificate verification off with ``ssl=False``;
        # ``verify`` is a ``requests`` keyword that aiohttp does not accept.
        # When verification is wanted the argument is simply left out so the
        # library default applies.
        request_kwargs["ssl"] = False

    async def _fetch(session: Any) -> SearxResults:
        # One request path shared by the temporary and the reused session.
        async with session.get(self.searx_host, **request_kwargs) as response:
            if not response.ok:
                # ``response.text`` is a coroutine method: await it so the
                # error carries the body rather than a bound-method object.
                raise ValueError(
                    "Searx API returned an error: ", await response.text()
                )
            return SearxResults(await response.text())

    if self.aiosession:
        result = await _fetch(self.aiosession)
    else:
        async with aiohttp.ClientSession() as session:
            result = await _fetch(session)

    self._result = result
    return result
|
||||
|
||||
def run(
|
||||
self,
|
||||
query: str,
|
||||
@ -281,6 +311,13 @@ class SearxSearchWrapper(BaseModel):
|
||||
engines: List of engines to use for the query.
|
||||
**kwargs: extra parameters to pass to the searx API.
|
||||
|
||||
Returns:
|
||||
str: The result of the query.
|
||||
|
||||
Raises:
|
||||
ValueError: If an error occurred with the query.
|
||||
|
||||
|
||||
Example:
|
||||
This will make a query to the qwant engine:
|
||||
|
||||
@ -321,6 +358,41 @@ class SearxSearchWrapper(BaseModel):
|
||||
|
||||
return toret
|
||||
|
||||
async def arun(
    self,
    query: str,
    engines: Optional[List[str]] = None,
    query_suffix: Optional[str] = "",
    **kwargs: Any,
) -> str:
    """Asynchronous version of `run`.

    Args:
        query: The query to search for.
        engines: List of engines to use for the query. When non-empty it is
            sent as a comma-separated ``engines`` parameter.
        query_suffix: Extra suffix appended to the query, after the
            instance-level ``self.query_suffix``.
        **kwargs: Extra parameters merged into the request parameters.

    Returns:
        The first answer if the response has any, otherwise the ``content``
        of the first ``self.k`` results joined by blank lines, otherwise
        the string ``"No good search result found"``.
    """
    # Later entries win: instance defaults, then the query, then overrides.
    params = {**self.params, "q": query, **kwargs}

    if self.query_suffix:
        params["q"] += " " + self.query_suffix

    if isinstance(query_suffix, str) and query_suffix:
        params["q"] += " " + query_suffix

    if isinstance(engines, list) and engines:
        params["engines"] = ",".join(engines)

    res = await self._asearx_api_query(params)

    if res.answers:
        return res.answers[0]

    # Only return the content of the results list.
    if res.results:
        return "\n\n".join(r.get("content", "") for r in res.results[: self.k])

    return "No good search result found"
|
||||
|
||||
def results(
|
||||
self,
|
||||
query: str,
|
||||
@ -383,3 +455,41 @@ class SearxSearchWrapper(BaseModel):
|
||||
}
|
||||
for result in results
|
||||
]
|
||||
|
||||
async def aresults(
    self,
    query: str,
    num_results: int,
    engines: Optional[List[str]] = None,
    query_suffix: Optional[str] = "",
    **kwargs: Any,
) -> List[Dict]:
    """Asynchronously query with json results.

    Uses aiohttp. See `results` for more info.

    Args:
        query: The query to search for.
        num_results: Maximum number of results to return.
        engines: List of engines to use for the query. When non-empty it is
            sent as a comma-separated ``engines`` parameter.
        query_suffix: Extra suffix appended to the query, after the
            instance-level ``self.query_suffix``.
        **kwargs: Extra parameters merged into the request parameters.

    Returns:
        One dict per result with the keys ``snippet``, ``title``, ``link``,
        ``engines`` and ``category``. When there are no results, a single
        dict ``{"Result": "No good Search Result was found"}`` is returned.
    """
    # Later entries win: instance defaults, then the query, then overrides.
    params = {**self.params, "q": query, **kwargs}

    if self.query_suffix:
        params["q"] += " " + self.query_suffix
    if isinstance(query_suffix, str) and query_suffix:
        params["q"] += " " + query_suffix
    if isinstance(engines, list) and engines:
        params["engines"] = ",".join(engines)

    results = (await self._asearx_api_query(params)).results[:num_results]
    if not results:
        return [{"Result": "No good Search Result was found"}]

    return [
        {
            # Only the snippet falls back to a default when the key is absent.
            "snippet": result.get("content", ""),
            "title": result["title"],
            "link": result["url"],
            "engines": result["engines"],
            "category": result["category"],
        }
        for result in results
    ]
|
||||
|
Loading…
Reference in New Issue
Block a user