mirror of
				https://github.com/hwchase17/langchain.git
				synced 2025-10-26 05:10:22 +00:00 
			
		
		
		
	Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes
		
			
				
	
	
		
			506 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			506 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """Utility for using SearxNG meta search API.
 | |
| 
 | |
| SearxNG is a privacy-friendly free metasearch engine that aggregates results from
 | |
| `multiple search engines
 | |
| <https://docs.searxng.org/admin/engines/configured_engines.html>`_ and databases and
 | |
| supports the `OpenSearch
 | |
| <https://github.com/dewitt/opensearch/blob/master/opensearch-1-1-draft-6.md>`_
 | |
| specification.
 | |
| 
 | |
| More details on the installation instructions `here. <../../integrations/searx.html>`_
 | |
| 
 | |
| For the search API refer to https://docs.searxng.org/dev/search_api.html
 | |
| 
 | |
| Quick Start
 | |
| -----------
 | |
| 
 | |
| 
 | |
| In order to use this utility you need to provide the searx host. This can be done
 | |
| by passing the named parameter :attr:`searx_host <SearxSearchWrapper.searx_host>`
 | |
| or exporting the environment variable SEARX_HOST.
 | |
| Note: this is the only required parameter.
 | |
| 
 | |
| Then create a searx search instance like this:
 | |
| 
 | |
|     .. code-block:: python
 | |
| 
 | |
|         from langchain_community.utilities import SearxSearchWrapper
 | |
| 
 | |
|         # when the host starts with `http` SSL is disabled and the connection
 | |
|         # is assumed to be on a private network
 | |
|         searx_host='http://self.hosted'
 | |
| 
 | |
|         search = SearxSearchWrapper(searx_host=searx_host)
 | |
| 
 | |
| 
 | |
| You can now use the ``search`` instance to query the searx API.
 | |
| 
 | |
| Searching
 | |
| ---------
 | |
| 
 | |
| Use the :meth:`run() <SearxSearchWrapper.run>` and
 | |
| :meth:`results() <SearxSearchWrapper.results>` methods to query the searx API.
 | |
| Other methods are available for convenience.
 | |
| 
 | |
| :class:`SearxResults` is a convenience wrapper around the raw json result.
 | |
| 
 | |
| Example usage of the ``run`` method to make a search:
 | |
| 
 | |
|     .. code-block:: python
 | |
| 
 | |
|         s.run(query="what is the best search engine?")
 | |
| 
 | |
| Engine Parameters
 | |
| -----------------
 | |
| 
 | |
| You can pass any `accepted searx search API
 | |
| <https://docs.searxng.org/dev/search_api.html>`_ parameters to the
 | |
| :py:class:`SearxSearchWrapper` instance.
 | |
| 
 | |
| In the following example we are using the
 | |
| :attr:`engines <SearxSearchWrapper.engines>` and the ``language`` parameters:
 | |
| 
 | |
|     .. code-block:: python
 | |
| 
 | |
|         # assuming the searx host is set as above or exported as an env variable
 | |
|         s = SearxSearchWrapper(engines=['google', 'bing'],
 | |
|                             language='es')
 | |
| 
 | |
| Search Tips
 | |
| -----------
 | |
| 
 | |
| Searx offers a special
 | |
| `search syntax <https://docs.searxng.org/user/index.html#search-syntax>`_
 | |
| that can also be used instead of passing engine parameters.
 | |
| 
 | |
| For example the following query:
 | |
| 
 | |
|     .. code-block:: python
 | |
| 
 | |
|         s = SearxSearchWrapper("langchain library", engines=['github'])
 | |
| 
 | |
|         # can also be written as:
 | |
|         s = SearxSearchWrapper("langchain library !github")
 | |
|         # or even:
 | |
|         s = SearxSearchWrapper("langchain library !gh")
 | |
| 
 | |
| 
 | |
| In some situations you might want to pass an extra string to the search query.
 | |
| For example when the `run()` method is called by an agent. The search suffix can
 | |
| also be used as a way to pass extra parameters to searx or the underlying search
 | |
| engines.
 | |
| 
 | |
|     .. code-block:: python
 | |
| 
 | |
|         # select the github engine and pass the search suffix
 | |
|         s = SearchWrapper("langchain library", query_suffix="!gh")
 | |
| 
 | |
| 
 | |
|         s = SearchWrapper("langchain library")
 | |
|         # select github the conventional google search syntax
 | |
|         s.run("large language models", query_suffix="site:github.com")
 | |
| 
 | |
| 
 | |
| *NOTE*: A search suffix can be defined on both the instance and the method level.
 | |
| The resulting query will be the concatenation of the two with the former taking
 | |
| precedence.
 | |
| 
 | |
| 
 | |
| See `SearxNG Configured Engines
 | |
| <https://docs.searxng.org/admin/engines/configured_engines.html>`_ and
 | |
| `SearxNG Search Syntax <https://docs.searxng.org/user/index.html#id1>`_
 | |
| for more details.
 | |
| 
 | |
| Notes
 | |
| -----
 | |
| This wrapper is based on the SearxNG fork https://github.com/searxng/searxng which is
 | |
| better maintained than the original Searx project and offers more features.
 | |
| 
 | |
| Public searxNG instances often use a rate limiter for API usage, so you might want to
 | |
| use a self hosted instance and disable the rate limiter.
 | |
| 
 | |
| If you are self-hosting an instance you can customize the rate limiter for your
 | |
| own network as described
 | |
| `here <https://docs.searxng.org/src/searx.botdetection.html#limiter-src>`_.
 | |
| 
 | |
| 
 | |
| For a list of public SearxNG instances see https://searx.space/
 | |
| """
 | |
| 
 | |
| import json
 | |
| from typing import Any, Dict, List, Optional
 | |
| 
 | |
| import aiohttp
 | |
| import requests
 | |
| from langchain_core.pydantic_v1 import (
 | |
|     BaseModel,
 | |
|     Extra,
 | |
|     Field,
 | |
|     PrivateAttr,
 | |
|     root_validator,
 | |
|     validator,
 | |
| )
 | |
| from langchain_core.utils import get_from_dict_or_env
 | |
| 
 | |
| 
 | |
| def _get_default_params() -> dict:
 | |
|     return {"language": "en", "format": "json"}
 | |
| 
 | |
| 
 | |
| class SearxResults(dict):
 | |
|     """Dict like wrapper around search api results."""
 | |
| 
 | |
|     _data: str = ""
 | |
| 
 | |
|     def __init__(self, data: str):
 | |
|         """Take a raw result from Searx and make it into a dict like object."""
 | |
|         json_data = json.loads(data)
 | |
|         super().__init__(json_data)
 | |
|         self.__dict__ = self
 | |
| 
 | |
|     def __str__(self) -> str:
 | |
|         """Text representation of searx result."""
 | |
|         return self._data
 | |
| 
 | |
|     @property
 | |
|     def results(self) -> Any:
 | |
|         """Silence mypy for accessing this field.
 | |
| 
 | |
|         :meta private:
 | |
|         """
 | |
|         return self.get("results")
 | |
| 
 | |
|     @property
 | |
|     def answers(self) -> Any:
 | |
|         """Helper accessor on the json result."""
 | |
|         return self.get("answers")
 | |
| 
 | |
| 
 | |
| class SearxSearchWrapper(BaseModel):
 | |
|     """Wrapper for Searx API.
 | |
| 
 | |
|     To use you need to provide the searx host by passing the named parameter
 | |
|     ``searx_host`` or exporting the environment variable ``SEARX_HOST``.
 | |
| 
 | |
|     In some situations you might want to disable SSL verification, for example
 | |
|     if you are running searx locally. You can do this by passing the named parameter
 | |
|     ``unsecure``. You can also pass the host url scheme as ``http`` to disable SSL.
 | |
| 
 | |
|     Example:
 | |
|         .. code-block:: python
 | |
| 
 | |
|             from langchain_community.utilities import SearxSearchWrapper
 | |
|             searx = SearxSearchWrapper(searx_host="http://localhost:8888")
 | |
| 
 | |
|     Example with SSL disabled:
 | |
|         .. code-block:: python
 | |
| 
 | |
|             from langchain_community.utilities import SearxSearchWrapper
 | |
|             # note the unsecure parameter is not needed if you pass the url scheme as
 | |
|             # http
 | |
|             searx = SearxSearchWrapper(searx_host="http://localhost:8888",
 | |
|                                                     unsecure=True)
 | |
| 
 | |
| 
 | |
|     """
 | |
| 
 | |
|     _result: SearxResults = PrivateAttr()
 | |
|     searx_host: str = ""
 | |
|     unsecure: bool = False
 | |
|     params: dict = Field(default_factory=_get_default_params)
 | |
|     headers: Optional[dict] = None
 | |
|     engines: Optional[List[str]] = []
 | |
|     categories: Optional[List[str]] = []
 | |
|     query_suffix: Optional[str] = ""
 | |
|     k: int = 10
 | |
|     aiosession: Optional[Any] = None
 | |
| 
 | |
|     @validator("unsecure")
 | |
|     def disable_ssl_warnings(cls, v: bool) -> bool:
 | |
|         """Disable SSL warnings."""
 | |
|         if v:
 | |
|             # requests.urllib3.disable_warnings()
 | |
|             try:
 | |
|                 import urllib3
 | |
| 
 | |
|                 urllib3.disable_warnings()
 | |
|             except ImportError as e:
 | |
|                 print(e)
 | |
| 
 | |
|         return v
 | |
| 
 | |
|     @root_validator()
 | |
|     def validate_params(cls, values: Dict) -> Dict:
 | |
|         """Validate that custom searx params are merged with default ones."""
 | |
|         user_params = values["params"]
 | |
|         default = _get_default_params()
 | |
|         values["params"] = {**default, **user_params}
 | |
| 
 | |
|         engines = values.get("engines")
 | |
|         if engines:
 | |
|             values["params"]["engines"] = ",".join(engines)
 | |
| 
 | |
|         categories = values.get("categories")
 | |
|         if categories:
 | |
|             values["params"]["categories"] = ",".join(categories)
 | |
| 
 | |
|         searx_host = get_from_dict_or_env(values, "searx_host", "SEARX_HOST")
 | |
|         if not searx_host.startswith("http"):
 | |
|             print(
 | |
|                 f"Warning: missing the url scheme on host \
 | |
|                 ! assuming secure https://{searx_host} "
 | |
|             )
 | |
|             searx_host = "https://" + searx_host
 | |
|         elif searx_host.startswith("http://"):
 | |
|             values["unsecure"] = True
 | |
|             cls.disable_ssl_warnings(True)
 | |
|         values["searx_host"] = searx_host
 | |
| 
 | |
|         return values
 | |
| 
 | |
|     class Config:
 | |
|         """Configuration for this pydantic object."""
 | |
| 
 | |
|         extra = Extra.forbid
 | |
| 
 | |
|     def _searx_api_query(self, params: dict) -> SearxResults:
 | |
|         """Actual request to searx API."""
 | |
|         raw_result = requests.get(
 | |
|             self.searx_host,
 | |
|             headers=self.headers,
 | |
|             params=params,
 | |
|             verify=not self.unsecure,
 | |
|         )
 | |
|         # test if http result is ok
 | |
|         if not raw_result.ok:
 | |
|             raise ValueError("Searx API returned an error: ", raw_result.text)
 | |
|         res = SearxResults(raw_result.text)
 | |
|         self._result = res
 | |
|         return res
 | |
| 
 | |
|     async def _asearx_api_query(self, params: dict) -> SearxResults:
 | |
|         if not self.aiosession:
 | |
|             async with aiohttp.ClientSession() as session:
 | |
|                 async with session.get(
 | |
|                     self.searx_host,
 | |
|                     headers=self.headers,
 | |
|                     params=params,
 | |
|                     ssl=(lambda: False if self.unsecure else None)(),
 | |
|                 ) as response:
 | |
|                     if not response.ok:
 | |
|                         raise ValueError("Searx API returned an error: ", response.text)
 | |
|                     result = SearxResults(await response.text())
 | |
|                     self._result = result
 | |
|         else:
 | |
|             async with self.aiosession.get(
 | |
|                 self.searx_host,
 | |
|                 headers=self.headers,
 | |
|                 params=params,
 | |
|                 verify=not self.unsecure,
 | |
|             ) as response:
 | |
|                 if not response.ok:
 | |
|                     raise ValueError("Searx API returned an error: ", response.text)
 | |
|                 result = SearxResults(await response.text())
 | |
|                 self._result = result
 | |
| 
 | |
|         return result
 | |
| 
 | |
|     def run(
 | |
|         self,
 | |
|         query: str,
 | |
|         engines: Optional[List[str]] = None,
 | |
|         categories: Optional[List[str]] = None,
 | |
|         query_suffix: Optional[str] = "",
 | |
|         **kwargs: Any,
 | |
|     ) -> str:
 | |
|         """Run query through Searx API and parse results.
 | |
| 
 | |
|         You can pass any other params to the searx query API.
 | |
| 
 | |
|         Args:
 | |
|             query: The query to search for.
 | |
|             query_suffix: Extra suffix appended to the query.
 | |
|             engines: List of engines to use for the query.
 | |
|             categories: List of categories to use for the query.
 | |
|             **kwargs: extra parameters to pass to the searx API.
 | |
| 
 | |
|         Returns:
 | |
|             str: The result of the query.
 | |
| 
 | |
|         Raises:
 | |
|             ValueError: If an error occurred with the query.
 | |
| 
 | |
| 
 | |
|         Example:
 | |
|             This will make a query to the qwant engine:
 | |
| 
 | |
|             .. code-block:: python
 | |
| 
 | |
|                 from langchain_community.utilities import SearxSearchWrapper
 | |
|                 searx = SearxSearchWrapper(searx_host="http://my.searx.host")
 | |
|                 searx.run("what is the weather in France ?", engine="qwant")
 | |
| 
 | |
|                 # the same result can be achieved using the `!` syntax of searx
 | |
|                 # to select the engine using `query_suffix`
 | |
|                 searx.run("what is the weather in France ?", query_suffix="!qwant")
 | |
|         """
 | |
|         _params = {
 | |
|             "q": query,
 | |
|         }
 | |
|         params = {**self.params, **_params, **kwargs}
 | |
| 
 | |
|         if self.query_suffix and len(self.query_suffix) > 0:
 | |
|             params["q"] += " " + self.query_suffix
 | |
| 
 | |
|         if isinstance(query_suffix, str) and len(query_suffix) > 0:
 | |
|             params["q"] += " " + query_suffix
 | |
| 
 | |
|         if isinstance(engines, list) and len(engines) > 0:
 | |
|             params["engines"] = ",".join(engines)
 | |
| 
 | |
|         if isinstance(categories, list) and len(categories) > 0:
 | |
|             params["categories"] = ",".join(categories)
 | |
| 
 | |
|         res = self._searx_api_query(params)
 | |
| 
 | |
|         if len(res.answers) > 0:
 | |
|             toret = res.answers[0]
 | |
| 
 | |
|         # only return the content of the results list
 | |
|         elif len(res.results) > 0:
 | |
|             toret = "\n\n".join([r.get("content", "") for r in res.results[: self.k]])
 | |
|         else:
 | |
|             toret = "No good search result found"
 | |
| 
 | |
|         return toret
 | |
| 
 | |
|     async def arun(
 | |
|         self,
 | |
|         query: str,
 | |
|         engines: Optional[List[str]] = None,
 | |
|         query_suffix: Optional[str] = "",
 | |
|         **kwargs: Any,
 | |
|     ) -> str:
 | |
|         """Asynchronously version of `run`."""
 | |
|         _params = {
 | |
|             "q": query,
 | |
|         }
 | |
|         params = {**self.params, **_params, **kwargs}
 | |
| 
 | |
|         if self.query_suffix and len(self.query_suffix) > 0:
 | |
|             params["q"] += " " + self.query_suffix
 | |
| 
 | |
|         if isinstance(query_suffix, str) and len(query_suffix) > 0:
 | |
|             params["q"] += " " + query_suffix
 | |
| 
 | |
|         if isinstance(engines, list) and len(engines) > 0:
 | |
|             params["engines"] = ",".join(engines)
 | |
| 
 | |
|         res = await self._asearx_api_query(params)
 | |
| 
 | |
|         if len(res.answers) > 0:
 | |
|             toret = res.answers[0]
 | |
| 
 | |
|         # only return the content of the results list
 | |
|         elif len(res.results) > 0:
 | |
|             toret = "\n\n".join([r.get("content", "") for r in res.results[: self.k]])
 | |
|         else:
 | |
|             toret = "No good search result found"
 | |
| 
 | |
|         return toret
 | |
| 
 | |
|     def results(
 | |
|         self,
 | |
|         query: str,
 | |
|         num_results: int,
 | |
|         engines: Optional[List[str]] = None,
 | |
|         categories: Optional[List[str]] = None,
 | |
|         query_suffix: Optional[str] = "",
 | |
|         **kwargs: Any,
 | |
|     ) -> List[Dict]:
 | |
|         """Run query through Searx API and returns the results with metadata.
 | |
| 
 | |
|         Args:
 | |
|             query: The query to search for.
 | |
|             query_suffix: Extra suffix appended to the query.
 | |
|             num_results: Limit the number of results to return.
 | |
|             engines: List of engines to use for the query.
 | |
|             categories: List of categories to use for the query.
 | |
|             **kwargs: extra parameters to pass to the searx API.
 | |
| 
 | |
|         Returns:
 | |
|             Dict with the following keys:
 | |
|             {
 | |
|                 snippet:  The description of the result.
 | |
|                 title:  The title of the result.
 | |
|                 link: The link to the result.
 | |
|                 engines: The engines used for the result.
 | |
|                 category: Searx category of the result.
 | |
|             }
 | |
| 
 | |
|         """
 | |
|         _params = {
 | |
|             "q": query,
 | |
|         }
 | |
|         params = {**self.params, **_params, **kwargs}
 | |
|         if self.query_suffix and len(self.query_suffix) > 0:
 | |
|             params["q"] += " " + self.query_suffix
 | |
|         if isinstance(query_suffix, str) and len(query_suffix) > 0:
 | |
|             params["q"] += " " + query_suffix
 | |
|         if isinstance(engines, list) and len(engines) > 0:
 | |
|             params["engines"] = ",".join(engines)
 | |
|         if isinstance(categories, list) and len(categories) > 0:
 | |
|             params["categories"] = ",".join(categories)
 | |
|         results = self._searx_api_query(params).results[:num_results]
 | |
|         if len(results) == 0:
 | |
|             return [{"Result": "No good Search Result was found"}]
 | |
| 
 | |
|         return [
 | |
|             {
 | |
|                 "snippet": result.get("content", ""),
 | |
|                 "title": result["title"],
 | |
|                 "link": result["url"],
 | |
|                 "engines": result["engines"],
 | |
|                 "category": result["category"],
 | |
|             }
 | |
|             for result in results
 | |
|         ]
 | |
| 
 | |
|     async def aresults(
 | |
|         self,
 | |
|         query: str,
 | |
|         num_results: int,
 | |
|         engines: Optional[List[str]] = None,
 | |
|         query_suffix: Optional[str] = "",
 | |
|         **kwargs: Any,
 | |
|     ) -> List[Dict]:
 | |
|         """Asynchronously query with json results.
 | |
| 
 | |
|         Uses aiohttp. See `results` for more info.
 | |
|         """
 | |
|         _params = {
 | |
|             "q": query,
 | |
|         }
 | |
|         params = {**self.params, **_params, **kwargs}
 | |
| 
 | |
|         if self.query_suffix and len(self.query_suffix) > 0:
 | |
|             params["q"] += " " + self.query_suffix
 | |
|         if isinstance(query_suffix, str) and len(query_suffix) > 0:
 | |
|             params["q"] += " " + query_suffix
 | |
|         if isinstance(engines, list) and len(engines) > 0:
 | |
|             params["engines"] = ",".join(engines)
 | |
|         results = (await self._asearx_api_query(params)).results[:num_results]
 | |
|         if len(results) == 0:
 | |
|             return [{"Result": "No good Search Result was found"}]
 | |
| 
 | |
|         return [
 | |
|             {
 | |
|                 "snippet": result.get("content", ""),
 | |
|                 "title": result["title"],
 | |
|                 "link": result["url"],
 | |
|                 "engines": result["engines"],
 | |
|                 "category": result["category"],
 | |
|             }
 | |
|             for result in results
 | |
|         ]
 |