mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-06 13:18:12 +00:00
Add Wikipedia search utility and tool (#1561)
The Python `wikipedia` package gives easy access for searching and fetching pages from Wikipedia, see https://pypi.org/project/wikipedia/. It can serve as an additional search and retrieval tool, like the existing Google and SerpAPI helpers, for both chains and agents.
This commit is contained in:
parent
b44c8bd969
commit
cdb97f3dfb
@ -48,6 +48,7 @@ from langchain.utilities.google_search import GoogleSearchAPIWrapper
|
||||
from langchain.utilities.google_serper import GoogleSerperAPIWrapper
|
||||
from langchain.utilities.searx_search import SearxSearchWrapper
|
||||
from langchain.utilities.serpapi import SerpAPIWrapper
|
||||
from langchain.utilities.wikipedia import WikipediaAPIWrapper
|
||||
from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper
|
||||
from langchain.vectorstores import FAISS, ElasticVectorSearch
|
||||
|
||||
@ -70,6 +71,7 @@ __all__ = [
|
||||
"GoogleSearchAPIWrapper",
|
||||
"GoogleSerperAPIWrapper",
|
||||
"WolframAlphaAPIWrapper",
|
||||
"WikipediaAPIWrapper",
|
||||
"Anthropic",
|
||||
"Banana",
|
||||
"CerebriumAI",
|
||||
|
@ -9,12 +9,13 @@ from langchain.chains.api.base import APIChain
|
||||
from langchain.chains.llm_math.base import LLMMathChain
|
||||
from langchain.chains.pal.base import PALChain
|
||||
from langchain.llms.base import BaseLLM
|
||||
from langchain.tools.python.tool import PythonREPLTool
|
||||
from langchain.requests import RequestsWrapper
|
||||
from langchain.tools.base import BaseTool
|
||||
from langchain.tools.bing_search.tool import BingSearchRun
|
||||
from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun
|
||||
from langchain.tools.python.tool import PythonREPLTool
|
||||
from langchain.tools.requests.tool import RequestsGetTool
|
||||
from langchain.tools.wikipedia.tool import WikipediaQueryRun
|
||||
from langchain.tools.wolfram_alpha.tool import WolframAlphaQueryRun
|
||||
from langchain.utilities.bash import BashProcess
|
||||
from langchain.utilities.bing_search import BingSearchAPIWrapper
|
||||
@ -22,6 +23,7 @@ from langchain.utilities.google_search import GoogleSearchAPIWrapper
|
||||
from langchain.utilities.google_serper import GoogleSerperAPIWrapper
|
||||
from langchain.utilities.searx_search import SearxSearchWrapper
|
||||
from langchain.utilities.serpapi import SerpAPIWrapper
|
||||
from langchain.utilities.wikipedia import WikipediaAPIWrapper
|
||||
from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper
|
||||
|
||||
|
||||
@ -124,6 +126,10 @@ def _get_google_search(**kwargs: Any) -> BaseTool:
|
||||
return GoogleSearchRun(api_wrapper=GoogleSearchAPIWrapper(**kwargs))
|
||||
|
||||
|
||||
def _get_wikipedia(**kwargs: Any) -> BaseTool:
    """Build the Wikipedia query tool.

    All keyword arguments are forwarded unchanged to ``WikipediaAPIWrapper``;
    the resulting wrapper is handed to ``WikipediaQueryRun``.
    """
    wrapper = WikipediaAPIWrapper(**kwargs)
    return WikipediaQueryRun(api_wrapper=wrapper)
|
||||
|
||||
|
||||
def _get_google_serper(**kwargs: Any) -> BaseTool:
|
||||
return Tool(
|
||||
name="Serper Search",
|
||||
@ -173,6 +179,7 @@ _EXTRA_OPTIONAL_TOOLS = {
|
||||
"google-serper": (_get_google_serper, ["serper_api_key"]),
|
||||
"serpapi": (_get_serpapi, ["serpapi_api_key", "aiosession"]),
|
||||
"searx-search": (_get_searx_search, ["searx_host"]),
|
||||
"wikipedia": (_get_wikipedia, ["top_k_results"]),
|
||||
}
|
||||
|
||||
|
||||
|
1
langchain/tools/wikipedia/__init__.py
Normal file
1
langchain/tools/wikipedia/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
"""Wikipedia API toolkit."""
|
25
langchain/tools/wikipedia/tool.py
Normal file
25
langchain/tools/wikipedia/tool.py
Normal file
@ -0,0 +1,25 @@
|
||||
"""Tool for the Wikipedia API."""
|
||||
|
||||
from langchain.tools.base import BaseTool
|
||||
from langchain.utilities.wikipedia import WikipediaAPIWrapper
|
||||
|
||||
|
||||
class WikipediaQueryRun(BaseTool):
    """Tool that answers search queries through a ``WikipediaAPIWrapper``."""

    name = "Wikipedia"
    description = (
        "A wrapper around Wikipedia. Useful for when you need to answer "
        "general questions about people, places, companies, historical "
        "events, or other subjects. Input should be a search query."
    )
    api_wrapper: WikipediaAPIWrapper

    def _run(self, query: str) -> str:
        """Pass ``query`` to the wrapper's ``run`` and return its result."""
        result = self.api_wrapper.run(query)
        return result

    async def _arun(self, query: str) -> str:
        """Async entry point; unsupported, so this always raises.

        Raises:
            NotImplementedError: On every call.
        """
        raise NotImplementedError("WikipediaQueryRun does not support async")
|
@ -7,6 +7,7 @@ from langchain.utilities.google_search import GoogleSearchAPIWrapper
|
||||
from langchain.utilities.google_serper import GoogleSerperAPIWrapper
|
||||
from langchain.utilities.searx_search import SearxSearchWrapper
|
||||
from langchain.utilities.serpapi import SerpAPIWrapper
|
||||
from langchain.utilities.wikipedia import WikipediaAPIWrapper
|
||||
from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper
|
||||
|
||||
__all__ = [
|
||||
@ -19,4 +20,5 @@ __all__ = [
|
||||
"SerpAPIWrapper",
|
||||
"SearxSearchWrapper",
|
||||
"BingSearchAPIWrapper",
|
||||
"WikipediaAPIWrapper",
|
||||
]
|
||||
|
56
langchain/utilities/wikipedia.py
Normal file
56
langchain/utilities/wikipedia.py
Normal file
@ -0,0 +1,56 @@
|
||||
"""Util that calls Wikipedia."""
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from pydantic import BaseModel, Extra, root_validator
|
||||
|
||||
|
||||
class WikipediaAPIWrapper(BaseModel):
    """Wrapper around WikipediaAPI.

    To use, you should have the ``wikipedia`` python package installed.
    This wrapper will use the Wikipedia API to conduct searches and
    fetch page summaries. By default, it will return the page summaries
    of the top-k results of an input search.
    """

    wiki_client: Any  #: :meta private:
    top_k_results: int = 3

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the python package exists in environment.

        On success the imported ``wikipedia`` module is stored in
        ``values["wiki_client"]`` and ``values`` is returned.

        Raises:
            ValueError: If the ``wikipedia`` package cannot be imported.
        """
        try:
            import wikipedia

            values["wiki_client"] = wikipedia
        except ImportError:
            raise ValueError(
                "Could not import wikipedia python package. "
                "Please install it with `pip install wikipedia`."
            )
        return values

    def run(self, query: str) -> str:
        """Run Wikipedia search and get page summaries.

        Args:
            query: Search string passed to the client's ``search``.

        Returns:
            The formatted summaries of at most ``top_k_results`` search hits,
            joined by blank lines. Hits whose summary could not be fetched
            are skipped, so the result may be an empty string.
        """
        search_results = self.wiki_client.search(query)
        # Clamp at zero so a negative ``top_k_results`` selects no pages
        # instead of slicing from the end of the result list.
        limit = max(self.top_k_results, 0)
        summaries = []
        for title in search_results[:limit]:
            summary = self.fetch_formatted_page_summary(title)
            if summary is not None:
                summaries.append(summary)
        return "\n\n".join(summaries)

    def fetch_formatted_page_summary(self, page: str) -> Optional[str]:
        """Fetch one page and format its title and summary.

        Args:
            page: Title handed to the client's ``page`` lookup.

        Returns:
            ``"Page: <title>\\nSummary: <summary>"``, or ``None`` when the
            client raises ``PageError`` or ``DisambiguationError``.
        """
        try:
            wiki_page = self.wiki_client.page(title=page)
            return f"Page: {page}\nSummary: {wiki_page.summary}"
        except (
            self.wiki_client.exceptions.PageError,
            self.wiki_client.exceptions.DisambiguationError,
        ):
            return None
|
Loading…
Reference in New Issue
Block a user