mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-23 03:19:38 +00:00
Bugfix duckduckgo_search news search (#13670)
- **Description:** Bugfix duckduckgo_search news search - **Issue:** https://github.com/langchain-ai/langchain/issues/13648 - **Dependencies:** None - **Tag maintainer:** @baskaryan --------- Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
@@ -46,11 +46,11 @@ class DuckDuckGoSearchResults(BaseTool):
|
||||
"Useful for when you need to answer questions about current events. "
|
||||
"Input should be a search query. Output is a JSON array of the query results"
|
||||
)
|
||||
num_results: int = 4
|
||||
max_results: int = Field(alias="num_results", default=4)
|
||||
api_wrapper: DuckDuckGoSearchAPIWrapper = Field(
|
||||
default_factory=DuckDuckGoSearchAPIWrapper
|
||||
)
|
||||
backend: str = "api"
|
||||
backend: str = "text"
|
||||
args_schema: Type[BaseModel] = DDGInput
|
||||
|
||||
def _run(
|
||||
@@ -59,7 +59,7 @@ class DuckDuckGoSearchResults(BaseTool):
|
||||
run_manager: Optional[CallbackManagerForToolRun] = None,
|
||||
) -> str:
|
||||
"""Use the tool."""
|
||||
res = self.api_wrapper.results(query, self.num_results, backend=self.backend)
|
||||
res = self.api_wrapper.results(query, self.max_results, source=self.backend)
|
||||
res_strs = [", ".join([f"{k}: {v}" for k, v in d.items()]) for d in res]
|
||||
return ", ".join([f"[{rs}]" for rs in res_strs])
|
||||
|
||||
|
@@ -18,6 +18,8 @@ class DuckDuckGoSearchAPIWrapper(BaseModel):
|
||||
safesearch: str = "moderate"
|
||||
time: Optional[str] = "y"
|
||||
max_results: int = 5
|
||||
backend: str = "api" # which backend to use in DDGS.text() (api, html, lite)
|
||||
source: str = "text" # which function to use in DDGS (DDGS.text() or DDGS.news())
|
||||
|
||||
class Config:
|
||||
"""Configuration for this pydantic object."""
|
||||
@@ -32,43 +34,69 @@ class DuckDuckGoSearchAPIWrapper(BaseModel):
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Could not import duckduckgo-search python package. "
|
||||
"Please install it with `pip install duckduckgo-search`."
|
||||
"Please install it with `pip install -U duckduckgo-search`."
|
||||
)
|
||||
return values
|
||||
|
||||
def get_snippets(self, query: str) -> List[str]:
|
||||
"""Run query through DuckDuckGo and return concatenated results."""
|
||||
def _ddgs_text(
|
||||
self, query: str, max_results: Optional[int] = None
|
||||
) -> List[Dict[str, str]]:
|
||||
"""Run query through DuckDuckGo text search and return results."""
|
||||
from duckduckgo_search import DDGS
|
||||
|
||||
with DDGS() as ddgs:
|
||||
results = ddgs.text(
|
||||
ddgs_gen = ddgs.text(
|
||||
query,
|
||||
region=self.region,
|
||||
safesearch=self.safesearch,
|
||||
timelimit=self.time,
|
||||
max_results=max_results or self.max_results,
|
||||
backend=self.backend,
|
||||
)
|
||||
if results is None:
|
||||
return ["No good DuckDuckGo Search Result was found"]
|
||||
snippets = []
|
||||
for i, res in enumerate(results, 1):
|
||||
if res is not None:
|
||||
snippets.append(res["body"])
|
||||
if len(snippets) == self.max_results:
|
||||
break
|
||||
return snippets
|
||||
if ddgs_gen:
|
||||
return [r for r in ddgs_gen]
|
||||
return []
|
||||
|
||||
def _ddgs_news(
|
||||
self, query: str, max_results: Optional[int] = None
|
||||
) -> List[Dict[str, str]]:
|
||||
"""Run query through DuckDuckGo news search and return results."""
|
||||
from duckduckgo_search import DDGS
|
||||
|
||||
with DDGS() as ddgs:
|
||||
ddgs_gen = ddgs.news(
|
||||
query,
|
||||
region=self.region,
|
||||
safesearch=self.safesearch,
|
||||
timelimit=self.time,
|
||||
max_results=max_results or self.max_results,
|
||||
)
|
||||
if ddgs_gen:
|
||||
return [r for r in ddgs_gen]
|
||||
return []
|
||||
|
||||
def run(self, query: str) -> str:
|
||||
snippets = self.get_snippets(query)
|
||||
return " ".join(snippets)
|
||||
"""Run query through DuckDuckGo and return concatenated results."""
|
||||
if self.source == "text":
|
||||
results = self._ddgs_text(query)
|
||||
elif self.source == "news":
|
||||
results = self._ddgs_news(query)
|
||||
else:
|
||||
results = []
|
||||
|
||||
if not results:
|
||||
return "No good DuckDuckGo Search Result was found"
|
||||
return " ".join(r["body"] for r in results)
|
||||
|
||||
def results(
|
||||
self, query: str, num_results: int, backend: str = "api"
|
||||
self, query: str, max_results: int, source: Optional[str] = None
|
||||
) -> List[Dict[str, str]]:
|
||||
"""Run query through DuckDuckGo and return metadata.
|
||||
|
||||
Args:
|
||||
query: The query to search for.
|
||||
num_results: The number of results to return.
|
||||
max_results: The number of results to return.
|
||||
source: The source to look from.
|
||||
|
||||
Returns:
|
||||
A list of dictionaries with the following keys:
|
||||
@@ -76,38 +104,27 @@ class DuckDuckGoSearchAPIWrapper(BaseModel):
|
||||
title - The title of the result.
|
||||
link - The link to the result.
|
||||
"""
|
||||
from duckduckgo_search import DDGS
|
||||
|
||||
with DDGS() as ddgs:
|
||||
results = ddgs.text(
|
||||
query,
|
||||
region=self.region,
|
||||
safesearch=self.safesearch,
|
||||
timelimit=self.time,
|
||||
backend=backend,
|
||||
)
|
||||
if results is None:
|
||||
return [{"Result": "No good DuckDuckGo Search Result was found"}]
|
||||
|
||||
def to_metadata(result: Dict) -> Dict[str, str]:
|
||||
if backend == "news":
|
||||
return {
|
||||
"date": result["date"],
|
||||
"title": result["title"],
|
||||
"snippet": result["body"],
|
||||
"source": result["source"],
|
||||
"link": result["url"],
|
||||
}
|
||||
return {
|
||||
"snippet": result["body"],
|
||||
"title": result["title"],
|
||||
"link": result["href"],
|
||||
source = source or self.source
|
||||
if source == "text":
|
||||
results = [
|
||||
{"snippet": r["body"], "title": r["title"], "link": r["href"]}
|
||||
for r in self._ddgs_text(query, max_results=max_results)
|
||||
]
|
||||
elif source == "news":
|
||||
results = [
|
||||
{
|
||||
"snippet": r["body"],
|
||||
"title": r["title"],
|
||||
"link": r["url"],
|
||||
"date": r["date"],
|
||||
"source": r["source"],
|
||||
}
|
||||
for r in self._ddgs_news(query, max_results=max_results)
|
||||
]
|
||||
else:
|
||||
results = []
|
||||
|
||||
formatted_results = []
|
||||
for i, res in enumerate(results, 1):
|
||||
if res is not None:
|
||||
formatted_results.append(to_metadata(res))
|
||||
if len(formatted_results) == num_results:
|
||||
break
|
||||
return formatted_results
|
||||
if results is None:
|
||||
results = [{"Result": "No good DuckDuckGo Search Result was found"}]
|
||||
|
||||
return results
|
||||
|
@@ -1,11 +1,11 @@
|
||||
import pytest
|
||||
|
||||
from langchain.tools.ddg_search.tool import DuckDuckGoSearchRun
|
||||
from langchain.tools.ddg_search.tool import DuckDuckGoSearchResults, DuckDuckGoSearchRun
|
||||
|
||||
|
||||
def ddg_installed() -> bool:
|
||||
try:
|
||||
from duckduckgo_search import ddg # noqa: F401
|
||||
from duckduckgo_search import DDGS # noqa: F401
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
@@ -20,3 +20,12 @@ def test_ddg_search_tool() -> None:
|
||||
result = tool(keywords)
|
||||
print(result)
|
||||
assert len(result.split()) > 20
|
||||
|
||||
|
||||
@pytest.mark.skipif(not ddg_installed(), reason="requires duckduckgo-search package")
|
||||
def test_ddg_search_news_tool() -> None:
|
||||
keywords = "Tesla"
|
||||
tool = DuckDuckGoSearchResults(source="news")
|
||||
result = tool(keywords)
|
||||
print(result)
|
||||
assert len(result.split()) > 20
|
||||
|
Reference in New Issue
Block a user