Bugfix duckduckgo_search news search (#13670)

- **Description:** 
Bugfix duckduckgo_search news search
  - **Issue:** 
https://github.com/langchain-ai/langchain/issues/13648
  - **Dependencies:** 
None
  - **Tag maintainer:** 
@baskaryan

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
deedy5
2023-12-05 00:48:20 +00:00
committed by GitHub
parent 676a077c4e
commit ee9abb6722
4 changed files with 111 additions and 77 deletions

View File

@@ -46,11 +46,11 @@ class DuckDuckGoSearchResults(BaseTool):
"Useful for when you need to answer questions about current events. "
"Input should be a search query. Output is a JSON array of the query results"
)
num_results: int = 4
max_results: int = Field(alias="num_results", default=4)
api_wrapper: DuckDuckGoSearchAPIWrapper = Field(
default_factory=DuckDuckGoSearchAPIWrapper
)
backend: str = "api"
backend: str = "text"
args_schema: Type[BaseModel] = DDGInput
def _run(
@@ -59,7 +59,7 @@ class DuckDuckGoSearchResults(BaseTool):
run_manager: Optional[CallbackManagerForToolRun] = None,
) -> str:
"""Use the tool."""
res = self.api_wrapper.results(query, self.num_results, backend=self.backend)
res = self.api_wrapper.results(query, self.max_results, source=self.backend)
res_strs = [", ".join([f"{k}: {v}" for k, v in d.items()]) for d in res]
return ", ".join([f"[{rs}]" for rs in res_strs])

View File

@@ -18,6 +18,8 @@ class DuckDuckGoSearchAPIWrapper(BaseModel):
safesearch: str = "moderate"
time: Optional[str] = "y"
max_results: int = 5
backend: str = "api" # which backend to use in DDGS.text() (api, html, lite)
source: str = "text" # which function to use in DDGS (DDGS.text() or DDGS.news())
class Config:
"""Configuration for this pydantic object."""
@@ -32,43 +34,69 @@ class DuckDuckGoSearchAPIWrapper(BaseModel):
except ImportError:
raise ImportError(
"Could not import duckduckgo-search python package. "
"Please install it with `pip install duckduckgo-search`."
"Please install it with `pip install -U duckduckgo-search`."
)
return values
def get_snippets(self, query: str) -> List[str]:
"""Run query through DuckDuckGo and return concatenated results."""
def _ddgs_text(
self, query: str, max_results: Optional[int] = None
) -> List[Dict[str, str]]:
"""Run query through DuckDuckGo text search and return results."""
from duckduckgo_search import DDGS
with DDGS() as ddgs:
results = ddgs.text(
ddgs_gen = ddgs.text(
query,
region=self.region,
safesearch=self.safesearch,
timelimit=self.time,
max_results=max_results or self.max_results,
backend=self.backend,
)
if results is None:
return ["No good DuckDuckGo Search Result was found"]
snippets = []
for i, res in enumerate(results, 1):
if res is not None:
snippets.append(res["body"])
if len(snippets) == self.max_results:
break
return snippets
if ddgs_gen:
return [r for r in ddgs_gen]
return []
def _ddgs_news(
    self, query: str, max_results: Optional[int] = None
) -> List[Dict[str, str]]:
    """Query the DuckDuckGo news endpoint and return the raw result dicts.

    Args:
        query: The search string forwarded to ``DDGS.news``.
        max_results: Upper bound on the number of results; when falsy
            (``None`` or ``0``) the wrapper's own ``max_results`` is used.

    Returns:
        A list with every item yielded by ``DDGS.news``, or an empty list
        when the call returns a falsy value.
    """
    from duckduckgo_search import DDGS

    limit = max_results or self.max_results
    with DDGS() as session:
        found = session.news(
            query,
            region=self.region,
            safesearch=self.safesearch,
            timelimit=self.time,
            max_results=limit,
        )
        # Materialise while the DDGS session is still open; the result is
        # presumably a lazy iterator tied to that session -- TODO confirm.
        return list(found) if found else []
def run(self, query: str) -> str:
snippets = self.get_snippets(query)
return " ".join(snippets)
"""Run query through DuckDuckGo and return concatenated results."""
if self.source == "text":
results = self._ddgs_text(query)
elif self.source == "news":
results = self._ddgs_news(query)
else:
results = []
if not results:
return "No good DuckDuckGo Search Result was found"
return " ".join(r["body"] for r in results)
def results(
self, query: str, num_results: int, backend: str = "api"
self, query: str, max_results: int, source: Optional[str] = None
) -> List[Dict[str, str]]:
"""Run query through DuckDuckGo and return metadata.
Args:
query: The query to search for.
num_results: The number of results to return.
max_results: The number of results to return.
source: The source to look from.
Returns:
A list of dictionaries with the following keys:
@@ -76,38 +104,27 @@ class DuckDuckGoSearchAPIWrapper(BaseModel):
title - The title of the result.
link - The link to the result.
"""
from duckduckgo_search import DDGS
with DDGS() as ddgs:
results = ddgs.text(
query,
region=self.region,
safesearch=self.safesearch,
timelimit=self.time,
backend=backend,
)
if results is None:
return [{"Result": "No good DuckDuckGo Search Result was found"}]
def to_metadata(result: Dict) -> Dict[str, str]:
if backend == "news":
return {
"date": result["date"],
"title": result["title"],
"snippet": result["body"],
"source": result["source"],
"link": result["url"],
}
return {
"snippet": result["body"],
"title": result["title"],
"link": result["href"],
source = source or self.source
if source == "text":
results = [
{"snippet": r["body"], "title": r["title"], "link": r["href"]}
for r in self._ddgs_text(query, max_results=max_results)
]
elif source == "news":
results = [
{
"snippet": r["body"],
"title": r["title"],
"link": r["url"],
"date": r["date"],
"source": r["source"],
}
for r in self._ddgs_news(query, max_results=max_results)
]
else:
results = []
formatted_results = []
for i, res in enumerate(results, 1):
if res is not None:
formatted_results.append(to_metadata(res))
if len(formatted_results) == num_results:
break
return formatted_results
if results is None:
results = [{"Result": "No good DuckDuckGo Search Result was found"}]
return results

View File

@@ -1,11 +1,11 @@
import pytest
from langchain.tools.ddg_search.tool import DuckDuckGoSearchRun
from langchain.tools.ddg_search.tool import DuckDuckGoSearchResults, DuckDuckGoSearchRun
def ddg_installed() -> bool:
try:
from duckduckgo_search import ddg # noqa: F401
from duckduckgo_search import DDGS # noqa: F401
return True
except Exception as e:
@@ -20,3 +20,12 @@ def test_ddg_search_tool() -> None:
result = tool(keywords)
print(result)
assert len(result.split()) > 20
@pytest.mark.skipif(not ddg_installed(), reason="requires duckduckgo-search package")
def test_ddg_search_news_tool() -> None:
    """Check that the results tool returns a non-trivial payload for news search."""
    keywords = "Tesla"
    # The tool has no `source` field; its `backend` field is what `_run`
    # forwards to the API wrapper as `source`, so "news" must be set there.
    tool = DuckDuckGoSearchResults(backend="news")
    result = tool(keywords)
    print(result)
    assert len(result.split()) > 20