Harrison/ddg (#3206)

Co-authored-by: itai <itai.marks@gmail.com>
Co-authored-by: Itai Marks <itaim@users.noreply.github.com>
Co-authored-by: Tianyi Pan <60060750+tipani86@users.noreply.github.com>
Co-authored-by: Tianyi Pan <tianyi.pan@clobotics.com>
Co-authored-by: Adilzhan Ismailov <13088690+aismlv@users.noreply.github.com>
Co-authored-by: Justin Flick <Justinjayflick@gmail.com>
Co-authored-by: Justin Flick <jflick@homesite.com>
This commit is contained in:
Harrison Chase 2023-04-19 21:32:26 -07:00 committed by GitHub
parent 36c10f8a52
commit d2520a5f1e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 261 additions and 4 deletions

View File

@ -0,0 +1,91 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "245a954a",
"metadata": {},
"source": [
"# DuckDuckGo Search\n",
"\n",
"This notebook goes over how to use the duck-duck-go search component."
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "21e46d4d",
"metadata": {},
"outputs": [],
"source": [
"# !pip install duckduckgo-search"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "ac4910f8",
"metadata": {},
"outputs": [],
"source": [
"from langchain.tools import DuckDuckGoSearchTool"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "84b8f773",
"metadata": {},
"outputs": [],
"source": [
"search = DuckDuckGoSearchTool()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "068991a6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Barack Obama, in full Barack Hussein Obama II, (born August 4, 1961, Honolulu, Hawaii, U.S.), 44th president of the United States (2009-17) and the first African American to hold the office. Before winning the presidency, Obama represented Illinois in the U.S. Senate (2005-08). Barack Hussein Obama II (/ b ə ˈ r ɑː k h uː ˈ s eɪ n oʊ ˈ b ɑː m ə / bə-RAHK hoo-SAYN oh-BAH-mə; born August 4, 1961) is an American former politician who served as the 44th president of the United States from 2009 to 2017. A member of the Democratic Party, he was the first African-American president of the United States. Obama previously served as a U.S. senator representing ... Barack Obama was the first African American president of the United States (2009-17). He oversaw the recovery of the U.S. economy (from the Great Recession of 2008-09) and the enactment of landmark health care reform (the Patient Protection and Affordable Care Act ). In 2009 he was awarded the Nobel Peace Prize. His birth certificate lists his first name as Barack: That\\'s how Obama has spelled his name throughout his life. His name derives from a Hebrew name which means \"lightning.\". The Hebrew word has been transliterated into English in various spellings, including Barak, Buraq, Burack, and Barack. Most common names of U.S. presidents 1789-2021. Published by. Aaron O\\'Neill , Jun 21, 2022. The most common first name for a U.S. president is James, followed by John and then William. Six U.S ...'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"search.run(\"Obama's first name?\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
},
"vscode": {
"interpreter": {
"hash": "a0a0263b650d907a3bfe41c0f8d6a63a071b884df3cfdc1579f00cdc1aed6b03"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -1,9 +1,17 @@
"""Core toolkit implementations."""
from langchain.tools.base import BaseTool
from langchain.tools.ddg_search.tool import DuckDuckGoSearchTool
from langchain.tools.ifttt import IFTTTWebhook
from langchain.tools.openapi.utils.api_models import APIOperation
from langchain.tools.openapi.utils.openapi_utils import OpenAPISpec
from langchain.tools.plugin import AIPluginTool
__all__ = ["BaseTool", "IFTTTWebhook", "AIPluginTool", "OpenAPISpec", "APIOperation"]
__all__ = [
"BaseTool",
"IFTTTWebhook",
"AIPluginTool",
"OpenAPISpec",
"APIOperation",
"DuckDuckGoSearchTool",
]

View File

@ -0,0 +1 @@
"""DuckDuckGo Search API toolkit."""

View File

@ -0,0 +1,28 @@
"""Tool for the DuckDuckGo search API."""
from pydantic import Field
from langchain.tools.base import BaseTool
from langchain.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
class DuckDuckGoSearchTool(BaseTool):
"""Tool that adds the capability to query the DuckDuckGo search API."""
name = "DuckDuckGo Search"
description = (
"A wrapper around DuckDuckGo Search. "
"Useful for when you need to answer questions about current events. "
"Input should be a search query."
)
api_wrapper: DuckDuckGoSearchAPIWrapper = Field(
default_factory=DuckDuckGoSearchAPIWrapper
)
def _run(self, query: str) -> str:
"""Use the tool."""
return self.api_wrapper.run(query)
async def _arun(self, query: str) -> str:
"""Use the tool asynchronously."""
raise NotImplementedError("DuckDuckGoSearch does not support async")

View File

@ -0,0 +1,90 @@
"""Util that calls DuckDuckGo Search.
No setup required. Free.
https://pypi.org/project/duckduckgo-search/
"""
from typing import Dict, List, Optional
from pydantic import BaseModel, Extra
from pydantic.class_validators import root_validator
class DuckDuckGoSearchAPIWrapper(BaseModel):
"""Wrapper for DuckDuckGo Search API.
Free and does not require any setup
"""
k: int = 10
region: Optional[str] = "wt-wt"
safesearch: str = "moderate"
time: Optional[str] = "y"
max_results: int = 5
class Config:
"""Configuration for this pydantic object."""
extra = Extra.forbid
@root_validator()
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that python package exists in environment."""
try:
from duckduckgo_search import ddg # noqa: F401
except ImportError:
raise ValueError(
"Could not import duckduckgo-search python package. "
"Please install it with `pip install duckduckgo-search`."
)
return values
def run(self, query: str) -> str:
from duckduckgo_search import ddg
"""Run query through DuckDuckGo and return results."""
results = ddg(
query,
region=self.region,
safesearch=self.safesearch,
time=self.time,
max_results=self.max_results,
)
if len(results) == 0:
return "No good DuckDuckGo Search Result was found"
snippets = [result["body"] for result in results]
return " ".join(snippets)
def results(self, query: str, num_results: int) -> List[Dict]:
"""Run query through DuckDuckGo and return metadata.
Args:
query: The query to search for.
num_results: The number of results to return.
Returns:
A list of dictionaries with the following keys:
snippet - The description of the result.
title - The title of the result.
link - The link to the result.
"""
from duckduckgo_search import ddg
results = ddg(
query,
region=self.region,
safesearch=self.safesearch,
time=self.time,
max_results=num_results,
)
if len(results) == 0:
return [{"Result": "No good DuckDuckGo Search Result was found"}]
def to_metadata(result: Dict) -> Dict:
return {
"snippet": result["body"],
"title": result["title"],
"link": result["href"],
}
return [to_metadata(result) for result in results]

20
poetry.lock generated
View File

@ -1612,6 +1612,22 @@ duckdb = ">=0.4.0"
numpy = "*"
sqlalchemy = ">=1.3.19"
[[package]]
name = "duckduckgo-search"
version = "2.8.6"
description = "Search for words, documents, images, news, maps and text translation using the DuckDuckGo.com search engine."
category = "main"
optional = true
python-versions = ">=3.7"
files = [
{file = "duckduckgo_search-2.8.6-py3-none-any.whl", hash = "sha256:c9312ad278d03d059ba7ced978dd1bc7806bb735aa239948322936d0570d8d7f"},
{file = "duckduckgo_search-2.8.6.tar.gz", hash = "sha256:ffd620febb8c471bdb4aed520b26e645cd05ae79acdd78db6c0c927cb7b0237c"},
]
[package.dependencies]
click = ">=8.1.3"
requests = ">=2.28.2"
[[package]]
name = "ecdsa"
version = "0.18.0"
@ -9151,7 +9167,7 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\
cffi = ["cffi (>=1.11)"]
[extras]
all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache"]
all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search"]
cohere = ["cohere"]
llms = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "manifest-ml", "torch", "transformers"]
openai = ["openai"]
@ -9160,4 +9176,4 @@ qdrant = ["qdrant-client"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0"
content-hash = "568b190c884e62df4e7bd897f402e3b6e61b24134af7f189f3d44b2ba5f00082"
content-hash = "19a145090188b0b446c68ca33599f4d4943bf9fb1312bcfa98a23268101e1323"

View File

@ -66,6 +66,7 @@ atlassian-python-api = {version = "^3.36.0", optional=true}
pytesseract = {version = "^0.3.10", optional=true}
html2text = {version="^2020.1.16", optional=true}
numexpr = "^2.8.4"
duckduckgo-search = {version="^2.8.6", optional=true}
[tool.poetry.group.docs.dependencies]
autodoc_pydantic = "^1.8.0"
@ -139,7 +140,7 @@ llms = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "manifes
qdrant = ["qdrant-client"]
openai = ["openai"]
cohere = ["cohere"]
all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence_transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "boto3", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache"]
all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence_transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "boto3", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search"]
[tool.ruff]
select = [

View File

@ -0,0 +1,22 @@
import pytest
from langchain.tools.ddg_search.tool import DuckDuckGoSearchTool
def ddg_installed() -> bool:
try:
from duckduckgo_search import ddg # noqa: F401
return True
except Exception as e:
print(f"duckduckgo not installed, skipping test {e}")
return False
@pytest.mark.skipif(not ddg_installed(), reason="requires duckduckgo-search package")
def test_ddg_search_tool() -> None:
keywords = "Bella Ciao"
tool = DuckDuckGoSearchTool()
result = tool(keywords)
print(result)
assert len(result.split()) > 20