mirror of
https://github.com/hwchase17/langchain.git
synced 2025-05-09 01:00:01 +00:00
**Description:** This update significantly improves the Brave Search Tool's utility within the LangChain library by enriching the search results it returns. The tool previously returned title, link, and snippet, with the snippet being a truncated 140-character description from the search engine. To make the search results more informative, this update enables extra_snippets by default and introduces additional result fields: title, link, description (enhancing and renaming the former snippet field), age, and snippets. The snippets field provides a list of strings summarizing the webpage, utilizing Brave's capability for more detailed search insights. This enhancement aims to make the search tool far more informative and beneficial for users. **Issue:** N/A **Dependencies:** No additional dependencies introduced. **Twitter handle:** @davidalexr987 **Code Changes Summary:** - Changed the default setting to include extra_snippets in search results. - Renamed the snippet field to description to accurately reflect its content and included an age field for search results. - Introduced a snippets field that lists webpage summaries, providing users with comprehensive search result insights. **Backward Compatibility Note:** The renaming of snippet to description improves the accuracy of the returned data field but may impact existing users who have developed integrations or analyses based on the snippet field. I believe this change is essential for clarity and utility, and it aligns better with the data provided by Brave Search. **Additional Notes:** This proposal focuses exclusively on the Brave Search package, without affecting other LangChain packages or introducing new dependencies.
81 lines
2.6 KiB
Python
81 lines
2.6 KiB
Python
import json
|
|
from typing import List
|
|
|
|
import requests
|
|
from langchain_core.documents import Document
|
|
from langchain_core.pydantic_v1 import BaseModel, Field
|
|
|
|
|
|
class BraveSearchWrapper(BaseModel):
    """Wrapper around the Brave search engine.

    Sends a query to the configured Brave web-search endpoint and exposes the
    results either as a JSON string (``run``) or as a list of ``Document``
    objects (``download_documents``).
    """

    api_key: str
    """The API key to use for the Brave search engine."""
    search_kwargs: dict = Field(default_factory=dict)
    """Additional keyword arguments to pass to the search request."""
    base_url: str = "https://api.search.brave.com/res/v1/web/search"
    """The base URL for the Brave search engine."""

    def run(self, query: str) -> str:
        """Query the Brave search engine and return the results as a JSON string.

        Args:
            query: The query to search for.

        Returns: The results as a JSON string. Each entry has the keys
            ``title``, ``link`` and ``snippet``; ``snippet`` is the result's
            description followed by its extra snippets, space-separated.

        Raises:
            ValueError: If the request URL could not be prepared.
            Exception: If the search endpoint answers with a non-OK status.
        """
        final_results = [
            {
                "title": item.get("title"),
                "link": item.get("url"),
                "snippet": self._combine_snippets(item),
            }
            for item in self._search_request(query=query)
        ]
        return json.dumps(final_results)

    def download_documents(self, query: str) -> List[Document]:
        """Query the Brave search engine and return the results as a list of Documents.

        Args:
            query: The query to search for.

        Returns: The results as a list of Documents. ``page_content`` is the
            result's description followed by its extra snippets; ``metadata``
            carries the ``title`` and ``link`` of the result.

        Raises:
            ValueError: If the request URL could not be prepared.
            Exception: If the search endpoint answers with a non-OK status.
        """
        return [
            Document(
                page_content=self._combine_snippets(item),
                metadata={"title": item.get("title"), "link": item.get("url")},
            )
            for item in self._search_request(query)
        ]

    @staticmethod
    def _combine_snippets(item: dict) -> str:
        """Join a result's description and extra snippets into one string."""
        # ``or []`` instead of a ``.get`` default: the default only applies
        # when the key is absent, and unpacking an explicit null would raise
        # TypeError.
        extra_snippets = item.get("extra_snippets") or []
        # filter(None, ...) drops a missing description and empty snippets so
        # the joined text has no stray separators.
        return " ".join(filter(None, [item.get("description"), *extra_snippets]))

    def _search_request(self, query: str) -> List[dict]:
        """Perform the HTTP search request and return the raw web results.

        Args:
            query: The query to search for.

        Returns: The list found under ``web.results`` in the JSON response,
            or an empty list when that section is missing or null.

        Raises:
            ValueError: If the request URL could not be prepared.
            Exception: If the search endpoint answers with a non-OK status.
        """
        headers = {
            "X-Subscription-Token": self.api_key,
            "Accept": "application/json",
        }
        # The query and ``extra_snippets`` come last so they always take
        # precedence over same-named entries in ``search_kwargs``.
        params = {**self.search_kwargs, "q": query, "extra_snippets": True}

        # PreparedRequest is used only to encode the parameters into the URL.
        req = requests.PreparedRequest()
        req.prepare_url(self.base_url, params)
        if req.url is None:
            raise ValueError("prepared url is None, this should not happen")

        # NOTE(review): no timeout is passed, so this call can block
        # indefinitely on an unresponsive server - consider making a timeout
        # configurable.
        response = requests.get(req.url, headers=headers)
        if not response.ok:
            raise Exception(f"HTTP error {response.status_code}")

        # Guard against explicit nulls as well as missing keys so callers
        # always receive an iterable list.
        web_section = response.json().get("web") or {}
        return web_section.get("results") or []
|