From bfb7f8d40a3e5efcb62a85ff46383fd168abf3bf Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 30 Jul 2024 11:29:38 -0400 Subject: [PATCH] Brave Search: Enhance search result details with extra snippets (#19209) **Description:** This update significantly improves the Brave Search Tool's utility within the LangChain library by enriching the search results it returns. The tool previously returned title, link, and snippet, with the snippet being a truncated 140-character description from the search engine. To make the search results more informative, this update enables extra_snippets by default and introduces additional result fields: title, link, description (enhancing and renaming the former snippet field), age, and snippets. The snippets field provides a list of strings summarizing the webpage, utilizing Brave's capability for more detailed search insights. This enhancement aims to make the search tool far more informative and beneficial for users. **Issue:** N/A **Dependencies:** No additional dependencies introduced. **Twitter handle:** @davidalexr987 **Code Changes Summary:** - Changed the default setting to include extra_snippets in search results. - Renamed the snippet field to description to accurately reflect its content and included an age field for search results. - Introduced a snippets field that lists webpage summaries, providing users with comprehensive search result insights. **Backward Compatibility Note:** The renaming of snippet to description improves the accuracy of the returned data field but may impact existing users who have developed integrations or analyses based on the snippet field. I believe this change is essential for clarity and utility, and it aligns better with the data provided by Brave Search. **Additional Notes:** This proposal focuses exclusively on the Brave Search package, without affecting other LangChain packages or introducing new dependencies. 
--- .../langchain_community/utilities/brave_search.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/libs/community/langchain_community/utilities/brave_search.py b/libs/community/langchain_community/utilities/brave_search.py index fd282fc3465..cb822e55f43 100644 --- a/libs/community/langchain_community/utilities/brave_search.py +++ b/libs/community/langchain_community/utilities/brave_search.py @@ -30,7 +30,11 @@ class BraveSearchWrapper(BaseModel): { "title": item.get("title"), "link": item.get("url"), - "snippet": item.get("description"), + "snippet": " ".join( + filter( + None, [item.get("description"), *item.get("extra_snippets", [])] + ) + ), } for item in web_search_results ] @@ -48,7 +52,11 @@ class BraveSearchWrapper(BaseModel): results = self._search_request(query) return [ Document( - page_content=item.get("description"), # type: ignore[arg-type] + page_content=" ".join( + filter( + None, [item.get("description"), *item.get("extra_snippets", [])] + ) + ), metadata={"title": item.get("title"), "link": item.get("url")}, ) for item in results @@ -60,7 +68,7 @@ class BraveSearchWrapper(BaseModel): "Accept": "application/json", } req = requests.PreparedRequest() - params = {**self.search_kwargs, **{"q": query}} + params = {**self.search_kwargs, **{"q": query, "extra_snippets": True}} req.prepare_url(self.base_url, params) if req.url is None: raise ValueError("prepared url is None, this should not happen")