diff --git a/libs/partners/exa/README.md b/libs/partners/exa/README.md index ab06cd4f1b0..85dee325b75 100644 --- a/libs/partners/exa/README.md +++ b/libs/partners/exa/README.md @@ -27,6 +27,30 @@ results = exa.invoke("What is the capital of France?") print(results) ``` +### Advanced Features + +You can use advanced features like text limits, summaries, and live crawling: + +```python +from langchain_exa import ExaSearchRetriever, TextContentsOptions + +# Create a new instance with advanced options +exa = ExaSearchRetriever( + exa_api_key="YOUR API KEY", + k=20, # Number of results (1-100) + type="auto", # Can be "neural", "keyword", or "auto" + livecrawl="always", # Can be "always", "fallback", or "never" + summary=True, # Get an AI-generated summary of each result + text_contents_options={"max_characters": 3000} # Limit text length +) + +# Search for a query with custom summary prompt +exa_with_custom_summary = ExaSearchRetriever( + exa_api_key="YOUR API KEY", + summary={"query": "generate one line summary in simple words."} # Custom summary prompt +) +``` + ## Exa Search Results You can run the ExaSearchResults module as follows @@ -48,6 +72,33 @@ search_results = search_tool._run( print("Search Results:", search_results) ``` +### Advanced Features + +You can use advanced features like text limits, summaries, and live crawling: + +```python +from langchain_exa import ExaSearchResults + +# Initialize the ExaSearchResults tool +search_tool = ExaSearchResults(exa_api_key="YOUR API KEY") + +# Perform a search query with advanced options +search_results = search_tool._run( + query="Latest AI research papers", + num_results=10, # Number of results (1-100) + type="auto", # Can be "neural", "keyword", or "auto" + livecrawl="always", # Can be "always", "fallback", or "never" + summary=True, # Get an AI-generated summary of each result + text_contents_options={"max_characters": 2000} # Limit text length +) + +# With custom summary prompt +search_results_with_custom_summary = search_tool._run( + query="Latest AI research papers", + summary={"query": "generate one liner"} # Custom summary prompt +) +``` + ## Exa Find Similar Results You can run the ExaFindSimilarResults module as follows @@ -67,4 +118,22 @@ similar_results = find_similar_tool._run( ) print("Similar Results:", similar_results) +``` + +### Advanced Features + +```python +from langchain_exa import ExaFindSimilarResults + +# Initialize the ExaFindSimilarResults tool +find_similar_tool = ExaFindSimilarResults(exa_api_key="YOUR API KEY") + +# Find similar results with advanced options +similar_results = find_similar_tool._run( + url="http://espn.com", + num_results=10, # Number of results (1-100) + livecrawl="fallback", # Can be "always", "fallback", or "never" + summary=True, # Get an AI-generated summary of each result + text_contents_options={"max_characters": 1500} # Limit text length +) ``` \ No newline at end of file diff --git a/libs/partners/exa/langchain_exa/retrievers.py b/libs/partners/exa/langchain_exa/retrievers.py index 1585e6a5796..08b9b450fc8 100644 --- a/libs/partners/exa/langchain_exa/retrievers.py +++ b/libs/partners/exa/langchain_exa/retrievers.py @@ -27,6 +27,8 @@ def _get_metadata(result: Any) -> dict[str, Any]: metadata["highlights"] = result.highlights if getattr(result, "highlight_scores"): metadata["highlight_scores"] = result.highlight_scores + if getattr(result, "summary"): + metadata["summary"] = result.summary return metadata @@ -34,7 +36,7 @@ class ExaSearchRetriever(BaseRetriever): """Exa Search retriever.""" k: int = 10 # num_results - """The number of search results to return.""" + """The number of search results to return (1 to 100).""" include_domains: Optional[list[str]] = None """A list of domains to include in the search.""" exclude_domains: Optional[list[str]] = None @@ -50,11 +52,20 @@ class ExaSearchRetriever(BaseRetriever): use_autoprompt: Optional[bool] = None """Whether to use autoprompt for the search.""" type: str = "neural" - """The type of search, 'keyword' or 'neural'. Default: neural""" + """The type of search, 'keyword', 'neural', or 'auto'. Default: neural""" highlights: Optional[Union[HighlightsContentsOptions, bool]] = None """Whether to set the page content to the highlights of the results.""" - text_contents_options: Union[TextContentsOptions, Literal[True]] = True - """How to set the page content of the results""" + text_contents_options: Union[TextContentsOptions, dict[str, Any], Literal[True]] = ( + True + ) + """How to set the page content of the results. Can be True or a dict with options + like max_characters.""" + livecrawl: Optional[Literal["always", "fallback", "never"]] = None + """Option to crawl live webpages if content is not in the index. Options: "always", + "fallback", "never".""" + summary: Optional[Union[bool, dict[str, str]]] = None + """Whether to include a summary of the content. Can be a boolean or a dict with a + custom query.""" client: Exa = Field(default=None) exa_api_key: SecretStr = Field(default=None) @@ -82,6 +93,9 @@ class ExaSearchRetriever(BaseRetriever): start_published_date=self.start_published_date, end_published_date=self.end_published_date, use_autoprompt=self.use_autoprompt, + livecrawl=self.livecrawl, + summary=self.summary, + type=self.type, ) results = response.results diff --git a/libs/partners/exa/langchain_exa/tools.py b/libs/partners/exa/langchain_exa/tools.py index e22e4e709d8..381dade1c53 100644 --- a/libs/partners/exa/langchain_exa/tools.py +++ b/libs/partners/exa/langchain_exa/tools.py @@ -1,6 +1,6 @@ """Tool for the Exa Search API.""" -from typing import Any, Optional, Union +from typing import Any, Literal, Optional, Union from exa_py import Exa # type: ignore[untyped-import] from exa_py.api import ( @@ -74,8 +74,10 @@ class ExaSearchResults(BaseTool): # type: ignore[override] def _run( self, query: str, - num_results: int, - text_contents_options: Optional[Union[TextContentsOptions, bool]] = None, + num_results: int = 10, + text_contents_options: Optional[ + Union[TextContentsOptions, dict[str, Any], bool] + ] = None, highlights: Optional[Union[HighlightsContentsOptions, bool]] = None, include_domains: Optional[list[str]] = None, exclude_domains: Optional[list[str]] = None, @@ -84,9 +86,30 @@ class ExaSearchResults(BaseTool): # type: ignore[override] start_published_date: Optional[str] = None, end_published_date: Optional[str] = None, use_autoprompt: Optional[bool] = None, + livecrawl: Optional[Literal["always", "fallback", "never"]] = None, + summary: Optional[Union[bool, dict[str, str]]] = None, + type: Optional[Literal["neural", "keyword", "auto"]] = None, run_manager: Optional[CallbackManagerForToolRun] = None, ) -> Union[list[dict], str]: - """Use the tool.""" + """Use the tool. + + Args: + query: The search query. + num_results: The number of search results to return (1 to 100). Default: 10 + text_contents_options: How to set the page content of the results. Can be True or a dict with options like max_characters. + highlights: Whether to include highlights in the results. + include_domains: A list of domains to include in the search. + exclude_domains: A list of domains to exclude from the search. + start_crawl_date: The start date for the crawl (in YYYY-MM-DD format). + end_crawl_date: The end date for the crawl (in YYYY-MM-DD format). + start_published_date: The start date for when the document was published (in YYYY-MM-DD format). + end_published_date: The end date for when the document was published (in YYYY-MM-DD format). + use_autoprompt: Whether to use autoprompt for the search. + livecrawl: Option to crawl live webpages if content is not in the index. Options: "always", "fallback", "never" + summary: Whether to include a summary of the content. Can be a boolean or a dict with a custom query. + type: The type of search, 'keyword', 'neural', or 'auto'. + run_manager: The run manager for callbacks. + """ # noqa: E501 try: return self.client.search_and_contents( query, @@ -100,6 +123,9 @@ class ExaSearchResults(BaseTool): # type: ignore[override] start_published_date=start_published_date, end_published_date=end_published_date, use_autoprompt=use_autoprompt, + livecrawl=livecrawl, + summary=summary, + type=type, ) # type: ignore except Exception as e: return repr(e) @@ -128,8 +154,10 @@ class ExaFindSimilarResults(BaseTool): # type: ignore[override] def _run( self, url: str, - num_results: int, - text_contents_options: Optional[Union[TextContentsOptions, bool]] = None, + num_results: int = 10, + text_contents_options: Optional[ + Union[TextContentsOptions, dict[str, Any], bool] + ] = None, highlights: Optional[Union[HighlightsContentsOptions, bool]] = None, include_domains: Optional[list[str]] = None, exclude_domains: Optional[list[str]] = None, @@ -139,9 +167,29 @@ class ExaFindSimilarResults(BaseTool): # type: ignore[override] end_published_date: Optional[str] = None, exclude_source_domain: Optional[bool] = None, category: Optional[str] = None, + livecrawl: Optional[Literal["always", "fallback", "never"]] = None, + summary: Optional[Union[bool, dict[str, str]]] = None, run_manager: Optional[CallbackManagerForToolRun] = None, ) -> Union[list[dict], str]: - """Use the tool.""" + """Use the tool. + + Args: + url: The URL to find similar pages for. + num_results: The number of search results to return (1 to 100). Default: 10 + text_contents_options: How to set the page content of the results. Can be True or a dict with options like max_characters. + highlights: Whether to include highlights in the results. + include_domains: A list of domains to include in the search. + exclude_domains: A list of domains to exclude from the search. + start_crawl_date: The start date for the crawl (in YYYY-MM-DD format). + end_crawl_date: The end date for the crawl (in YYYY-MM-DD format). + start_published_date: The start date for when the document was published (in YYYY-MM-DD format). + end_published_date: The end date for when the document was published (in YYYY-MM-DD format). + exclude_source_domain: If True, exclude pages from the same domain as the source URL. + category: Filter for similar pages by category. + livecrawl: Option to crawl live webpages if content is not in the index. Options: "always", "fallback", "never" + summary: Whether to include a summary of the content. Can be a boolean or a dict with a custom query. + run_manager: The run manager for callbacks. + """ # noqa: E501 try: return self.client.find_similar_and_contents( url, @@ -156,6 +204,8 @@ class ExaFindSimilarResults(BaseTool): # type: ignore[override] end_published_date=end_published_date, exclude_source_domain=exclude_source_domain, category=category, + livecrawl=livecrawl, + summary=summary, ) # type: ignore except Exception as e: return repr(e) diff --git a/libs/partners/exa/pyproject.toml b/libs/partners/exa/pyproject.toml index 6fa6e07317e..64e73a160eb 100644 --- a/libs/partners/exa/pyproject.toml +++ b/libs/partners/exa/pyproject.toml @@ -6,9 +6,9 @@ build-backend = "pdm.backend" authors = [] license = { text = "MIT" } requires-python = ">=3.9" -dependencies = ["langchain-core<1.0.0,>=0.3.15", "exa-py<2.0.0,>=1.0.8"] +dependencies = ["langchain-core<1.0.0,>=0.3.60", "exa-py<2.0.0,>=1.0.8"] name = "langchain-exa" -version = "0.2.1" +version = "0.3.0" description = "An integration package connecting Exa and LangChain" readme = "README.md" diff --git a/libs/partners/exa/tests/integration_tests/test_retriever.py b/libs/partners/exa/tests/integration_tests/test_retriever.py index c1592b3c8c2..dc49e192dba 100644 --- a/libs/partners/exa/tests/integration_tests/test_retriever.py +++ b/libs/partners/exa/tests/integration_tests/test_retriever.py @@ -26,3 +26,19 @@ def test_exa_retriever_highlights() -> None: assert isinstance(highlight_scores, list) assert isinstance(highlights[0], str) assert isinstance(highlight_scores[0], float) + + +def test_exa_retriever_advanced_features() -> None: + retriever = ExaSearchRetriever( + k=3, text_contents_options={"max_characters": 1000}, summary=True, type="auto" + ) + res = retriever.invoke("best time to visit japan") + print(res) # noqa: T201 + assert len(res) == 3 # requested k=3 + assert isinstance(res, list) + assert isinstance(res[0], Document) + # Verify summary is in metadata + assert "summary" in res[0].metadata + assert isinstance(res[0].metadata["summary"], str) + # Verify text was limited + assert len(res[0].page_content) <= 1000 diff --git a/libs/partners/exa/tests/integration_tests/test_search_tool.py b/libs/partners/exa/tests/integration_tests/test_search_tool.py index 7fe49d0ddf2..46fed61dd5e 100644 --- a/libs/partners/exa/tests/integration_tests/test_search_tool.py +++ b/libs/partners/exa/tests/integration_tests/test_search_tool.py @@ -8,3 +8,23 @@ def test_search_tool() -> None: res = tool.invoke({"query": "best time to visit japan", "num_results": 5}) print(res) # noqa: T201 assert not isinstance(res, str) # str means error for this tool\ + + +def test_search_tool_advanced_features() -> None: + tool = ExaSearchResults() + res = tool.invoke( + { + "query": "best time to visit japan", + "num_results": 3, + "text_contents_options": {"max_characters": 1000}, + "summary": True, + "type": "auto", + } + ) + print(res) # noqa: T201 + assert not isinstance(res, str) # str means error for this tool + assert len(res.results) == 3 + # Verify summary exists + assert hasattr(res.results[0], "summary") + # Verify text was limited + assert len(res.results[0].text) <= 1000 diff --git a/libs/partners/exa/uv.lock b/libs/partners/exa/uv.lock index ec8dc0dc1fc..2378b3e4f10 100644 --- a/libs/partners/exa/uv.lock +++ b/libs/partners/exa/uv.lock @@ -1,5 +1,4 @@ version = 1 -revision = 1 requires-python = ">=3.9" resolution-markers = [ "python_full_version >= '3.13'", @@ -394,7 +393,7 @@ wheels = [ [[package]] name = "langchain-core" -version = "0.3.56" +version = "0.3.60" source = { editable = "../../core" } dependencies = [ { name = "jsonpatch" }, @@ -409,10 +408,9 @@ dependencies = [ [package.metadata] requires-dist = [ { name = "jsonpatch", specifier = ">=1.33,<2.0" }, - { name = "langsmith", specifier = ">=0.1.125,<0.4" }, + { name = "langsmith", specifier = ">=0.1.126,<0.4" }, { name = "packaging", specifier = ">=23.2,<25" }, - { name = "pydantic", marker = "python_full_version < '3.12.4'", specifier = ">=2.5.2,<3.0.0" }, - { name = "pydantic", marker = "python_full_version >= '3.12.4'", specifier = ">=2.7.4,<3.0.0" }, + { name = "pydantic", specifier = ">=2.7.4" }, { name = "pyyaml", specifier = ">=5.3" }, { name = "tenacity", specifier = ">=8.1.0,!=8.4.0,<10.0.0" }, { name = "typing-extensions", specifier = ">=4.7" }, @@ -453,7 +451,7 @@ typing = [ [[package]] name = "langchain-exa" -version = "0.2.1" +version = "0.3.0" source = { editable = "." } dependencies = [ { name = "exa-py" },