exa docs and python package update (#31307)

Added support for new Exa API features and updated the Exa docs and Python package (langchain-exa).

Description
- Added a max_characters option for text content
- Added support for summaries and custom summary prompts
- Added a livecrawl option with "always", "fallback", and "never" settings
- Added an "auto" option for the search type
- Updated documentation and tests

Dependencies
- No new dependencies required; uses existing features from exa-py.

twitter: @theishangoswami

Co-authored-by: Chester Curme <chester.curme@gmail.com>

Parent: cf1fa27e27
Commit: f16456139b
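
For orientation, the sketch below shows the new options working together through `ExaSearchRetriever`. It is a minimal illustration based on the changes in this commit, assuming the `EXA_API_KEY` environment variable is set (otherwise pass `exa_api_key=` explicitly); the query and option values are placeholders.

```python
from langchain_exa import ExaSearchRetriever

# Combine the new options: "auto" search type, live crawling, summaries,
# and a character cap on the returned text.
retriever = ExaSearchRetriever(
    k=5,  # number of results (1-100)
    type="auto",  # "neural", "keyword", or "auto"
    livecrawl="fallback",  # crawl live pages only when the index misses
    summary=True,  # attach an AI-generated summary to each result
    text_contents_options={"max_characters": 2000},  # limit text length
)

docs = retriever.invoke("state of open-source LLM evaluation")
for doc in docs:
    # The summary is surfaced in each document's metadata.
    print(doc.metadata.get("summary"))
```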
@@ -27,6 +27,30 @@ results = exa.invoke("What is the capital of France?")
 print(results)
 ```
 
+### Advanced Features
+
+You can use advanced features like text limits, summaries, and live crawling:
+
+```python
+from langchain_exa import ExaSearchRetriever, TextContentsOptions
+
+# Create a new instance with advanced options
+exa = ExaSearchRetriever(
+    exa_api_key="YOUR API KEY",
+    k=20, # Number of results (1-100)
+    type="auto", # Can be "neural", "keyword", or "auto"
+    livecrawl="always", # Can be "always", "fallback", or "never"
+    summary=True, # Get an AI-generated summary of each result
+    text_contents_options={"max_characters": 3000} # Limit text length
+)
+
+# Search for a query with custom summary prompt
+exa_with_custom_summary = ExaSearchRetriever(
+    exa_api_key="YOUR API KEY",
+    summary={"query": "generate one line summary in simple words."} # Custom summary prompt
+)
+```
+
 ## Exa Search Results
 
 You can run the ExaSearchResults module as follows
@@ -48,6 +72,33 @@ search_results = search_tool._run(
 print("Search Results:", search_results)
 ```
 
+### Advanced Features
+
+You can use advanced features like text limits, summaries, and live crawling:
+
+```python
+from langchain_exa import ExaSearchResults
+
+# Initialize the ExaSearchResults tool
+search_tool = ExaSearchResults(exa_api_key="YOUR API KEY")
+
+# Perform a search query with advanced options
+search_results = search_tool._run(
+    query="Latest AI research papers",
+    num_results=10, # Number of results (1-100)
+    type="auto", # Can be "neural", "keyword", or "auto"
+    livecrawl="always", # Can be "always", "fallback", or "never"
+    summary=True, # Get an AI-generated summary of each result
+    text_contents_options={"max_characters": 2000} # Limit text length
+)
+
+# With custom summary prompt
+search_results_with_custom_summary = search_tool._run(
+    query="Latest AI research papers",
+    summary={"query": "generate one liner"} # Custom summary prompt
+)
+```
+
 ## Exa Find Similar Results
 
 You can run the ExaFindSimilarResults module as follows
@@ -68,3 +119,21 @@ similar_results = find_similar_tool._run(
 
 print("Similar Results:", similar_results)
 ```
+
+### Advanced Features
+
+```python
+from langchain_exa import ExaFindSimilarResults
+
+# Initialize the ExaFindSimilarResults tool
+find_similar_tool = ExaFindSimilarResults(exa_api_key="YOUR API KEY")
+
+# Find similar results with advanced options
+similar_results = find_similar_tool._run(
+    url="http://espn.com",
+    num_results=10, # Number of results (1-100)
+    livecrawl="fallback", # Can be "always", "fallback", or "never"
+    summary=True, # Get an AI-generated summary of each result
+    text_contents_options={"max_characters": 1500} # Limit text length
+)
+```

@@ -27,6 +27,8 @@ def _get_metadata(result: Any) -> dict[str, Any]:
         metadata["highlights"] = result.highlights
     if getattr(result, "highlight_scores"):
         metadata["highlight_scores"] = result.highlight_scores
+    if getattr(result, "summary"):
+        metadata["summary"] = result.summary
     return metadata
 
 
@@ -34,7 +36,7 @@ class ExaSearchRetriever(BaseRetriever):
     """Exa Search retriever."""
 
     k: int = 10  # num_results
-    """The number of search results to return."""
+    """The number of search results to return (1 to 100)."""
     include_domains: Optional[list[str]] = None
     """A list of domains to include in the search."""
     exclude_domains: Optional[list[str]] = None
@@ -50,11 +52,20 @@ class ExaSearchRetriever(BaseRetriever):
     use_autoprompt: Optional[bool] = None
     """Whether to use autoprompt for the search."""
    type: str = "neural"
-    """The type of search, 'keyword' or 'neural'. Default: neural"""
+    """The type of search, 'keyword', 'neural', or 'auto'. Default: neural"""
     highlights: Optional[Union[HighlightsContentsOptions, bool]] = None
     """Whether to set the page content to the highlights of the results."""
-    text_contents_options: Union[TextContentsOptions, Literal[True]] = True
-    """How to set the page content of the results"""
+    text_contents_options: Union[TextContentsOptions, dict[str, Any], Literal[True]] = (
+        True
+    )
+    """How to set the page content of the results. Can be True or a dict with options
+    like max_characters."""
+    livecrawl: Optional[Literal["always", "fallback", "never"]] = None
+    """Option to crawl live webpages if content is not in the index. Options: "always",
+    "fallback", "never"."""
+    summary: Optional[Union[bool, dict[str, str]]] = None
+    """Whether to include a summary of the content. Can be a boolean or a dict with a
+    custom query."""
 
     client: Exa = Field(default=None)
     exa_api_key: SecretStr = Field(default=None)
@@ -82,6 +93,9 @@ class ExaSearchRetriever(BaseRetriever):
             start_published_date=self.start_published_date,
             end_published_date=self.end_published_date,
             use_autoprompt=self.use_autoprompt,
+            livecrawl=self.livecrawl,
+            summary=self.summary,
+            type=self.type,
         )
 
         results = response.results
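
A small sketch of how the widened retriever fields can be exercised; this is illustrative only and assumes `EXA_API_KEY` is set. `text_contents_options` now accepts either exa-py's `TextContentsOptions` or a plain dict, and `summary` accepts a boolean or a custom-prompt dict.

```python
from langchain_exa import ExaSearchRetriever, TextContentsOptions

# Typed options object (re-exported by langchain_exa from exa-py).
typed = ExaSearchRetriever(
    k=3,
    text_contents_options=TextContentsOptions(max_characters=1500),
)

# Equivalent plain-dict form, plus a custom summary prompt.
plain = ExaSearchRetriever(
    k=3,
    text_contents_options={"max_characters": 1500},
    summary={"query": "one-sentence summary for a general audience"},
)

docs = plain.invoke("largest coral reef systems")
print(docs[0].metadata.get("summary"))  # added to metadata by _get_metadata
print(len(docs[0].page_content))  # capped by max_characters
```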

@@ -1,6 +1,6 @@
 """Tool for the Exa Search API."""
 
-from typing import Any, Optional, Union
+from typing import Any, Literal, Optional, Union
 
 from exa_py import Exa  # type: ignore[untyped-import]
 from exa_py.api import (
@@ -74,8 +74,10 @@ class ExaSearchResults(BaseTool):  # type: ignore[override]
     def _run(
         self,
         query: str,
-        num_results: int,
-        text_contents_options: Optional[Union[TextContentsOptions, bool]] = None,
+        num_results: int = 10,
+        text_contents_options: Optional[
+            Union[TextContentsOptions, dict[str, Any], bool]
+        ] = None,
         highlights: Optional[Union[HighlightsContentsOptions, bool]] = None,
         include_domains: Optional[list[str]] = None,
         exclude_domains: Optional[list[str]] = None,
@@ -84,9 +86,30 @@ class ExaSearchResults(BaseTool):  # type: ignore[override]
         start_published_date: Optional[str] = None,
         end_published_date: Optional[str] = None,
         use_autoprompt: Optional[bool] = None,
+        livecrawl: Optional[Literal["always", "fallback", "never"]] = None,
+        summary: Optional[Union[bool, dict[str, str]]] = None,
+        type: Optional[Literal["neural", "keyword", "auto"]] = None,
         run_manager: Optional[CallbackManagerForToolRun] = None,
     ) -> Union[list[dict], str]:
-        """Use the tool."""
+        """Use the tool.
+
+        Args:
+            query: The search query.
+            num_results: The number of search results to return (1 to 100). Default: 10
+            text_contents_options: How to set the page content of the results. Can be True or a dict with options like max_characters.
+            highlights: Whether to include highlights in the results.
+            include_domains: A list of domains to include in the search.
+            exclude_domains: A list of domains to exclude from the search.
+            start_crawl_date: The start date for the crawl (in YYYY-MM-DD format).
+            end_crawl_date: The end date for the crawl (in YYYY-MM-DD format).
+            start_published_date: The start date for when the document was published (in YYYY-MM-DD format).
+            end_published_date: The end date for when the document was published (in YYYY-MM-DD format).
+            use_autoprompt: Whether to use autoprompt for the search.
+            livecrawl: Option to crawl live webpages if content is not in the index. Options: "always", "fallback", "never"
+            summary: Whether to include a summary of the content. Can be a boolean or a dict with a custom query.
+            type: The type of search, 'keyword', 'neural', or 'auto'.
+            run_manager: The run manager for callbacks.
+        """  # noqa: E501
         try:
             return self.client.search_and_contents(
                 query,
@@ -100,6 +123,9 @@ class ExaSearchResults(BaseTool):  # type: ignore[override]
                 start_published_date=start_published_date,
                 end_published_date=end_published_date,
                 use_autoprompt=use_autoprompt,
+                livecrawl=livecrawl,
+                summary=summary,
+                type=type,
             )  # type: ignore
         except Exception as e:
             return repr(e)
@@ -128,8 +154,10 @@ class ExaFindSimilarResults(BaseTool):  # type: ignore[override]
     def _run(
         self,
         url: str,
-        num_results: int,
-        text_contents_options: Optional[Union[TextContentsOptions, bool]] = None,
+        num_results: int = 10,
+        text_contents_options: Optional[
+            Union[TextContentsOptions, dict[str, Any], bool]
+        ] = None,
         highlights: Optional[Union[HighlightsContentsOptions, bool]] = None,
         include_domains: Optional[list[str]] = None,
         exclude_domains: Optional[list[str]] = None,
@@ -139,9 +167,29 @@ class ExaFindSimilarResults(BaseTool):  # type: ignore[override]
         end_published_date: Optional[str] = None,
         exclude_source_domain: Optional[bool] = None,
         category: Optional[str] = None,
+        livecrawl: Optional[Literal["always", "fallback", "never"]] = None,
+        summary: Optional[Union[bool, dict[str, str]]] = None,
         run_manager: Optional[CallbackManagerForToolRun] = None,
     ) -> Union[list[dict], str]:
-        """Use the tool."""
+        """Use the tool.
+
+        Args:
+            url: The URL to find similar pages for.
+            num_results: The number of search results to return (1 to 100). Default: 10
+            text_contents_options: How to set the page content of the results. Can be True or a dict with options like max_characters.
+            highlights: Whether to include highlights in the results.
+            include_domains: A list of domains to include in the search.
+            exclude_domains: A list of domains to exclude from the search.
+            start_crawl_date: The start date for the crawl (in YYYY-MM-DD format).
+            end_crawl_date: The end date for the crawl (in YYYY-MM-DD format).
+            start_published_date: The start date for when the document was published (in YYYY-MM-DD format).
+            end_published_date: The end date for when the document was published (in YYYY-MM-DD format).
+            exclude_source_domain: If True, exclude pages from the same domain as the source URL.
+            category: Filter for similar pages by category.
+            livecrawl: Option to crawl live webpages if content is not in the index. Options: "always", "fallback", "never"
+            summary: Whether to include a summary of the content. Can be a boolean or a dict with a custom query.
+            run_manager: The run manager for callbacks.
+        """  # noqa: E501
         try:
             return self.client.find_similar_and_contents(
                 url,
@@ -156,6 +204,8 @@ class ExaFindSimilarResults(BaseTool):  # type: ignore[override]
                 end_published_date=end_published_date,
                 exclude_source_domain=exclude_source_domain,
                 category=category,
+                livecrawl=livecrawl,
+                summary=summary,
             )  # type: ignore
         except Exception as e:
             return repr(e)
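
For the tool classes, the new keyword arguments flow through the standard `invoke` path as a dict, the same way the integration tests further below call them. A rough sketch, assuming `EXA_API_KEY` is set; the query, URL, and date values are placeholders.

```python
from langchain_exa import ExaFindSimilarResults, ExaSearchResults

search = ExaSearchResults()
results = search.invoke(
    {
        "query": "recent advances in battery chemistry",
        "num_results": 3,  # now optional; defaults to 10
        "type": "auto",
        "livecrawl": "fallback",
        "summary": {"query": "one line, in plain language"},
        "text_contents_options": {"max_characters": 1000},
        "start_published_date": "2024-01-01",  # YYYY-MM-DD
    }
)

similar = ExaFindSimilarResults()
neighbors = similar.invoke(
    {"url": "http://espn.com", "num_results": 3, "summary": True}
)
```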

@@ -6,9 +6,9 @@ build-backend = "pdm.backend"
 authors = []
 license = { text = "MIT" }
 requires-python = ">=3.9"
-dependencies = ["langchain-core<1.0.0,>=0.3.15", "exa-py<2.0.0,>=1.0.8"]
+dependencies = ["langchain-core<1.0.0,>=0.3.60", "exa-py<2.0.0,>=1.0.8"]
 name = "langchain-exa"
-version = "0.2.1"
+version = "0.3.0"
 description = "An integration package connecting Exa and LangChain"
 readme = "README.md"
 

@@ -26,3 +26,19 @@ def test_exa_retriever_highlights() -> None:
     assert isinstance(highlight_scores, list)
     assert isinstance(highlights[0], str)
     assert isinstance(highlight_scores[0], float)
+
+
+def test_exa_retriever_advanced_features() -> None:
+    retriever = ExaSearchRetriever(
+        k=3, text_contents_options={"max_characters": 1000}, summary=True, type="auto"
+    )
+    res = retriever.invoke("best time to visit japan")
+    print(res)  # noqa: T201
+    assert len(res) == 3  # requested k=3
+    assert isinstance(res, list)
+    assert isinstance(res[0], Document)
+    # Verify summary is in metadata
+    assert "summary" in res[0].metadata
+    assert isinstance(res[0].metadata["summary"], str)
+    # Verify text was limited
+    assert len(res[0].page_content) <= 1000

@@ -8,3 +8,23 @@ def test_search_tool() -> None:
     res = tool.invoke({"query": "best time to visit japan", "num_results": 5})
     print(res)  # noqa: T201
     assert not isinstance(res, str)  # str means error for this tool
+
+
+def test_search_tool_advanced_features() -> None:
+    tool = ExaSearchResults()
+    res = tool.invoke(
+        {
+            "query": "best time to visit japan",
+            "num_results": 3,
+            "text_contents_options": {"max_characters": 1000},
+            "summary": True,
+            "type": "auto",
+        }
+    )
+    print(res)  # noqa: T201
+    assert not isinstance(res, str)  # str means error for this tool
+    assert len(res.results) == 3
+    # Verify summary exists
+    assert hasattr(res.results[0], "summary")
+    # Verify text was limited
+    assert len(res.results[0].text) <= 1000

@@ -1,5 +1,4 @@
 version = 1
-revision = 1
 requires-python = ">=3.9"
 resolution-markers = [
     "python_full_version >= '3.13'",
@@ -394,7 +393,7 @@ wheels = [
 
 [[package]]
 name = "langchain-core"
-version = "0.3.56"
+version = "0.3.60"
 source = { editable = "../../core" }
 dependencies = [
     { name = "jsonpatch" },
@@ -409,10 +408,9 @@ dependencies = [
 [package.metadata]
 requires-dist = [
     { name = "jsonpatch", specifier = ">=1.33,<2.0" },
-    { name = "langsmith", specifier = ">=0.1.125,<0.4" },
+    { name = "langsmith", specifier = ">=0.1.126,<0.4" },
     { name = "packaging", specifier = ">=23.2,<25" },
-    { name = "pydantic", marker = "python_full_version < '3.12.4'", specifier = ">=2.5.2,<3.0.0" },
-    { name = "pydantic", marker = "python_full_version >= '3.12.4'", specifier = ">=2.7.4,<3.0.0" },
+    { name = "pydantic", specifier = ">=2.7.4" },
     { name = "pyyaml", specifier = ">=5.3" },
     { name = "tenacity", specifier = ">=8.1.0,!=8.4.0,<10.0.0" },
    { name = "typing-extensions", specifier = ">=4.7" },
@@ -453,7 +451,7 @@ typing = [
 
 [[package]]
 name = "langchain-exa"
-version = "0.2.1"
+version = "0.3.0"
 source = { editable = "." }
 dependencies = [
     { name = "exa-py" },