langchain/libs/partners/exa/langchain_exa/tools.py
Ishan Goswami f16456139b
exa docs and python package update (#31307)
Added support for new Exa API features. Updated Exa docs and python
package (langchain-exa).

Description

Added support for new Exa API features in the langchain-exa package:
- Added max_characters option for text content
- Added support for summary and custom summary prompts
- Added livecrawl option with "always", "fallback", "never" settings
- Added "auto" option for search type
- Updated documentation and tests

Dependencies
- No new dependencies required. Using existing features from exa-py.

twitter: @theishangoswami

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2025-05-21 21:33:30 -04:00

212 lines
12 KiB
Python

"""Tool for the Exa Search API."""
from typing import Any, Literal, Optional, Union
from exa_py import Exa # type: ignore[untyped-import]
from exa_py.api import (
HighlightsContentsOptions, # type: ignore[untyped-import]
TextContentsOptions, # type: ignore[untyped-import]
)
from langchain_core.callbacks import (
CallbackManagerForToolRun,
)
from langchain_core.tools import BaseTool
from pydantic import Field, SecretStr, model_validator
from langchain_exa._utilities import initialize_client
class ExaSearchResults(BaseTool):  # type: ignore[override]
    """Exa Search tool.

    Setup:
        Install ``langchain-exa`` and set environment variable ``EXA_API_KEY``.

        .. code-block:: bash

            pip install -U langchain-exa
            export EXA_API_KEY="your-api-key"

    Instantiation:
        .. code-block:: python

            from langchain_exa import ExaSearchResults

            tool = ExaSearchResults()

    Invocation with args:
        .. code-block:: python

            tool.invoke({"query": "what is the weather in SF", "num_results": 1})

        .. code-block:: python

            SearchResponse(results=[Result(url='https://www.wunderground.com/weather/37.8,-122.4', id='https://www.wunderground.com/weather/37.8,-122.4', title='San Francisco, CA Weather Conditionsstar_ratehome', score=0.1843988299369812, published_date='2023-02-23T01:17:06.594Z', author=None, text='The time period when the sun is no more than 6 degrees below the horizon at either sunrise or sunset. ...', highlights=None, highlight_scores=None, summary=None)], autoprompt_string=None)

    Invocation with ToolCall:
        .. code-block:: python

            tool.invoke({"args": {"query": "what is the weather in SF", "num_results": 1}, "id": "1", "name": tool.name, "type": "tool_call"})

        .. code-block:: python

            ToolMessage(content='Title: San Francisco, CA Weather Conditionsstar_ratehome\nURL: https://www.wunderground.com/weather/37.8,-122.4\nID: https://www.wunderground.com/weather/37.8,-122.4\nScore: 0.1843988299369812\nPublished Date: 2023-02-23T01:17:06.594Z\nAuthor: None\nText: ...\nHighlights: None\nHighlight Scores: None\nSummary: None\n', name='exa_search_results_json', tool_call_id='1')
    """  # noqa: E501

    name: str = "exa_search_results_json"
    description: str = (
        "A wrapper around Exa Search. "
        "Input should be an Exa-optimized query. "
        "Output is a JSON array of the query results"
    )
    # Exa client; populated by ``validate_environment`` (via initialize_client),
    # so ``default=None`` is only a pre-validation placeholder.
    client: Exa = Field(default=None)
    # API key; resolved by initialize_client (presumably from the EXA_API_KEY
    # environment variable when not passed explicitly — confirm in _utilities).
    exa_api_key: SecretStr = Field(default=None)
    # Optional API endpoint override, for parity with ExaFindSimilarResults.
    # NOTE(review): assumes initialize_client consumes this key — verify.
    exa_base_url: Optional[str] = None

    @model_validator(mode="before")
    @classmethod
    def validate_environment(cls, values: dict) -> Any:
        """Validate the environment and construct the Exa client."""
        return initialize_client(values)

    def _run(
        self,
        query: str,
        num_results: int = 10,
        text_contents_options: Optional[
            Union[TextContentsOptions, dict[str, Any], bool]
        ] = None,
        highlights: Optional[Union[HighlightsContentsOptions, bool]] = None,
        include_domains: Optional[list[str]] = None,
        exclude_domains: Optional[list[str]] = None,
        start_crawl_date: Optional[str] = None,
        end_crawl_date: Optional[str] = None,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
        use_autoprompt: Optional[bool] = None,
        livecrawl: Optional[Literal["always", "fallback", "never"]] = None,
        summary: Optional[Union[bool, dict[str, str]]] = None,
        type: Optional[Literal["neural", "keyword", "auto"]] = None,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> Union[list[dict], str]:
        """Use the tool.

        Args:
            query: The search query.
            num_results: The number of search results to return (1 to 100). Default: 10
            text_contents_options: How to set the page content of the results. Can be True or a dict with options like max_characters.
            highlights: Whether to include highlights in the results.
            include_domains: A list of domains to include in the search.
            exclude_domains: A list of domains to exclude from the search.
            start_crawl_date: The start date for the crawl (in YYYY-MM-DD format).
            end_crawl_date: The end date for the crawl (in YYYY-MM-DD format).
            start_published_date: The start date for when the document was published (in YYYY-MM-DD format).
            end_published_date: The end date for when the document was published (in YYYY-MM-DD format).
            use_autoprompt: Whether to use autoprompt for the search.
            livecrawl: Option to crawl live webpages if content is not in the index. Options: "always", "fallback", "never"
            summary: Whether to include a summary of the content. Can be a boolean or a dict with a custom query.
            type: The type of search, 'keyword', 'neural', or 'auto'.
            run_manager: The run manager for callbacks.

        Returns:
            The Exa search response, or ``repr`` of the exception on failure.
        """  # noqa: E501
        try:
            return self.client.search_and_contents(
                query,
                num_results=num_results,
                text=text_contents_options,  # type: ignore
                highlights=highlights,  # type: ignore
                include_domains=include_domains,
                exclude_domains=exclude_domains,
                start_crawl_date=start_crawl_date,
                end_crawl_date=end_crawl_date,
                start_published_date=start_published_date,
                end_published_date=end_published_date,
                use_autoprompt=use_autoprompt,
                livecrawl=livecrawl,
                summary=summary,
                type=type,
            )  # type: ignore
        except Exception as e:
            # Deliberate best-effort: tool errors are surfaced to the LLM as a
            # string rather than raised, so an agent run is not aborted.
            return repr(e)
class ExaFindSimilarResults(BaseTool):  # type: ignore[override]
    """Tool that queries the Exa find-similar API and gets back JSON.

    Given a source URL, returns pages from the Exa index that are similar
    to it, optionally with page text, highlights, and summaries.
    """

    name: str = "exa_find_similar_results_json"
    description: str = (
        "A wrapper around Exa Find Similar. "
        "Input should be an Exa-optimized query. "
        "Output is a JSON array of the query results"
    )
    # Exa client; populated by ``validate_environment`` (via initialize_client),
    # so ``default=None`` is only a pre-validation placeholder.
    client: Exa = Field(default=None)
    # API key; resolved by initialize_client (presumably from the EXA_API_KEY
    # environment variable when not passed explicitly — confirm in _utilities).
    exa_api_key: SecretStr = Field(default=None)
    # Optional API endpoint override.
    exa_base_url: Optional[str] = None

    @model_validator(mode="before")
    @classmethod
    def validate_environment(cls, values: dict) -> Any:
        """Validate the environment and construct the Exa client."""
        return initialize_client(values)

    def _run(
        self,
        url: str,
        num_results: int = 10,
        text_contents_options: Optional[
            Union[TextContentsOptions, dict[str, Any], bool]
        ] = None,
        highlights: Optional[Union[HighlightsContentsOptions, bool]] = None,
        include_domains: Optional[list[str]] = None,
        exclude_domains: Optional[list[str]] = None,
        start_crawl_date: Optional[str] = None,
        end_crawl_date: Optional[str] = None,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
        exclude_source_domain: Optional[bool] = None,
        category: Optional[str] = None,
        livecrawl: Optional[Literal["always", "fallback", "never"]] = None,
        summary: Optional[Union[bool, dict[str, str]]] = None,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> Union[list[dict], str]:
        """Use the tool.

        Args:
            url: The URL to find similar pages for.
            num_results: The number of search results to return (1 to 100). Default: 10
            text_contents_options: How to set the page content of the results. Can be True or a dict with options like max_characters.
            highlights: Whether to include highlights in the results.
            include_domains: A list of domains to include in the search.
            exclude_domains: A list of domains to exclude from the search.
            start_crawl_date: The start date for the crawl (in YYYY-MM-DD format).
            end_crawl_date: The end date for the crawl (in YYYY-MM-DD format).
            start_published_date: The start date for when the document was published (in YYYY-MM-DD format).
            end_published_date: The end date for when the document was published (in YYYY-MM-DD format).
            exclude_source_domain: If True, exclude pages from the same domain as the source URL.
            category: Filter for similar pages by category.
            livecrawl: Option to crawl live webpages if content is not in the index. Options: "always", "fallback", "never"
            summary: Whether to include a summary of the content. Can be a boolean or a dict with a custom query.
            run_manager: The run manager for callbacks.

        Returns:
            The Exa find-similar response, or ``repr`` of the exception on failure.
        """  # noqa: E501
        try:
            return self.client.find_similar_and_contents(
                url,
                num_results=num_results,
                text=text_contents_options,  # type: ignore
                highlights=highlights,  # type: ignore
                include_domains=include_domains,
                exclude_domains=exclude_domains,
                start_crawl_date=start_crawl_date,
                end_crawl_date=end_crawl_date,
                start_published_date=start_published_date,
                end_published_date=end_published_date,
                exclude_source_domain=exclude_source_domain,
                category=category,
                livecrawl=livecrawl,
                summary=summary,
            )  # type: ignore
        except Exception as e:
            # Deliberate best-effort: tool errors are surfaced to the LLM as a
            # string rather than raised, so an agent run is not aborted.
            return repr(e)