mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-31 18:38:48 +00:00
community: Add you.com utility, update you retriever integration docs (#17014)
<!-- Thank you for contributing to LangChain! Please title your PR "<package>: <description>", where <package> is whichever of langchain, community, core, experimental, etc. is being modified. Replace this entire comment with: - **Description:** a description of the change, - **Issue:** the issue # it fixes if applicable, - **Dependencies:** any dependencies required for this change, - **Twitter handle:** we announce bigger features on Twitter. If your PR gets announced, and you'd like a mention, we'll gladly shout you out! Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` from the root of the package you've modified to check this locally. See contribution guidelines for more information on how to write/run tests, lint, etc: https://python.langchain.com/docs/contributing/ If you're adding a new integration, please include: 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. If no one reviews your PR within a few days, please @-mention one of @baskaryan, @eyurtsev, @hwchase17. --> - **Description: changes to you.com files** - general cleanup - adds community/utilities/you.py, moving bulk of code from retriever -> utility - removes `snippet` as endpoint - adds `news` as endpoint - adds more tests <s>**Description: update community MAKE file** - adds `integration_tests` - adds `coverage`</s> - **Issue:** the issue # it fixes if applicable, - [For New Contributors: Update Integration Documentation](https://github.com/langchain-ai/langchain/issues/15664#issuecomment-1920099868) - **Dependencies:** n/a - **Twitter handle:** @scottnath - **Mastodon handle:** scottnath@mastodon.social --------- Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -70,6 +70,7 @@ from langchain_community.retrievers.weaviate_hybrid_search import (
|
||||
WeaviateHybridSearchRetriever,
|
||||
)
|
||||
from langchain_community.retrievers.wikipedia import WikipediaRetriever
|
||||
from langchain_community.retrievers.you import YouRetriever
|
||||
from langchain_community.retrievers.zep import ZepRetriever
|
||||
from langchain_community.retrievers.zilliz import ZillizRetriever
|
||||
|
||||
@@ -79,6 +80,7 @@ __all__ = [
|
||||
"ArceeRetriever",
|
||||
"ArxivRetriever",
|
||||
"AzureCognitiveSearchRetriever",
|
||||
"BM25Retriever",
|
||||
"BreebsRetriever",
|
||||
"ChatGPTPluginRetriever",
|
||||
"ChaindeskRetriever",
|
||||
@@ -103,10 +105,10 @@ __all__ = [
|
||||
"SVMRetriever",
|
||||
"TavilySearchAPIRetriever",
|
||||
"TFIDFRetriever",
|
||||
"BM25Retriever",
|
||||
"VespaRetriever",
|
||||
"WeaviateHybridSearchRetriever",
|
||||
"WikipediaRetriever",
|
||||
"YouRetriever",
|
||||
"ZepRetriever",
|
||||
"ZillizRetriever",
|
||||
"DocArrayRetriever",
|
||||
|
@@ -1,64 +1,23 @@
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, List
|
||||
|
||||
from langchain_core.callbacks import CallbackManagerForRetrieverRun
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.pydantic_v1 import root_validator
|
||||
from langchain_core.retrievers import BaseRetriever
|
||||
from langchain_core.utils import get_from_dict_or_env
|
||||
|
||||
from langchain_community.utilities import YouSearchAPIWrapper
|
||||
|
||||
|
||||
class YouRetriever(BaseRetriever):
|
||||
class YouRetriever(BaseRetriever, YouSearchAPIWrapper):
|
||||
"""`You` retriever that uses You.com's search API.
|
||||
|
||||
To connect to the You.com api requires an API key which
|
||||
you can get by emailing api@you.com.
|
||||
You can check out our docs at https://documentation.you.com.
|
||||
|
||||
You need to set the environment variable `YDC_API_KEY` for retriever to operate.
|
||||
It wraps results() to get_relevant_documents
|
||||
It uses all YouSearchAPIWrapper arguments without any change.
|
||||
"""
|
||||
|
||||
ydc_api_key: str
|
||||
k: Optional[int] = None
|
||||
n_hits: Optional[int] = None
|
||||
n_snippets_per_hit: Optional[int] = None
|
||||
endpoint_type: str = "web"
|
||||
|
||||
@root_validator(pre=True)
|
||||
def validate_client(
|
||||
cls,
|
||||
values: Dict[str, Any],
|
||||
) -> Dict[str, Any]:
|
||||
values["ydc_api_key"] = get_from_dict_or_env(
|
||||
values, "ydc_api_key", "YDC_API_KEY"
|
||||
)
|
||||
return values
|
||||
|
||||
def _get_relevant_documents(
|
||||
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
|
||||
self,
|
||||
query: str,
|
||||
*,
|
||||
run_manager: CallbackManagerForRetrieverRun,
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
import requests
|
||||
|
||||
headers = {"X-API-Key": self.ydc_api_key}
|
||||
if self.endpoint_type == "web":
|
||||
results = requests.get(
|
||||
f"https://api.ydc-index.io/search?query={query}",
|
||||
headers=headers,
|
||||
).json()
|
||||
|
||||
docs = []
|
||||
n_hits = self.n_hits or len(results["hits"])
|
||||
for hit in results["hits"][:n_hits]:
|
||||
n_snippets_per_hit = self.n_snippets_per_hit or len(hit["snippets"])
|
||||
for snippet in hit["snippets"][:n_snippets_per_hit]:
|
||||
docs.append(Document(page_content=snippet))
|
||||
if self.k is not None and len(docs) >= self.k:
|
||||
return docs
|
||||
return docs
|
||||
elif self.endpoint_type == "snippet":
|
||||
results = requests.get(
|
||||
f"https://api.ydc-index.io/snippet_search?query={query}",
|
||||
headers=headers,
|
||||
).json()
|
||||
return [Document(page_content=snippet) for snippet in results]
|
||||
else:
|
||||
raise RuntimeError(f"Invalid endpoint type provided {self.endpoint_type}")
|
||||
return self.results(query, run_manager=run_manager.get_child(), **kwargs)
|
||||
|
@@ -248,6 +248,12 @@ def _import_twilio() -> Any:
|
||||
return TwilioAPIWrapper
|
||||
|
||||
|
||||
def _import_you() -> Any:
    """Import ``YouSearchAPIWrapper`` on demand and return the class."""
    # Imported inside the function so the `you` module is only loaded when
    # this helper is actually called.
    from langchain_community.utilities.you import YouSearchAPIWrapper

    return YouSearchAPIWrapper
|
||||
|
||||
|
||||
def _import_wikipedia() -> Any:
|
||||
from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
|
||||
|
||||
@@ -377,6 +383,8 @@ def __getattr__(name: str) -> Any:
|
||||
return _import_tensorflow_datasets()
|
||||
elif name == "TwilioAPIWrapper":
|
||||
return _import_twilio()
|
||||
elif name == "YouSearchAPIWrapper":
|
||||
return _import_you()
|
||||
elif name == "WikipediaAPIWrapper":
|
||||
return _import_wikipedia()
|
||||
elif name == "WolframAlphaAPIWrapper":
|
||||
@@ -434,6 +442,7 @@ __all__ = [
|
||||
"TensorflowDatasets",
|
||||
"TextRequestsWrapper",
|
||||
"TwilioAPIWrapper",
|
||||
"YouSearchAPIWrapper",
|
||||
"WikipediaAPIWrapper",
|
||||
"WolframAlphaAPIWrapper",
|
||||
"ZapierNLAWrapper",
|
||||
|
230
libs/community/langchain_community/utilities/you.py
Normal file
230
libs/community/langchain_community/utilities/you.py
Normal file
@@ -0,0 +1,230 @@
|
||||
"""Util that calls you.com Search API.
|
||||
|
||||
In order to set this up, follow instructions at: https://documentation.you.com
|
||||
"""
|
||||
import json
|
||||
from typing import Any, Dict, List, Literal, Optional
|
||||
|
||||
import aiohttp
|
||||
import requests
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.pydantic_v1 import BaseModel, Field, root_validator
|
||||
from langchain_core.utils import get_from_dict_or_env
|
||||
|
||||
YOU_API_URL = "https://api.ydc-index.io"
|
||||
|
||||
|
||||
class YouHitMetadata(BaseModel):
    """Metadata on a single hit from you.com.

    All four fields are required strings. They are the same keys that
    `YouSearchAPIWrapper._parse_results` copies from each hit into the
    metadata of the Documents it builds.
    """

    title: str = Field(description="The title of the result")
    url: str = Field(description="The url of the result")
    thumbnail_url: str = Field(description="Thumbnail associated with the result")
    description: str = Field(description="Details about the result")
|
||||
|
||||
|
||||
class YouHit(YouHitMetadata):
    """A single hit from you.com, which may contain multiple snippets.

    Extends `YouHitMetadata` with the list of text snippets found for the hit.
    """

    snippets: List[str] = Field(description="One or more snippets of text")
|
||||
|
||||
|
||||
class YouAPIOutput(BaseModel):
    """The output from you.com api.

    Models a response body whose top-level `hits` key holds the list of
    results, each one shaped like `YouHit`.
    """

    hits: List[YouHit] = Field(
        description="A list of dictionaries containing the results"
    )
|
||||
|
||||
|
||||
class YouDocument(BaseModel):
    """The output of parsing one snippet.

    Pairs a single snippet of text with the metadata of the hit it belongs to.
    """

    page_content: str = Field(description="One snippet of text")
    # Metadata (title, url, thumbnail_url, description) of the originating hit.
    metadata: YouHitMetadata
|
||||
|
||||
|
||||
class YouSearchAPIWrapper(BaseModel):
    """Wrapper for you.com Search API.

    To connect to the You.com api requires an API key which
    you can get at https://api.you.com.
    You can check out the docs at https://documentation.you.com.

    You need to set the environment variable `YDC_API_KEY` for retriever to operate.

    Attributes
    ----------
    ydc_api_key: str, optional
        you.com api key, if YDC_API_KEY is not set in the environment
    num_web_results: int, optional
        The max number of web results to return, must be under 20
    safesearch: str, optional
        Safesearch settings, one of off, moderate, strict, defaults to moderate
    country: str, optional
        Country code, ex: 'US' for united states, see api docs for list
    k: int, optional
        max number of Documents to return using `results()`;
        applied to search hits only, news results are returned in full
    n_hits: int, optional, deprecated
        Intended as an alias for num_web_results; kept for backwards
        compatibility but not currently read by any method of this class
    n_snippets_per_hit: int, optional
        limit the number of snippets returned per hit
    endpoint_type: str, optional
        you.com endpoints: search, news, rag;
        `web` and `snippet` alias `search`
        `rag` returns `{'message': 'Forbidden'}`
    """

    ydc_api_key: Optional[str] = None
    num_web_results: Optional[int] = None
    safesearch: Optional[str] = None
    country: Optional[str] = None
    k: Optional[int] = None
    n_snippets_per_hit: Optional[int] = None
    # @todo deprecate `snippet` and `web`, not part of API
    endpoint_type: Literal["search", "news", "rag", "snippet", "web"] = "search"
    # should deprecate n_hits
    n_hits: Optional[int] = None

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Resolve the api key from the init values or the environment.

        The key is looked up under `ydc_api_key` in `values`, falling back to
        the `YDC_API_KEY` environment variable, and written back to `values`.
        """
        values["ydc_api_key"] = get_from_dict_or_env(
            values, "ydc_api_key", "YDC_API_KEY"
        )
        return values

    def _endpoint_url(self) -> str:
        """Return the request URL for the configured endpoint.

        `snippet` and `web` are legacy aliases served by `search`. The alias is
        resolved into a local value so that issuing a request never rewrites
        `self.endpoint_type`.
        """
        endpoint = self.endpoint_type
        if endpoint in ("snippet", "web"):
            endpoint = "search"
        return f"{YOU_API_URL}/{endpoint}"

    def _request_params(self, query: str, options: Dict[str, Any]) -> Dict[str, Any]:
        """Build the query-string parameters shared by sync and async requests.

        Parameters:
            query: The query to search for.
            options: Extra parameters; entries whose value is None are dropped.
        Returns:
            Dict of parameters to send with the request.
        """
        params = {"query": query, **options}
        params = {key: value for key, value in params.items() if value is not None}
        # news endpoint expects `q` instead of `query`
        if self.endpoint_type == "news":
            params["q"] = params.pop("query")
        return params

    def _parse_results(self, raw_search_results: Dict) -> List[Document]:
        """
        Extracts snippets from each hit and puts them in a Document
        Parameters:
            raw_search_results: The full response payload; read under
                `news` -> `results` for the news endpoint, `hits` otherwise
        Returns:
            List[Document]: One Document per news result, or one per snippet
                (at most `k` when `k` is set) with the hit's metadata attached
        """

        # return news results
        if self.endpoint_type == "news":
            return [
                Document(page_content=result["description"], metadata=result)
                for result in raw_search_results["news"]["results"]
            ]

        docs: List[Document] = []
        for hit in raw_search_results["hits"]:
            # A falsy limit (None or 0) means "keep every snippet of the hit".
            n_snippets_per_hit = self.n_snippets_per_hit or len(hit["snippets"])
            for snippet in hit["snippets"][:n_snippets_per_hit]:
                docs.append(
                    Document(
                        page_content=snippet,
                        metadata={
                            "url": hit["url"],
                            "thumbnail_url": hit["thumbnail_url"],
                            "title": hit["title"],
                            "description": hit["description"],
                        },
                    )
                )
                # Checked after every Document so the cap is honoured exactly.
                if self.k is not None and len(docs) >= self.k:
                    return docs
        return docs

    def raw_results(
        self,
        query: str,
        **kwargs: Any,
    ) -> Dict:
        """Run query through you.com Search and return hits.

        Args:
            query: The query to search for.
            num_web_results: The maximum number of results to return.
            safesearch: Safesearch settings,
              one of off, moderate, strict, defaults to moderate
            country: Country code
        Returns: The decoded JSON body of the response.
        Raises: `requests.HTTPError` (via `raise_for_status`) on an error status.
        """
        headers = {"X-API-Key": self.ydc_api_key or ""}
        # Instance settings are the defaults; explicit kwargs override them.
        params = self._request_params(
            query,
            {
                "num_web_results": self.num_web_results,
                "safesearch": self.safesearch,
                "country": self.country,
                **kwargs,
            },
        )
        response = requests.get(
            self._endpoint_url(),
            params=params,
            headers=headers,
        )
        response.raise_for_status()
        return response.json()

    def results(
        self,
        query: str,
        **kwargs: Any,
    ) -> List[Document]:
        """Run query through you.com Search and parses results into Documents."""

        raw_search_results = self.raw_results(
            query,
            **{key: value for key, value in kwargs.items() if value is not None},
        )
        return self._parse_results(raw_search_results)

    async def raw_results_async(
        self,
        query: str,
        num_web_results: Optional[int] = 5,
        safesearch: Optional[str] = "moderate",
        country: Optional[str] = "US",
    ) -> Dict:
        """Get results from the you.com Search API asynchronously.

        Mirrors `raw_results`: an authenticated GET against the configured
        endpoint, with the parameters sent in the query string.

        Args:
            query: The query to search for.
            num_web_results: The maximum number of results to return.
            safesearch: Safesearch settings, one of off, moderate, strict
            country: Country code
        Returns: The decoded JSON body of the response.
        Raises: Exception when the response status is not 200.
        """
        headers = {"X-API-Key": self.ydc_api_key or ""}
        params = self._request_params(
            query,
            {
                "num_web_results": num_web_results,
                "safesearch": safesearch,
                "country": country,
            },
        )
        async with aiohttp.ClientSession() as session:
            async with session.get(
                self._endpoint_url(),
                params=params,
                headers=headers,
            ) as res:
                if res.status != 200:
                    raise Exception(f"Error {res.status}: {res.reason}")
                return json.loads(await res.text())

    async def results_async(
        self,
        query: str,
        num_web_results: Optional[int] = 5,
        safesearch: Optional[str] = "moderate",
        country: Optional[str] = "US",
    ) -> List[Document]:
        """Async variant of `results`: query you.com and parse into Documents."""
        results_json = await self.raw_results_async(
            query=query,
            num_web_results=num_web_results,
            safesearch=safesearch,
            country=country,
        )
        # `_parse_results` reads the `hits` / `news` keys itself, so it needs
        # the whole payload rather than a sub-key of it.
        return self._parse_results(results_json)
|
@@ -34,6 +34,7 @@ EXPECTED_ALL = [
|
||||
"VespaRetriever",
|
||||
"WeaviateHybridSearchRetriever",
|
||||
"WikipediaRetriever",
|
||||
"YouRetriever",
|
||||
"ZepRetriever",
|
||||
"ZillizRetriever",
|
||||
"DocArrayRetriever",
|
||||
|
@@ -1,26 +1,72 @@
|
||||
import json
|
||||
import os
|
||||
from unittest import mock
|
||||
|
||||
from langchain_core.documents import Document
|
||||
from requests import Response
|
||||
import responses
|
||||
|
||||
from langchain_community.retrievers.you import YouRetriever
|
||||
|
||||
from ..utilities.test_you import (
|
||||
LIMITED_PARSED_OUTPUT,
|
||||
MOCK_PARSED_OUTPUT,
|
||||
MOCK_RESPONSE_RAW,
|
||||
NEWS_RESPONSE_PARSED,
|
||||
NEWS_RESPONSE_RAW,
|
||||
TEST_ENDPOINT,
|
||||
)
|
||||
|
||||
|
||||
class TestYouRetriever:
|
||||
@responses.activate
|
||||
def test_get_relevant_documents(self) -> None:
|
||||
os.environ["YDC_API_KEY"] = "MOCK KEY!"
|
||||
retriever = YouRetriever()
|
||||
responses.add(
|
||||
responses.GET, f"{TEST_ENDPOINT}/search", json=MOCK_RESPONSE_RAW, status=200
|
||||
)
|
||||
query = "Test query text"
|
||||
you_wrapper = YouRetriever(ydc_api_key="test")
|
||||
results = you_wrapper.get_relevant_documents(query)
|
||||
expected_result = MOCK_PARSED_OUTPUT
|
||||
assert results == expected_result
|
||||
|
||||
with mock.patch("requests.get") as mock_get:
|
||||
fixture = {"hits": [{"snippets": ["yo"]}, {"snippets": ["bird up"]}]}
|
||||
response = Response()
|
||||
response._content = bytes(json.dumps(fixture).encode("utf-8"))
|
||||
mock_get.return_value = response
|
||||
@responses.activate
|
||||
def test_invoke(self) -> None:
|
||||
responses.add(
|
||||
responses.GET, f"{TEST_ENDPOINT}/search", json=MOCK_RESPONSE_RAW, status=200
|
||||
)
|
||||
query = "Test query text"
|
||||
you_wrapper = YouRetriever(ydc_api_key="test")
|
||||
results = you_wrapper.invoke(query)
|
||||
expected_result = MOCK_PARSED_OUTPUT
|
||||
assert results == expected_result
|
||||
|
||||
actual = retriever.get_relevant_documents("test")
|
||||
assert actual == [
|
||||
Document(page_content="yo"),
|
||||
Document(page_content="bird up"),
|
||||
]
|
||||
@responses.activate
|
||||
def test_invoke_max_docs(self) -> None:
|
||||
responses.add(
|
||||
responses.GET, f"{TEST_ENDPOINT}/search", json=MOCK_RESPONSE_RAW, status=200
|
||||
)
|
||||
query = "Test query text"
|
||||
you_wrapper = YouRetriever(k=2, ydc_api_key="test")
|
||||
results = you_wrapper.invoke(query)
|
||||
expected_result = [MOCK_PARSED_OUTPUT[0], MOCK_PARSED_OUTPUT[1]]
|
||||
assert results == expected_result
|
||||
|
||||
@responses.activate
|
||||
def test_invoke_limit_snippets(self) -> None:
|
||||
responses.add(
|
||||
responses.GET, f"{TEST_ENDPOINT}/search", json=MOCK_RESPONSE_RAW, status=200
|
||||
)
|
||||
|
||||
query = "Test query text"
|
||||
you_wrapper = YouRetriever(n_snippets_per_hit=1, ydc_api_key="test")
|
||||
results = you_wrapper.results(query)
|
||||
expected_result = LIMITED_PARSED_OUTPUT
|
||||
assert results == expected_result
|
||||
|
||||
@responses.activate
|
||||
def test_invoke_news(self) -> None:
|
||||
responses.add(
|
||||
responses.GET, f"{TEST_ENDPOINT}/news", json=NEWS_RESPONSE_RAW, status=200
|
||||
)
|
||||
|
||||
query = "Test news text"
|
||||
# ensure limit on number of docs returned
|
||||
you_wrapper = YouRetriever(endpoint_type="news", ydc_api_key="test")
|
||||
results = you_wrapper.results(query)
|
||||
expected_result = NEWS_RESPONSE_PARSED
|
||||
assert results == expected_result
|
||||
|
@@ -48,6 +48,7 @@ EXPECTED_ALL = [
|
||||
"TwilioAPIWrapper",
|
||||
"WikipediaAPIWrapper",
|
||||
"WolframAlphaAPIWrapper",
|
||||
"YouSearchAPIWrapper",
|
||||
"ZapierNLAWrapper",
|
||||
"MerriamWebsterAPIWrapper",
|
||||
]
|
||||
|
190
libs/community/tests/unit_tests/utilities/test_you.py
Normal file
190
libs/community/tests/unit_tests/utilities/test_you.py
Normal file
@@ -0,0 +1,190 @@
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
import responses
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_community.utilities.you import YouSearchAPIWrapper
|
||||
|
||||
# Base URL of the you.com API; the mocked routes in this module are
# registered under it.
TEST_ENDPOINT = "https://api.ydc-index.io"

# Mock you.com `search` response for testing: two hits, each carrying two
# snippets plus the description/thumbnail_url/title/url fields.
MOCK_RESPONSE_RAW: Dict[str, List[Dict[str, Union[str, List[str]]]]] = {
    "hits": [
        {
            "description": "Test description",
            "snippets": ["yo", "bird up"],
            "thumbnail_url": "https://example.com/image.gif",
            "title": "Test title 1",
            "url": "https://example.com/article.html",
        },
        {
            "description": "Test description 2",
            "snippets": ["worst show", "on tv"],
            "thumbnail_url": "https://example.com/image2.gif",
            "title": "Test title 2",
            "url": "https://example.com/article2.html",
        },
    ]
}
|
||||
|
||||
|
||||
def generate_parsed_metadata(num: Optional[int] = 0) -> Dict[Any, Any]:
    """Build the metadata expected for one hit of the mock search response.

    Args:
        num: Index of the hit in ``MOCK_RESPONSE_RAW["hits"]``; ``None`` is
            treated as ``0``.
    Returns:
        The hit's url, thumbnail_url, title and description, in that order.
    """
    index = 0 if num is None else num
    source_hit: Dict[str, Union[str, List[str]]] = MOCK_RESPONSE_RAW["hits"][index]
    return {
        field: source_hit[field]
        for field in ("url", "thumbnail_url", "title", "description")
    }
|
||||
|
||||
|
||||
def generate_parsed_output(num: Optional[int] = 0) -> List[Document]:
    """Build the Documents expected from parsing one hit of the mock response.

    Args:
        num: Index of the hit in ``MOCK_RESPONSE_RAW["hits"]``; ``None`` is
            treated as ``0``.
    Returns:
        One Document per snippet of the hit, each carrying the hit's metadata.
    """
    index = 0 if num is None else num
    source_hit: Dict[str, Union[str, List[str]]] = MOCK_RESPONSE_RAW["hits"][index]
    return [
        Document(page_content=text, metadata=generate_parsed_metadata(index))
        for text in source_hit["snippets"]
    ]
|
||||
|
||||
|
||||
# Mock results after parsing: every snippet of both mock hits, in hit order
MOCK_PARSED_OUTPUT = generate_parsed_output() + generate_parsed_output(1)
# Single-snippet: only the first snippet of each mock hit
LIMITED_PARSED_OUTPUT = [generate_parsed_output()[0], generate_parsed_output(1)[0]]
|
||||
|
||||
# Sample `news` endpoint payload, copied from the you.com API docs: a single
# result nested under news -> results.
NEWS_RESPONSE_RAW = {
    "news": {
        "results": [
            {
                "age": "18 hours ago",
                "breaking": True,
                "description": "Search on YDC for the news",
                "meta_url": {
                    "hostname": "www.reuters.com",
                    "netloc": "reuters.com",
                    "path": "› 2023 › 10 › 18 › politics › inflation › index.html",
                    "scheme": "https",
                },
                "page_age": "2 days",
                "page_fetched": "2023-10-12T23:00:00Z",
                "thumbnail": {"original": "https://reuters.com/news.jpg"},
                "title": "Breaking News about the World's Greatest Search Engine!",
                "type": "news",
                "url": "https://news.you.com",
            }
        ]
    }
}

# Expected parse of NEWS_RESPONSE_RAW: one Document per result, with the
# description as page content and the whole result dict as metadata.
NEWS_RESPONSE_PARSED = [
    Document(page_content=str(result["description"]), metadata=result)
    for result in NEWS_RESPONSE_RAW["news"]["results"]
]
|
||||
|
||||
|
||||
@responses.activate
def test_raw_results() -> None:
    """The legacy `snippet` endpoint type is served by the mocked `/search` route."""
    responses.add(
        responses.GET, f"{TEST_ENDPOINT}/search", json=MOCK_RESPONSE_RAW, status=200
    )

    wrapper = YouSearchAPIWrapper(endpoint_type="snippet", ydc_api_key="test")
    payload = wrapper.raw_results("Test query text")

    assert payload == MOCK_RESPONSE_RAW
|
||||
|
||||
|
||||
@responses.activate
def test_raw_results_defaults() -> None:
    """With no endpoint_type given, raw_results() hits the `/search` route."""
    responses.add(
        responses.GET, f"{TEST_ENDPOINT}/search", json=MOCK_RESPONSE_RAW, status=200
    )

    wrapper = YouSearchAPIWrapper(ydc_api_key="test")
    payload = wrapper.raw_results("Test query text")

    assert payload == MOCK_RESPONSE_RAW
|
||||
|
||||
|
||||
@responses.activate
def test_raw_results_news() -> None:
    """With endpoint_type `news`, raw_results() returns the `/news` payload as-is."""
    responses.add(
        responses.GET, f"{TEST_ENDPOINT}/news", json=NEWS_RESPONSE_RAW, status=200
    )

    wrapper = YouSearchAPIWrapper(endpoint_type="news", ydc_api_key="test")
    payload = wrapper.raw_results("Test news text")

    assert payload == NEWS_RESPONSE_RAW
|
||||
|
||||
|
||||
@responses.activate
def test_results() -> None:
    """results() turns every snippet of every mock hit into a Document."""
    responses.add(
        responses.GET, f"{TEST_ENDPOINT}/search", json=MOCK_RESPONSE_RAW, status=200
    )

    wrapper = YouSearchAPIWrapper(ydc_api_key="test")
    documents = wrapper.results("Test query text")

    assert documents == MOCK_PARSED_OUTPUT
|
||||
|
||||
|
||||
@responses.activate
def test_results_max_docs() -> None:
    """With k=2, results() returns only the two Documents of the first hit."""
    responses.add(
        responses.GET, f"{TEST_ENDPOINT}/search", json=MOCK_RESPONSE_RAW, status=200
    )

    wrapper = YouSearchAPIWrapper(k=2, ydc_api_key="test")
    documents = wrapper.results("Test query text")

    assert documents == generate_parsed_output()
|
||||
|
||||
|
||||
@responses.activate
def test_results_limit_snippets() -> None:
    """With n_snippets_per_hit=1, results() keeps one snippet per hit."""
    responses.add(
        responses.GET, f"{TEST_ENDPOINT}/search", json=MOCK_RESPONSE_RAW, status=200
    )

    wrapper = YouSearchAPIWrapper(n_snippets_per_hit=1, ydc_api_key="test")
    documents = wrapper.results("Test query text")

    assert documents == LIMITED_PARSED_OUTPUT
|
||||
|
||||
|
||||
@responses.activate
def test_results_news() -> None:
    """With endpoint_type `news`, results() yields one Document per news result."""
    responses.add(
        responses.GET, f"{TEST_ENDPOINT}/news", json=NEWS_RESPONSE_RAW, status=200
    )

    wrapper = YouSearchAPIWrapper(endpoint_type="news", ydc_api_key="test")
    documents = wrapper.results("Test news text")

    assert documents == NEWS_RESPONSE_PARSED
|
||||
|
||||
|
||||
# @todo test async methods
|
Reference in New Issue
Block a user