Add youdotcom retriever (#11304)

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
mrbean
2023-10-05 16:48:11 -04:00
committed by GitHub
parent 1655ff2ded
commit 9903a70379
4 changed files with 150 additions and 0 deletions

View File

@@ -0,0 +1,46 @@
from typing import Any, Dict, List
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
from langchain.pydantic_v1 import root_validator
from langchain.schema import BaseRetriever, Document
from langchain.utils import get_from_dict_or_env
class YouRetriever(BaseRetriever):
    """`You` retriever that uses You.com's search API.

    Connecting to the You.com API requires an API key, which you can get by
    emailing api@you.com.
    You can check out the docs at https://documentation.you.com.

    The key is looked up under ``ydc_api_key`` in the constructor arguments
    and under the ``YDC_API_KEY`` environment variable (see
    ``validate_client``).
    """

    ydc_api_key: str

    @root_validator(pre=True)
    def validate_client(
        cls,
        values: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Fill in ``values["ydc_api_key"]`` before field validation runs.

        Args:
            values: Raw constructor arguments for the model.

        Returns:
            The same dict with ``ydc_api_key`` set to whatever
            ``get_from_dict_or_env`` resolves for the ``ydc_api_key`` key /
            ``YDC_API_KEY`` environment variable.
        """
        values["ydc_api_key"] = get_from_dict_or_env(
            values, "ydc_api_key", "YDC_API_KEY"
        )
        return values

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Search You.com for ``query`` and return one Document per snippet.

        Args:
            query: Free-text search query.
            run_manager: Callback manager for this retriever run (not used
                here).

        Returns:
            A flat list with one ``Document`` for every entry of every hit's
            ``"snippets"`` list, in the order the response provides them.

        Raises:
            KeyError: If the JSON response has no ``"hits"`` key, or a hit has
                no ``"snippets"`` key.
        """
        # NOTE(review): imported at call time, presumably so `requests` is
        # only required when the retriever is actually used -- confirm.
        import requests

        # Pass the query via `params` so requests URL-encodes it. Splicing it
        # into the URL string would let characters such as "&", "#" or "+"
        # truncate the query or inject extra parameters.
        response = requests.get(
            "https://api.ydc-index.io/search",
            params={"query": query},
            headers={"X-API-Key": self.ydc_api_key},
        )
        results = response.json()

        docs: List[Document] = []
        for hit in results["hits"]:
            docs.extend(Document(page_content=snippet) for snippet in hit["snippets"])
        return docs

View File

@@ -0,0 +1,16 @@
import os
from langchain.retrievers.you import YouRetriever
class TestYouRetriever:
    """Unmocked test of YouRetriever; needs YDC_API_KEY in the environment."""

    @classmethod
    def setup_class(cls) -> None:
        """Abort the class early when no API key is configured."""
        api_key = os.getenv("YDC_API_KEY")
        if api_key:
            return
        raise ValueError("YDC_API_KEY environment variable is not set")

    def test_get_relevant_documents(self) -> None:
        """Querying for "test" should yield at least one document."""
        documents = YouRetriever().get_relevant_documents("test")
        assert len(documents) > 0

View File

@@ -0,0 +1,26 @@
import json
import os
from unittest import mock
from requests import Response
from langchain.retrievers.you import YouRetriever
from langchain.schema import Document
class TestYouRetriever:
    """Unit test for YouRetriever with the HTTP request mocked out."""

    def test_get_relevant_documents(self) -> None:
        """Every snippet of every hit becomes one Document, in order."""
        fixture = {"hits": [{"snippets": ["yo"]}, {"snippets": ["bird up"]}]}
        response = Response()
        # Response.json() decodes from the private `_content` buffer.
        response._content = json.dumps(fixture).encode("utf-8")

        # patch.dict restores os.environ on exit, so the fake key cannot leak
        # into other tests running in the same process.
        with mock.patch.dict(os.environ, {"YDC_API_KEY": "MOCK KEY!"}):
            with mock.patch("requests.get", return_value=response) as mock_get:
                retriever = YouRetriever()
                actual = retriever.get_relevant_documents("test")

        # Guard against the test passing without the mocked call being hit.
        mock_get.assert_called_once()
        assert actual == [
            Document(page_content="yo"),
            Document(page_content="bird up"),
        ]