mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-03 19:57:51 +00:00
Add youdotcom retriever (#11304)
--------- Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
1655ff2ded
commit
9903a70379
62
docs/extras/integrations/retrievers/you-retriever.ipynb
Normal file
62
docs/extras/integrations/retrievers/you-retriever.ipynb
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "47828a7a",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Using the You.com Retriever\n",
|
||||||
|
"The retriever from You.com is good for retrieving lots of text. For each URL we find relevant, we return several of the best text snippets.\n",
|
||||||
|
"\n",
|
||||||
|
"First you just need to initialize the retriever"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "a90d61d4",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain.retrievers.you import YouRetriever\n",
|
||||||
|
"from langchain.chains import RetrievalQA\n",
|
||||||
|
"from langchain.llms import OpenAI\n",
|
||||||
|
"\n",
|
||||||
|
"yr = YouRetriever()\n",
|
||||||
|
"qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type=\"map_reduce\", retriever=yr)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "4a223f2f",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"query = \"what starting ohio state quarterback most recently went their entire college career without beating Michigan?\"\n",
|
||||||
|
"qa.run(query)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.9.17"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
46
libs/langchain/langchain/retrievers/you.py
Normal file
46
libs/langchain/langchain/retrievers/you.py
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
|
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
|
||||||
|
from langchain.pydantic_v1 import root_validator
|
||||||
|
from langchain.schema import BaseRetriever, Document
|
||||||
|
from langchain.utils import get_from_dict_or_env
|
||||||
|
|
||||||
|
|
||||||
|
class YouRetriever(BaseRetriever):
    """`You` retriever that uses You.com's search API.

    Connecting to the You.com API requires an API key, which
    you can get by emailing api@you.com.
    You can check out our docs at https://documentation.you.com.

    The key is read from the `ydc_api_key` field or, when that is not
    supplied, from the `YDC_API_KEY` environment variable.
    """

    # API key sent in the `X-API-Key` header of every search request.
    ydc_api_key: str

    @root_validator(pre=True)
    def validate_client(
        cls,
        values: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Resolve the API key before field validation runs.

        Args:
            values: Raw keyword arguments passed to the constructor.

        Returns:
            The same mapping with `ydc_api_key` filled in by
            `get_from_dict_or_env(values, "ydc_api_key", "YDC_API_KEY")`.
        """
        values["ydc_api_key"] = get_from_dict_or_env(
            values, "ydc_api_key", "YDC_API_KEY"
        )
        return values

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Search You.com and return one `Document` per text snippet.

        Args:
            query: Free-text search query.
            run_manager: Callback manager for this retriever run (unused here).

        Returns:
            Documents whose `page_content` is a snippet, in the order the
            hits and their snippets appear in the response.

        Raises:
            KeyError: If the response JSON has no `"hits"` key, or a hit
                has no `"snippets"` key.
        """
        import requests

        headers = {"X-API-Key": self.ydc_api_key}
        # Pass the query through `params` so requests URL-encodes it; raw
        # interpolation breaks on characters such as `&`, `#`, `+` or `=`.
        results = requests.get(
            "https://api.ydc-index.io/search",
            params={"query": query},
            headers=headers,
        ).json()

        return [
            Document(page_content=snippet)
            for hit in results["hits"]
            for snippet in hit["snippets"]
        ]
|
@ -0,0 +1,16 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
from langchain.retrievers.you import YouRetriever
|
||||||
|
|
||||||
|
|
||||||
|
class TestYouRetriever:
    """Integration tests that exercise `YouRetriever` against the real API."""

    @classmethod
    def setup_class(cls) -> None:
        """Fail fast when the `YDC_API_KEY` environment variable is missing."""
        api_key = os.getenv("YDC_API_KEY")
        if not api_key:
            raise ValueError("YDC_API_KEY environment variable is not set")

    def test_get_relevant_documents(self) -> None:
        """A simple query should yield at least one document."""
        documents = YouRetriever().get_relevant_documents("test")

        assert len(documents) > 0
|
26
libs/langchain/tests/unit_tests/retrievers/test_you.py
Normal file
26
libs/langchain/tests/unit_tests/retrievers/test_you.py
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
|
from requests import Response
|
||||||
|
|
||||||
|
from langchain.retrievers.you import YouRetriever
|
||||||
|
from langchain.schema import Document
|
||||||
|
|
||||||
|
|
||||||
|
class TestYouRetriever:
    """Unit tests for `YouRetriever` with the HTTP layer mocked out."""

    def test_get_relevant_documents(self) -> None:
        """Each snippet of each hit becomes one `Document`, in order."""
        fixture = {"hits": [{"snippets": ["yo"]}, {"snippets": ["bird up"]}]}
        response = Response()
        # `Response.json()` decodes from the private `_content` buffer.
        response._content = json.dumps(fixture).encode("utf-8")

        # Scope the fake key to this test so it does not leak into the
        # process environment seen by other tests.
        with mock.patch.dict(os.environ, {"YDC_API_KEY": "MOCK KEY!"}):
            retriever = YouRetriever()

            with mock.patch("requests.get") as mock_get:
                mock_get.return_value = response

                actual = retriever.get_relevant_documents("test")

                # Guard against the retriever bypassing the mocked call.
                mock_get.assert_called_once()
                assert actual == [
                    Document(page_content="yo"),
                    Document(page_content="bird up"),
                ]
|
Loading…
Reference in New Issue
Block a user