mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-29 01:48:57 +00:00
Allow base_store to be used directly with MultiVectorRetriever (#14202)
Allow users to pass a generic `BaseStore[str, bytes]` to MultiVectorRetriever, removing the need to use the `create_kv_docstore` method. This encoding will now happen internally. @rlancemartin @eyurtsev --------- Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
This commit is contained in:
parent
67662564f3
commit
a26c4a0930
@ -88,7 +88,7 @@
|
||||
"# The retriever (empty to start)\n",
|
||||
"retriever = MultiVectorRetriever(\n",
|
||||
" vectorstore=vectorstore,\n",
|
||||
" docstore=store,\n",
|
||||
" base_store=store,\n",
|
||||
" id_key=id_key,\n",
|
||||
")\n",
|
||||
"import uuid\n",
|
||||
@ -143,7 +143,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court.', metadata={'doc_id': '455205f7-bb7d-4c36-b442-d1d6f9f701ed', 'source': '../../state_of_the_union.txt'})"
|
||||
"Document(page_content='Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court.', metadata={'doc_id': '59899493-92a0-41cb-b6ba-a854730ad74a', 'source': '../../state_of_the_union.txt'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
@ -188,7 +188,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 10,
|
||||
"id": "36739460-a737-4a8e-b70f-50bf8c8eaae7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -198,7 +198,7 @@
|
||||
"9875"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -223,7 +223,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 11,
|
||||
"id": "1433dff4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -238,7 +238,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 12,
|
||||
"id": "35b30390",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -253,7 +253,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 13,
|
||||
"id": "41a2a738",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -263,7 +263,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 14,
|
||||
"id": "7ac5e4b1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -276,7 +276,7 @@
|
||||
"# The retriever (empty to start)\n",
|
||||
"retriever = MultiVectorRetriever(\n",
|
||||
" vectorstore=vectorstore,\n",
|
||||
" docstore=store,\n",
|
||||
" base_store=store,\n",
|
||||
" id_key=id_key,\n",
|
||||
")\n",
|
||||
"doc_ids = [str(uuid.uuid4()) for _ in docs]"
|
||||
@ -338,7 +338,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content=\"The document is a transcript of a speech given by the President of the United States. The President discusses several important issues and initiatives, including the nomination of a Supreme Court Justice, border security and immigration reform, protecting women's rights, advancing LGBTQ+ equality, bipartisan legislation, addressing the opioid epidemic and mental health, supporting veterans, investigating the health effects of burn pits on military personnel, ending cancer, and the strength and resilience of the American people.\", metadata={'doc_id': '79fa2e9f-28d9-4372-8af3-2caf4f1de312'})"
|
||||
"Document(page_content=\"The document is a speech given by the President of the United States. The President discusses various important issues and goals for the country, including nominating a Supreme Court Justice, securing the border and fixing the immigration system, protecting women's rights, supporting veterans, addressing the opioid epidemic, improving mental health care, and ending cancer. The President emphasizes the unity and strength of the American people and expresses optimism for the future of the nation.\", metadata={'doc_id': '8fdf4009-628c-400d-949c-1d3f4daf1e66'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
@ -393,7 +393,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 22,
|
||||
"id": "5219b085",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -418,7 +418,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"execution_count": 23,
|
||||
"id": "523deb92",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -429,7 +429,7 @@
|
||||
" {\"doc\": lambda x: x.page_content}\n",
|
||||
" # Only asking for 3 hypothetical questions, but this could be adjusted\n",
|
||||
" | ChatPromptTemplate.from_template(\n",
|
||||
" \"Generate a list of 3 hypothetical questions that the below document could be used to answer:\\n\\n{doc}\"\n",
|
||||
" \"Generate a list of exactly 3 hypothetical questions that the below document could be used to answer:\\n\\n{doc}\"\n",
|
||||
" )\n",
|
||||
" | ChatOpenAI(max_retries=0, model=\"gpt-4\").bind(\n",
|
||||
" functions=functions, function_call={\"name\": \"hypothetical_questions\"}\n",
|
||||
@ -440,19 +440,19 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"execution_count": 24,
|
||||
"id": "11d30554",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[\"What was the author's initial impression of philosophy as a field of study, and how did it change when they got to college?\",\n",
|
||||
" 'Why did the author decide to switch their focus to Artificial Intelligence (AI)?',\n",
|
||||
" \"What led to the author's disillusionment with the field of AI as it was practiced at the time?\"]"
|
||||
"[\"What were the author's initial areas of interest before college?\",\n",
|
||||
" \"What was the author's experience with programming in his early years?\",\n",
|
||||
" 'Why did the author switch his focus from AI to Lisp?']"
|
||||
]
|
||||
},
|
||||
"execution_count": 33,
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -463,7 +463,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"execution_count": 25,
|
||||
"id": "3eb2e48c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -473,7 +473,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 67,
|
||||
"execution_count": 26,
|
||||
"id": "b2cd6e75",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -488,7 +488,7 @@
|
||||
"# The retriever (empty to start)\n",
|
||||
"retriever = MultiVectorRetriever(\n",
|
||||
" vectorstore=vectorstore,\n",
|
||||
" docstore=store,\n",
|
||||
" base_store=store,\n",
|
||||
" id_key=id_key,\n",
|
||||
")\n",
|
||||
"doc_ids = [str(uuid.uuid4()) for _ in docs]"
|
||||
@ -496,7 +496,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 68,
|
||||
"execution_count": 27,
|
||||
"id": "18831b3b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -510,7 +510,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 69,
|
||||
"execution_count": 28,
|
||||
"id": "224b24c5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -521,7 +521,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 70,
|
||||
"execution_count": 29,
|
||||
"id": "7b442b90",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -531,20 +531,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 71,
|
||||
"execution_count": 30,
|
||||
"id": "089b5ad0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content=\"What is the President's stance on immigration reform?\", metadata={'doc_id': '505d73e3-8350-46ec-a58e-3af032f04ab3'}),\n",
|
||||
" Document(page_content=\"What is the President's stance on immigration reform?\", metadata={'doc_id': '1c9618f0-7660-4b4f-a37c-509cbbbf6dba'}),\n",
|
||||
" Document(page_content=\"What is the President's stance on immigration reform?\", metadata={'doc_id': '82c08209-b904-46a8-9532-edd2380950b7'}),\n",
|
||||
" Document(page_content='What measures is the President proposing to protect the rights of LGBTQ+ Americans?', metadata={'doc_id': '82c08209-b904-46a8-9532-edd2380950b7'})]"
|
||||
"[Document(page_content='What made Robert Morris advise the author to leave Y Combinator?', metadata={'doc_id': '740e484e-d67c-45f7-989d-9928aaf51c28'}),\n",
|
||||
" Document(page_content=\"How did the author's mother's illness affect his decision to leave Y Combinator?\", metadata={'doc_id': '740e484e-d67c-45f7-989d-9928aaf51c28'}),\n",
|
||||
" Document(page_content='What led the author to start publishing essays online?', metadata={'doc_id': '675ccee3-ce0b-4d5d-892c-b8942370babd'}),\n",
|
||||
" Document(page_content='What measures are being taken to secure the border and fix the immigration system?', metadata={'doc_id': '2d51f010-969e-48a9-9e82-6b12bc7ab3d4'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 71,
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -555,7 +555,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 72,
|
||||
"execution_count": 31,
|
||||
"id": "7594b24e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -565,17 +565,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 73,
|
||||
"execution_count": 32,
|
||||
"id": "4c120c65",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"9194"
|
||||
"9844"
|
||||
]
|
||||
},
|
||||
"execution_count": 73,
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -583,14 +583,6 @@
|
||||
"source": [
|
||||
"len(retrieved_docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "616cfeeb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@ -609,7 +601,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
"version": "3.10.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -1,13 +1,13 @@
|
||||
from enum import Enum
|
||||
from typing import List
|
||||
from typing import List, Optional
|
||||
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.pydantic_v1 import Field
|
||||
from langchain_core.retrievers import BaseRetriever
|
||||
from langchain_core.stores import BaseStore
|
||||
from langchain_core.vectorstores import VectorStore
|
||||
|
||||
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
|
||||
from langchain.storage._lc_store import create_kv_docstore
|
||||
|
||||
|
||||
class SearchType(str, Enum):
|
||||
@ -27,12 +27,35 @@ class MultiVectorRetriever(BaseRetriever):
|
||||
and their embedding vectors"""
|
||||
docstore: BaseStore[str, Document]
|
||||
"""The storage layer for the parent documents"""
|
||||
id_key: str = "doc_id"
|
||||
search_kwargs: dict = Field(default_factory=dict)
|
||||
id_key: str
|
||||
search_kwargs: dict
|
||||
"""Keyword arguments to pass to the search function."""
|
||||
search_type: SearchType = SearchType.similarity
|
||||
search_type: SearchType
|
||||
"""Type of search to perform (similarity / mmr)"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
vectorstore: VectorStore,
|
||||
docstore: Optional[BaseStore[str, Document]] = None,
|
||||
base_store: Optional[BaseStore[str, bytes]] = None,
|
||||
id_key: str = "doc_id",
|
||||
search_kwargs: Optional[dict] = None,
|
||||
search_type: SearchType = SearchType.similarity,
|
||||
):
|
||||
if base_store is not None:
|
||||
docstore = create_kv_docstore(base_store)
|
||||
elif docstore is None:
|
||||
raise Exception("You must pass a `base_store` parameter.")
|
||||
|
||||
super().__init__(
|
||||
vectorstore=vectorstore,
|
||||
docstore=docstore,
|
||||
id_key=id_key,
|
||||
search_kwargs=search_kwargs if search_kwargs is not None else {},
|
||||
search_type=search_type,
|
||||
)
|
||||
|
||||
def _get_relevant_documents(
|
||||
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
|
||||
) -> List[Document]:
|
||||
|
Loading…
Reference in New Issue
Block a user