From 7124f2ebfadbbd227fdb9878bdf3a75a4281624c Mon Sep 17 00:00:00 2001 From: William FH <13333726+hinthornw@users.noreply.github.com> Date: Mon, 14 Aug 2023 15:41:53 -0700 Subject: [PATCH] Parent Doc Retriever (#9214) 2 things: - Implement the private method rather than the public one so callbacks are handled properly - Add search_kwargs (Open to not adding this if we are trying to deprecate this UX, but it seems like, as a user, I'd assume similar args to the vector store retriever. In fact, some may assume this implements the same interface, but I'm not dealing with that here) - --- .../retrievers/parent_document_retriever.py | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/libs/langchain/langchain/retrievers/parent_document_retriever.py b/libs/langchain/langchain/retrievers/parent_document_retriever.py index f427b1f7b4b..a4f775e449f 100644 --- a/libs/langchain/langchain/retrievers/parent_document_retriever.py +++ b/libs/langchain/langchain/retrievers/parent_document_retriever.py @@ -1,7 +1,9 @@ import uuid -from typing import Any, Dict, List, Optional +from typing import List, Optional -from langchain.callbacks.base import Callbacks +from pydantic import Field + +from langchain.callbacks.manager import CallbackManagerForRetrieverRun from langchain.schema.document import Document from langchain.schema.retriever import BaseRetriever from langchain.schema.storage import BaseStore @@ -71,17 +73,20 @@ class ParentDocumentRetriever(BaseRetriever): parent_splitter: Optional[TextSplitter] = None """The text splitter to use to create parent documents.
If none, then the parent documents will be the raw documents passed in.""" + search_kwargs: dict = Field(default_factory=dict) + """Keyword arguments to pass to the search function.""" - def get_relevant_documents( - self, - query: str, - *, - callbacks: Callbacks = None, - tags: Optional[List[str]] = None, - metadata: Optional[Dict[str, Any]] = None, - **kwargs: Any, + def _get_relevant_documents( + self, query: str, *, run_manager: CallbackManagerForRetrieverRun ) -> List[Document]: - sub_docs = self.vectorstore.similarity_search(query) + """Get documents relevant to a query. + Args: + query: String to find relevant documents for + run_manager: The callbacks handler to use + Returns: + List of relevant documents + """ + sub_docs = self.vectorstore.similarity_search(query, **self.search_kwargs) # We do this to maintain the order of the ids that are returned ids = [] for d in sub_docs: