From 5affbada616619a08e0fd67e1db098ee00481770 Mon Sep 17 00:00:00 2001
From: John Kelly
Date: Fri, 19 Jul 2024 18:12:39 +0100
Subject: [PATCH] langchain: Add `aadd_documents` to `ParentDocumentRetriever` (#23969)

- **Description:** Add an async version of `add_documents` to `ParentDocumentRetriever`
- **Twitter handle:** @johnkdev

---------

Co-authored-by: John Kelly
Co-authored-by: Chester Curme
Co-authored-by: Eugene Yurtsev
---
 .../retrievers/parent_document_retriever.py | 58 +++++++++++++------
 1 file changed, 40 insertions(+), 18 deletions(-)

diff --git a/libs/langchain/langchain/retrievers/parent_document_retriever.py b/libs/langchain/langchain/retrievers/parent_document_retriever.py
index 8729b6b391c..1e1fb2df250 100644
--- a/libs/langchain/langchain/retrievers/parent_document_retriever.py
+++ b/libs/langchain/langchain/retrievers/parent_document_retriever.py
@@ -1,5 +1,5 @@
 import uuid
-from typing import Any, List, Optional, Sequence
+from typing import Any, List, Optional, Sequence, Tuple

 from langchain_core.documents import Document
 from langchain_text_splitters import TextSplitter
@@ -69,27 +69,12 @@ class ParentDocumentRetriever(MultiVectorRetriever):
     metadata.
     """

-    def add_documents(
+    def _split_docs_for_adding(
         self,
         documents: List[Document],
         ids: Optional[List[str]] = None,
         add_to_docstore: bool = True,
-        **kwargs: Any,
-    ) -> None:
-        """Adds documents to the docstore and vectorstores.
-
-        Args:
-            documents: List of documents to add
-            ids: Optional list of ids for documents. If provided should be the same
-                length as the list of documents. Can be provided if parent documents
-                are already in the document store and you don't want to re-add
-                to the docstore. If not provided, random UUIDs will be used as
-                ids.
-            add_to_docstore: Boolean of whether to add documents to docstore.
-                This can be false if and only if `ids` are provided. You may want
-                to set this to False if the documents are already in the docstore
-                and you don't want to re-add them.
-        """
+    ) -> Tuple[List[Document], List[Tuple[str, Document]]]:
         if self.parent_splitter is not None:
             documents = self.parent_splitter.split_documents(documents)
         if ids is None:
@@ -120,6 +105,43 @@ class ParentDocumentRetriever(MultiVectorRetriever):
                 _doc.metadata[self.id_key] = _id
             docs.extend(sub_docs)
             full_docs.append((_id, doc))
+
+        return docs, full_docs
+
+    def add_documents(
+        self,
+        documents: List[Document],
+        ids: Optional[List[str]] = None,
+        add_to_docstore: bool = True,
+        **kwargs: Any,
+    ) -> None:
+        """Adds documents to the docstore and vectorstores.
+
+        Args:
+            documents: List of documents to add
+            ids: Optional list of ids for documents. If provided should be the same
+                length as the list of documents. Can be provided if parent documents
+                are already in the document store and you don't want to re-add
+                to the docstore. If not provided, random UUIDs will be used as
+                ids.
+            add_to_docstore: Boolean of whether to add documents to docstore.
+                This can be false if and only if `ids` are provided. You may want
+                to set this to False if the documents are already in the docstore
+                and you don't want to re-add them.
+        """
+        docs, full_docs = self._split_docs_for_adding(documents, ids, add_to_docstore)
         self.vectorstore.add_documents(docs, **kwargs)
         if add_to_docstore:
             self.docstore.mset(full_docs)
+
+    async def aadd_documents(
+        self,
+        documents: List[Document],
+        ids: Optional[List[str]] = None,
+        add_to_docstore: bool = True,
+        **kwargs: Any,
+    ) -> None:
+        docs, full_docs = self._split_docs_for_adding(documents, ids, add_to_docstore)
+        await self.vectorstore.aadd_documents(docs, **kwargs)
+        if add_to_docstore:
+            await self.docstore.amset(full_docs)