mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-02 03:26:17 +00:00
langchain: Add aadd_documents to ParentDocumentRetriever (#23969)
- **Description:** Add an async version of `add_documents` to `ParentDocumentRetriever`
- **Twitter handle:** @johnkdev

---------

Co-authored-by: John Kelly <j.kelly@mwam.com>
Co-authored-by: Chester Curme <chester.curme@gmail.com>
Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
import uuid
|
import uuid
|
||||||
from typing import Any, List, Optional, Sequence
|
from typing import Any, List, Optional, Sequence, Tuple
|
||||||
|
|
||||||
from langchain_core.documents import Document
|
from langchain_core.documents import Document
|
||||||
from langchain_text_splitters import TextSplitter
|
from langchain_text_splitters import TextSplitter
|
||||||
@@ -69,27 +69,12 @@ class ParentDocumentRetriever(MultiVectorRetriever):
|
|||||||
metadata.
|
metadata.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def add_documents(
|
def _split_docs_for_adding(
|
||||||
self,
|
self,
|
||||||
documents: List[Document],
|
documents: List[Document],
|
||||||
ids: Optional[List[str]] = None,
|
ids: Optional[List[str]] = None,
|
||||||
add_to_docstore: bool = True,
|
add_to_docstore: bool = True,
|
||||||
**kwargs: Any,
|
) -> Tuple[List[Document], List[Tuple[str, Document]]]:
|
||||||
) -> None:
|
|
||||||
"""Adds documents to the docstore and vectorstores.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
documents: List of documents to add
|
|
||||||
ids: Optional list of ids for documents. If provided should be the same
|
|
||||||
length as the list of documents. Can be provided if parent documents
|
|
||||||
are already in the document store and you don't want to re-add
|
|
||||||
to the docstore. If not provided, random UUIDs will be used as
|
|
||||||
ids.
|
|
||||||
add_to_docstore: Boolean of whether to add documents to docstore.
|
|
||||||
This can be false if and only if `ids` are provided. You may want
|
|
||||||
to set this to False if the documents are already in the docstore
|
|
||||||
and you don't want to re-add them.
|
|
||||||
"""
|
|
||||||
if self.parent_splitter is not None:
|
if self.parent_splitter is not None:
|
||||||
documents = self.parent_splitter.split_documents(documents)
|
documents = self.parent_splitter.split_documents(documents)
|
||||||
if ids is None:
|
if ids is None:
|
||||||
@@ -120,6 +105,43 @@ class ParentDocumentRetriever(MultiVectorRetriever):
|
|||||||
_doc.metadata[self.id_key] = _id
|
_doc.metadata[self.id_key] = _id
|
||||||
docs.extend(sub_docs)
|
docs.extend(sub_docs)
|
||||||
full_docs.append((_id, doc))
|
full_docs.append((_id, doc))
|
||||||
|
|
||||||
|
return docs, full_docs
|
||||||
|
|
||||||
|
def add_documents(
|
||||||
|
self,
|
||||||
|
documents: List[Document],
|
||||||
|
ids: Optional[List[str]] = None,
|
||||||
|
add_to_docstore: bool = True,
|
||||||
|
**kwargs: Any,
|
||||||
|
) -> None:
|
||||||
|
"""Adds documents to the docstore and vectorstores.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
documents: List of documents to add
|
||||||
|
ids: Optional list of ids for documents. If provided should be the same
|
||||||
|
length as the list of documents. Can be provided if parent documents
|
||||||
|
are already in the document store and you don't want to re-add
|
||||||
|
to the docstore. If not provided, random UUIDs will be used as
|
||||||
|
ids.
|
||||||
|
add_to_docstore: Boolean of whether to add documents to docstore.
|
||||||
|
This can be false if and only if `ids` are provided. You may want
|
||||||
|
to set this to False if the documents are already in the docstore
|
||||||
|
and you don't want to re-add them.
|
||||||
|
"""
|
||||||
|
docs, full_docs = self._split_docs_for_adding(documents, ids, add_to_docstore)
|
||||||
self.vectorstore.add_documents(docs, **kwargs)
|
self.vectorstore.add_documents(docs, **kwargs)
|
||||||
if add_to_docstore:
|
if add_to_docstore:
|
||||||
self.docstore.mset(full_docs)
|
self.docstore.mset(full_docs)
|
||||||
|
|
||||||
|
async def aadd_documents(
|
||||||
|
self,
|
||||||
|
documents: List[Document],
|
||||||
|
ids: Optional[List[str]] = None,
|
||||||
|
add_to_docstore: bool = True,
|
||||||
|
**kwargs: Any,
|
||||||
|
) -> None:
|
||||||
|
docs, full_docs = self._split_docs_for_adding(documents, ids, add_to_docstore)
|
||||||
|
await self.vectorstore.aadd_documents(docs, **kwargs)
|
||||||
|
if add_to_docstore:
|
||||||
|
await self.docstore.amset(full_docs)
|
||||||
|
Reference in New Issue
Block a user