diff --git a/libs/community/langchain_community/vectorstores/aperturedb.py b/libs/community/langchain_community/vectorstores/aperturedb.py index 44b278b38d9..a19a9ece5c9 100644 --- a/libs/community/langchain_community/vectorstores/aperturedb.py +++ b/libs/community/langchain_community/vectorstores/aperturedb.py @@ -460,7 +460,45 @@ class ApertureDB(VectorStore): assert db.last_query_ok(), response return response[0]["FindDescriptorSet"]["entities"] - @override + def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]: + """Add or update documents in the vectorstore. + + Args: + documents: Documents to add to the vectorstore. + kwargs: Additional keyword arguments. + if kwargs contains ids and documents contain ids, + the ids in the kwargs will receive precedence. + + Returns: + List of IDs of the added texts. + + Raises: + ValueError: If the number of ids does not match the number of documents. + """ + + if "ids" in kwargs: + ids = kwargs.pop("ids") + if ids and len(ids) != len(documents): + raise ValueError( + "The number of ids must match the number of documents. " + f"Got {len(ids)} ids and {len(documents)} documents." 
+ ) + + documents_ = [] + + for id_, document in zip(ids, documents): + doc_with_id = Document( + page_content=document.page_content, + metadata=document.metadata, + id=id_, + ) + documents_.append(doc_with_id) + else: + documents_ = documents + + # If upsert has been implemented, we can use it to add documents + return self.upsert(documents_, **kwargs)["succeeded"] + def upsert(self, items: Sequence[Document], /, **kwargs: Any) -> UpsertResponse: """Insert or update items diff --git a/libs/core/langchain_core/vectorstores/base.py b/libs/core/langchain_core/vectorstores/base.py index f6f3f37772d..806549f709b 100644 --- a/libs/core/langchain_core/vectorstores/base.py +++ b/libs/core/langchain_core/vectorstores/base.py @@ -29,30 +29,23 @@ from itertools import cycle from typing import ( TYPE_CHECKING, Any, - AsyncIterable, - AsyncIterator, Callable, ClassVar, Collection, Dict, Iterable, - Iterator, List, Optional, Sequence, Tuple, Type, TypeVar, - Union, ) -from langchain_core._api import beta from langchain_core.embeddings import Embeddings from langchain_core.pydantic_v1 import Field, root_validator from langchain_core.retrievers import BaseRetriever from langchain_core.runnables.config import run_in_executor -from langchain_core.utils.aiter import abatch_iterate -from langchain_core.utils.iter import batch_iterate if TYPE_CHECKING: from langchain_core.callbacks.manager import ( @@ -60,7 +53,6 @@ if TYPE_CHECKING: CallbackManagerForRetrieverRun, ) from langchain_core.documents import Document - from langchain_core.indexing import UpsertResponse logger = logging.getLogger(__name__) @@ -96,7 +88,7 @@ class VectorStore(ABC): ValueError: If the number of metadatas does not match the number of texts. ValueError: If the number of ids does not match the number of texts. 
""" - if type(self).upsert != VectorStore.upsert: + if type(self).add_documents != VectorStore.add_documents: # Import document in local scope to avoid circular imports from langchain_core.documents import Document @@ -109,190 +101,19 @@ class VectorStore(ABC): if metadatas and len(metadatas) != len(texts_): raise ValueError( "The number of metadatas must match the number of texts." - "Got {len(metadatas)} metadatas and {len(texts_)} texts." + f"Got {len(metadatas)} metadatas and {len(texts_)} texts." ) - - if "ids" in kwargs: - ids = kwargs.pop("ids") - if ids and len(ids) != len(texts_): - raise ValueError( - "The number of ids must match the number of texts." - "Got {len(ids)} ids and {len(texts_)} texts." - ) - else: - ids = None - metadatas_ = iter(metadatas) if metadatas else cycle([{}]) - ids_: Iterable[Union[str, None]] = ids if ids is not None else cycle([None]) docs = [ - Document(page_content=text, metadata=metadata_, id=id_) - for text, metadata_, id_ in zip(texts, metadatas_, ids_) + Document(page_content=text, metadata=metadata_) + for text, metadata_ in zip(texts, metadatas_) ] - upsert_response = self.upsert(docs, **kwargs) - return upsert_response["succeeded"] + + return self.add_documents(docs, **kwargs) raise NotImplementedError( f"`add_texts` has not been implemented for {self.__class__.__name__} " ) - # Developer guidelines: - # Do not override streaming_upsert! - @beta(message="Added in 0.2.11. The API is subject to change.") - def streaming_upsert( - self, items: Iterable[Document], /, batch_size: int, **kwargs: Any - ) -> Iterator[UpsertResponse]: - """Upsert documents in a streaming fashion. - - Args: - items: Iterable of Documents to add to the vectorstore. - batch_size: The size of each batch to upsert. - kwargs: Additional keyword arguments. - kwargs should only include parameters that are common to all - documents. (e.g., timeout for indexing, retry policy, etc.) - kwargs should not include ids to avoid ambiguous semantics. 
- Instead, the ID should be provided as part of the Document object. - - Yields: - UpsertResponse: A response object that contains the list of IDs that were - successfully added or updated in the vectorstore and the list of IDs that - failed to be added or updated. - - .. versionadded:: 0.2.11 - """ - # The default implementation of this method breaks the input into - # batches of size `batch_size` and calls the `upsert` method on each batch. - # Subclasses can override this method to provide a more efficient - # implementation. - for item_batch in batch_iterate(batch_size, items): - yield self.upsert(item_batch, **kwargs) - - # Please note that we've added a new method `upsert` instead of re-using the - # existing `add_documents` method. - # This was done to resolve potential ambiguities around the behavior of **kwargs - # in existing add_documents / add_texts methods which could include per document - # information (e.g., the `ids` parameter). - # Over time the `add_documents` could be denoted as legacy and deprecated - # in favor of the `upsert` method. - @beta(message="Added in 0.2.11. The API is subject to change.") - def upsert(self, items: Sequence[Document], /, **kwargs: Any) -> UpsertResponse: - """Add or update documents in the vectorstore. - - The upsert functionality should utilize the ID field of the Document object - if it is provided. If the ID is not provided, the upsert method is free - to generate an ID for the document. - - When an ID is specified and the document already exists in the vectorstore, - the upsert method should update the document with the new data. If the document - does not exist, the upsert method should add the document to the vectorstore. - - Args: - items: Sequence of Documents to add to the vectorstore. - kwargs: Additional keyword arguments. 
- - Returns: - UpsertResponse: A response object that contains the list of IDs that were - successfully added or updated in the vectorstore and the list of IDs that - failed to be added or updated. - - .. versionadded:: 0.2.11 - """ - # Developer guidelines: - # - # Vectorstores implementations are free to extend `upsert` implementation - # to take in additional data per document. - # - # This data **SHOULD NOT** be part of the **kwargs** parameter, instead - # sub-classes can use a Union type on `documents` to include additional - # supported formats for the input data stream. - # - # For example, - # - # .. code-block:: python - # from typing import TypedDict - # - # class DocumentWithVector(TypedDict): - # document: Document - # vector: List[float] - # - # def upsert( - # self, - # documents: Union[Iterable[Document], Iterable[DocumentWithVector]], - # /, - # **kwargs - # ) -> UpsertResponse: - # \"\"\"Add or update documents in the vectorstore.\"\"\" - # # Implementation should check if documents is an - # # iterable of DocumentWithVector or Document - # pass - # - # Implementations that override upsert should include a new doc-string - # that explains the semantics of upsert and includes in code - # examples of how to insert using the alternate data formats. - - # The implementation does not delegate to the `add_texts` method or - # the `add_documents` method by default since those implementations - raise NotImplementedError( - f"upsert has not been implemented for {self.__class__.__name__}" - ) - - @beta(message="Added in 0.2.11. The API is subject to change.") - async def astreaming_upsert( - self, - items: AsyncIterable[Document], - /, - batch_size: int, - **kwargs: Any, - ) -> AsyncIterator[UpsertResponse]: - """Upsert documents in a streaming fashion. Async version of streaming_upsert. - - Args: - items: Iterable of Documents to add to the vectorstore. - batch_size: The size of each batch to upsert. - kwargs: Additional keyword arguments. 
- kwargs should only include parameters that are common to all - documents. (e.g., timeout for indexing, retry policy, etc.) - kwargs should not include ids to avoid ambiguous semantics. - Instead the ID should be provided as part of the Document object. - - Yields: - UpsertResponse: A response object that contains the list of IDs that were - successfully added or updated in the vectorstore and the list of IDs that - failed to be added or updated. - - .. versionadded:: 0.2.11 - """ - async for batch in abatch_iterate(batch_size, items): - yield await self.aupsert(batch, **kwargs) - - @beta(message="Added in 0.2.11. The API is subject to change.") - async def aupsert( - self, items: Sequence[Document], /, **kwargs: Any - ) -> UpsertResponse: - """Add or update documents in the vectorstore. Async version of upsert. - - The upsert functionality should utilize the ID field of the Document object - if it is provided. If the ID is not provided, the upsert method is free - to generate an ID for the document. - - When an ID is specified and the document already exists in the vectorstore, - the upsert method should update the document with the new data. If the document - does not exist, the upsert method should add the document to the vectorstore. - - Args: - items: Sequence of Documents to add to the vectorstore. - kwargs: Additional keyword arguments. - - Returns: - UpsertResponse: A response object that contains the list of IDs that were - successfully added or updated in the vectorstore and the list of IDs that - failed to be added or updated. - - .. versionadded:: 0.2.11 - """ - # Developer guidelines: See guidelines for the `upsert` method. 
- # The implementation does not delegate to the `add_texts` method or - # the `add_documents` method by default since those implementations - return await run_in_executor(None, self.upsert, items, **kwargs) - @property def embeddings(self) -> Optional[Embeddings]: """Access the query embedding object if available.""" @@ -407,7 +228,7 @@ class VectorStore(ABC): ValueError: If the number of metadatas does not match the number of texts. ValueError: If the number of ids does not match the number of texts. """ - if type(self).aupsert != VectorStore.aupsert: + if type(self).aadd_documents != VectorStore.aadd_documents: # Import document in local scope to avoid circular imports from langchain_core.documents import Document @@ -420,27 +241,16 @@ class VectorStore(ABC): if metadatas and len(metadatas) != len(texts_): raise ValueError( "The number of metadatas must match the number of texts." - "Got {len(metadatas)} metadatas and {len(texts_)} texts." + f"Got {len(metadatas)} metadatas and {len(texts_)} texts." ) - - if "ids" in kwargs: - ids = kwargs.pop("ids") - if ids and len(ids) != len(texts_): - raise ValueError( - "The number of ids must match the number of texts." - "Got {len(ids)} ids and {len(texts_)} texts." 
- ) - else: - ids = None - metadatas_ = iter(metadatas) if metadatas else cycle([{}]) - ids_: Iterable[Union[str, None]] = ids if ids is not None else cycle([None]) + docs = [ - Document(page_content=text, metadata=metadata_, id=id_) - for text, metadata_, id_ in zip(texts, metadatas_, ids_) + Document(page_content=text, metadata=metadata_) + for text, metadata_ in zip(texts, metadatas_) ] - upsert_response = await self.aupsert(docs, **kwargs) - return upsert_response["succeeded"] + + return await self.aadd_documents(docs, **kwargs) return await run_in_executor(None, self.add_texts, texts, metadatas, **kwargs) def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]: @@ -458,37 +268,22 @@ class VectorStore(ABC): Raises: ValueError: If the number of ids does not match the number of documents. """ - if type(self).upsert != VectorStore.upsert: - from langchain_core.documents import Document + if type(self).add_texts != VectorStore.add_texts: + if "ids" not in kwargs: + ids = [doc.id for doc in documents] - if "ids" in kwargs: - ids = kwargs.pop("ids") - if ids and len(ids) != len(documents): - raise ValueError( - "The number of ids must match the number of documents. " - "Got {len(ids)} ids and {len(documents)} documents." - ) + # If there's at least one valid ID, we'll assume that IDs + # should be used. 
+ if any(ids): + kwargs["ids"] = ids - documents_ = [] - - for id_, document in zip(ids, documents): - doc_with_id = Document( - page_content=document.page_content, - metadata=document.metadata, - id=id_, - ) - documents_.append(doc_with_id) - else: - documents_ = documents - - # If upsert has been implemented, we can use it to add documents - return self.upsert(documents_, **kwargs)["succeeded"] - - # Code path that delegates to add_text for backwards compatibility - # TODO: Handle the case where the user doesn't provide ids on the Collection - texts = [doc.page_content for doc in documents] - metadatas = [doc.metadata for doc in documents] - return self.add_texts(texts, metadatas, **kwargs) + texts = [doc.page_content for doc in documents] + metadatas = [doc.metadata for doc in documents] + return self.add_texts(texts, metadatas, **kwargs) + raise NotImplementedError( + f"`add_documents` and `add_texts` has not been implemented " + f"for {self.__class__.__name__} " + ) async def aadd_documents( self, documents: List[Document], **kwargs: Any @@ -506,41 +301,21 @@ class VectorStore(ABC): Raises: ValueError: If the number of IDs does not match the number of documents. """ - # If either upsert or aupsert has been implemented, we delegate to them! - if ( - type(self).aupsert != VectorStore.aupsert - or type(self).upsert != VectorStore.upsert - ): - # If aupsert has been implemented, we can use it to add documents - from langchain_core.documents import Document + # If the async method has been overridden, we'll use that. + if type(self).aadd_texts != VectorStore.aadd_texts: + if "ids" not in kwargs: + ids = [doc.id for doc in documents] - if "ids" in kwargs: - ids = kwargs.pop("ids") - if ids and len(ids) != len(documents): - raise ValueError( - "The number of ids must match the number of documents." - "Got {len(ids)} ids and {len(documents)} documents." - ) + # If there's at least one valid ID, we'll assume that IDs + # should be used. 
+ if any(ids): + kwargs["ids"] = ids - documents_ = [] + texts = [doc.page_content for doc in documents] + metadatas = [doc.metadata for doc in documents] + return await self.aadd_texts(texts, metadatas, **kwargs) - for id_, document in zip(ids, documents): - doc_with_id = Document( - page_content=document.page_content, - metadata=document.metadata, - id=id_, - ) - documents_.append(doc_with_id) - else: - documents_ = documents - - # The default implementation of aupsert delegates to upsert. - upsert_response = await self.aupsert(documents_, **kwargs) - return upsert_response["succeeded"] - - texts = [doc.page_content for doc in documents] - metadatas = [doc.metadata for doc in documents] - return await self.aadd_texts(texts, metadatas, **kwargs) + return await run_in_executor(None, self.add_documents, documents, **kwargs) def search(self, query: str, search_type: str, **kwargs: Any) -> List[Document]: """Return docs most similar to query using a specified search type. diff --git a/libs/core/langchain_core/vectorstores/in_memory.py b/libs/core/langchain_core/vectorstores/in_memory.py index 3adb1f6888c..ca6d7477820 100644 --- a/libs/core/langchain_core/vectorstores/in_memory.py +++ b/libs/core/langchain_core/vectorstores/in_memory.py @@ -8,12 +8,14 @@ from typing import ( Any, Callable, Dict, + Iterator, List, Optional, Sequence, Tuple, ) +from langchain_core._api import deprecated from langchain_core.documents import Document from langchain_core.embeddings import Embeddings from langchain_core.load import dumpd, load @@ -56,43 +58,71 @@ class InMemoryVectorStore(VectorStore): async def adelete(self, ids: Optional[Sequence[str]] = None, **kwargs: Any) -> None: self.delete(ids) - def upsert(self, items: Sequence[Document], /, **kwargs: Any) -> UpsertResponse: - vectors = self.embedding.embed_documents([item.page_content for item in items]) - ids = [] - for item, vector in zip(items, vectors): - doc_id = item.id if item.id else str(uuid.uuid4()) - ids.append(doc_id) - 
self.store[doc_id] = { - "id": doc_id, - "vector": vector, - "text": item.page_content, - "metadata": item.metadata, - } - return { - "succeeded": ids, - "failed": [], - } + def add_documents( + self, + documents: List[Document], + ids: Optional[List[str]] = None, + **kwargs: Any, + ) -> List[str]: + """Add documents to the store.""" + texts = [doc.page_content for doc in documents] + vectors = self.embedding.embed_documents(texts) - async def aupsert( - self, items: Sequence[Document], /, **kwargs: Any - ) -> UpsertResponse: - vectors = await self.embedding.aembed_documents( - [item.page_content for item in items] + if ids and len(ids) != len(texts): + raise ValueError( + f"ids must be the same length as texts. " + f"Got {len(ids)} ids and {len(texts)} texts." + ) + + id_iterator: Iterator[Optional[str]] = ( + iter(ids) if ids else iter(doc.id for doc in documents) ) - ids = [] - for item, vector in zip(items, vectors): - doc_id = item.id if item.id else str(uuid.uuid4()) - ids.append(doc_id) - self.store[doc_id] = { - "id": doc_id, + + ids_ = [] + + for doc, vector in zip(documents, vectors): + doc_id = next(id_iterator) + doc_id_ = doc_id if doc_id else str(uuid.uuid4()) + ids_.append(doc_id_) + self.store[doc_id_] = { + "id": doc_id_, "vector": vector, - "text": item.page_content, - "metadata": item.metadata, + "text": doc.page_content, + "metadata": doc.metadata, } - return { - "succeeded": ids, - "failed": [], - } + + return ids_ + + async def aadd_documents( + self, documents: List[Document], ids: Optional[List[str]] = None, **kwargs: Any + ) -> List[str]: + """Add documents to the store.""" + texts = [doc.page_content for doc in documents] + vectors = await self.embedding.aembed_documents(texts) + + if ids and len(ids) != len(texts): + raise ValueError( + f"ids must be the same length as texts. " + f"Got {len(ids)} ids and {len(texts)} texts." 
+ ) + + id_iterator: Iterator[Optional[str]] = ( + iter(ids) if ids else iter(doc.id for doc in documents) + ) + ids_: List[str] = [] + + for doc, vector in zip(documents, vectors): + doc_id = next(id_iterator) + doc_id_ = doc_id if doc_id else str(uuid.uuid4()) + ids_.append(doc_id_) + self.store[doc_id_] = { + "id": doc_id_, + "vector": vector, + "text": doc.page_content, + "metadata": doc.metadata, + } + + return ids_ def get_by_ids(self, ids: Sequence[str], /) -> List[Document]: """Get documents by their ids. @@ -117,6 +147,62 @@ class InMemoryVectorStore(VectorStore): ) return documents + @deprecated( + alternative="VectorStore.add_documents", + message=( + "This was a beta API that was added in 0.2.11. " + "It'll be removed in 0.3.0." + ), + since="0.2.29", + removal="0.3.0", + ) + def upsert(self, items: Sequence[Document], /, **kwargs: Any) -> UpsertResponse: + vectors = self.embedding.embed_documents([item.page_content for item in items]) + ids = [] + for item, vector in zip(items, vectors): + doc_id = item.id if item.id else str(uuid.uuid4()) + ids.append(doc_id) + self.store[doc_id] = { + "id": doc_id, + "vector": vector, + "text": item.page_content, + "metadata": item.metadata, + } + return { + "succeeded": ids, + "failed": [], + } + + @deprecated( + alternative="VectorStore.aadd_documents", + message=( + "This was a beta API that was added in 0.2.11. " + "It'll be removed in 0.3.0." 
+ ), + since="0.2.29", + removal="0.3.0", + ) + async def aupsert( + self, items: Sequence[Document], /, **kwargs: Any + ) -> UpsertResponse: + vectors = await self.embedding.aembed_documents( + [item.page_content for item in items] + ) + ids = [] + for item, vector in zip(items, vectors): + doc_id = item.id if item.id else str(uuid.uuid4()) + ids.append(doc_id) + self.store[doc_id] = { + "id": doc_id, + "vector": vector, + "text": item.page_content, + "metadata": item.metadata, + } + return { + "succeeded": ids, + "failed": [], + } + async def aget_by_ids(self, ids: Sequence[str], /) -> List[Document]: """Async get documents by their ids. diff --git a/libs/core/tests/unit_tests/vectorstores/test_vectorstore.py b/libs/core/tests/unit_tests/vectorstores/test_vectorstore.py index 3b5efc7a85b..971315752b8 100644 --- a/libs/core/tests/unit_tests/vectorstores/test_vectorstore.py +++ b/libs/core/tests/unit_tests/vectorstores/test_vectorstore.py @@ -1,69 +1,50 @@ +"""Set of tests that complement the standard tests for vectorstore. + +These tests verify that the base abstraction does appropriate delegation to +the relevant methods. +""" + from __future__ import annotations import uuid -from typing import Any, Dict, List, Optional, Sequence, Union - -from typing_extensions import TypedDict +from typing import Any, Dict, Iterable, List, Optional, Sequence from langchain_core.documents import Document from langchain_core.embeddings import Embeddings -from langchain_core.indexing import UpsertResponse from langchain_core.vectorstores import VectorStore -def test_custom_upsert_type() -> None: - """Test that we can override the signature of the upsert method - of the VectorStore class without creating typing issues by violating - the Liskov Substitution Principle. 
- """ - - class ByVector(TypedDict): - document: Document - vector: List[float] - - class CustomVectorStore(VectorStore): - def upsert( - # This unit test verifies that the signature of the upsert method - # specifically the items parameter can be overridden without - # violating the Liskov Substitution Principle (and getting - # typing errors). - self, - items: Union[Sequence[Document], Sequence[ByVector]], - /, - **kwargs: Any, - ) -> UpsertResponse: - raise NotImplementedError() - - -class CustomSyncVectorStore(VectorStore): - """A vectorstore that only implements the synchronous methods.""" +class CustomAddTextsVectorstore(VectorStore): + """A vectorstore that only implements add texts.""" def __init__(self) -> None: self.store: Dict[str, Document] = {} - def upsert( + def add_texts( self, - items: Sequence[Document], - /, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + # One of the kwargs should be `ids` which is a list of ids + # associated with the texts. + # This is not yet enforced in the type signature for backwards compatibility + # with existing implementations. 
+ ids: Optional[List[str]] = None, **kwargs: Any, - ) -> UpsertResponse: - ids = [] - for item in items: - if item.id is None: - new_item = item.copy() - id_: str = str(uuid.uuid4()) - new_item.id = id_ - else: - id_ = item.id - new_item = item + ) -> List[str]: + if not isinstance(texts, list): + texts = list(texts) + ids_iter = iter(ids or []) - self.store[id_] = new_item - ids.append(id_) + ids_ = [] - return { - "succeeded": ids, - "failed": [], - } + metadatas_ = metadatas or [{} for _ in texts] + + for text, metadata in zip(texts, metadatas_ or []): + next_id = next(ids_iter, None) + id_ = next_id or str(uuid.uuid4()) + self.store[id_] = Document(page_content=text, metadata=metadata, id=id_) + ids_.append(id_) + return ids_ def get_by_ids(self, ids: Sequence[str], /) -> List[Document]: return [self.store[id] for id in ids if id in self.store] @@ -74,8 +55,8 @@ class CustomSyncVectorStore(VectorStore): embedding: Embeddings, metadatas: Optional[List[dict]] = None, **kwargs: Any, - ) -> CustomSyncVectorStore: - vectorstore = CustomSyncVectorStore() + ) -> CustomAddTextsVectorstore: + vectorstore = CustomAddTextsVectorstore() vectorstore.add_texts(texts, metadatas=metadatas, **kwargs) return vectorstore @@ -85,30 +66,38 @@ class CustomSyncVectorStore(VectorStore): raise NotImplementedError() -def test_implement_upsert() -> None: +def test_default_add_documents() -> None: """Test that we can implement the upsert method of the CustomVectorStore class without violating the Liskov Substitution Principle. 
""" - store = CustomSyncVectorStore() + store = CustomAddTextsVectorstore() # Check upsert with id - assert store.upsert([Document(id="1", page_content="hello")]) == { - "succeeded": ["1"], - "failed": [], - } + assert store.add_documents([Document(id="1", page_content="hello")]) == ["1"] assert store.get_by_ids(["1"]) == [Document(id="1", page_content="hello")] # Check upsert without id - response = store.upsert([Document(page_content="world")]) - assert len(response["succeeded"]) == 1 - id_ = response["succeeded"][0] - assert id_ is not None - assert store.get_by_ids([id_]) == [Document(id=id_, page_content="world")] + ids = store.add_documents([Document(page_content="world")]) + assert len(ids) == 1 + assert store.get_by_ids(ids) == [Document(id=ids[0], page_content="world")] + # Check that add_documents works + assert store.add_documents([Document(id="5", page_content="baz")]) == ["5"] + + # Test add documents with id specified in both document and ids + original_document = Document(id="7", page_content="baz") + assert store.add_documents([original_document], ids=["6"]) == ["6"] + assert original_document.id == "7" # original document should not be modified + assert store.get_by_ids(["6"]) == [Document(id="6", page_content="baz")] + + +def test_default_add_texts() -> None: + store = CustomAddTextsVectorstore() # Check that default implementation of add_texts works assert store.add_texts(["hello", "world"], ids=["3", "4"]) == ["3", "4"] + assert store.get_by_ids(["3", "4"]) == [ Document(id="3", page_content="hello"), Document(id="4", page_content="world"), @@ -130,39 +119,37 @@ def test_implement_upsert() -> None: Document(id=ids_2[1], page_content="bar", metadata={"foo": "bar"}), ] - # Check that add_documents works - assert store.add_documents([Document(id="5", page_content="baz")]) == ["5"] - # Test add documents with id specified in both document and ids - original_document = Document(id="7", page_content="baz") - assert 
store.add_documents([original_document], ids=["6"]) == ["6"] - assert original_document.id == "7" # original document should not be modified - assert store.get_by_ids(["6"]) == [Document(id="6", page_content="baz")] - - -async def test_aupsert_delegation_to_upsert() -> None: - """Test delegation to the synchronous upsert method in async execution - if async methods are not implemented. - """ - store = CustomSyncVectorStore() +async def test_default_aadd_documents() -> None: + """Test delegation to the synchronous method.""" + store = CustomAddTextsVectorstore() # Check upsert with id - assert await store.aupsert([Document(id="1", page_content="hello")]) == { - "succeeded": ["1"], - "failed": [], - } + assert await store.aadd_documents([Document(id="1", page_content="hello")]) == ["1"] assert await store.aget_by_ids(["1"]) == [Document(id="1", page_content="hello")] # Check upsert without id - response = await store.aupsert([Document(page_content="world")]) - assert len(response["succeeded"]) == 1 - id_ = response["succeeded"][0] - assert id_ is not None - assert await store.aget_by_ids([id_]) == [Document(id=id_, page_content="world")] + ids = await store.aadd_documents([Document(page_content="world")]) + assert len(ids) == 1 + assert await store.aget_by_ids(ids) == [Document(id=ids[0], page_content="world")] + # Check that add_documents works + assert await store.aadd_documents([Document(id="5", page_content="baz")]) == ["5"] + + # Test add documents with id specified in both document and ids + original_document = Document(id="7", page_content="baz") + assert await store.aadd_documents([original_document], ids=["6"]) == ["6"] + assert original_document.id == "7" # original document should not be modified + assert await store.aget_by_ids(["6"]) == [Document(id="6", page_content="baz")] + + +async def test_default_aadd_texts() -> None: + """Test delegation to the synchronous method.""" + store = CustomAddTextsVectorstore() # Check that default implementation of 
add_texts works assert await store.aadd_texts(["hello", "world"], ids=["3", "4"]) == ["3", "4"] + assert await store.aget_by_ids(["3", "4"]) == [ Document(id="3", page_content="hello"), Document(id="4", page_content="world"), @@ -183,12 +170,3 @@ async def test_aupsert_delegation_to_upsert() -> None: Document(id=ids_2[0], page_content="foo", metadata={"foo": "bar"}), Document(id=ids_2[1], page_content="bar", metadata={"foo": "bar"}), ] - - # Check that add_documents works - assert await store.aadd_documents([Document(id="5", page_content="baz")]) == ["5"] - - # Test add documents with id specified in both document and ids - original_document = Document(id="7", page_content="baz") - assert await store.aadd_documents([original_document], ids=["6"]) == ["6"] - assert original_document.id == "7" # original document should not be modified - assert await store.aget_by_ids(["6"]) == [Document(id="6", page_content="baz")] diff --git a/libs/standard-tests/langchain_standard_tests/integration_tests/vectorstores.py b/libs/standard-tests/langchain_standard_tests/integration_tests/vectorstores.py index 83e76aaff80..d7735cfdd2b 100644 --- a/libs/standard-tests/langchain_standard_tests/integration_tests/vectorstores.py +++ b/libs/standard-tests/langchain_standard_tests/integration_tests/vectorstores.py @@ -1,6 +1,5 @@ """Test suite to test vectostores.""" -import inspect from abc import abstractmethod import pytest @@ -169,39 +168,31 @@ class ReadWriteTestSuite(BaseStandardTests): documents = vectorstore.get_by_ids(["1", "2", "3"]) assert documents == [] - def test_upsert_documents(self, vectorstore: VectorStore) -> None: - """Run upsert tests.""" + def test_add_documents_documents(self, vectorstore: VectorStore) -> None: + """Run add_documents tests.""" documents = [ Document(page_content="foo", metadata={"id": 1}), Document(page_content="bar", metadata={"id": 2}), ] - response = vectorstore.upsert(documents) - ids = response["succeeded"] + ids = 
vectorstore.add_documents(documents) assert vectorstore.get_by_ids(ids) == [ Document(page_content="foo", metadata={"id": 1}, id=ids[0]), Document(page_content="bar", metadata={"id": 2}, id=ids[1]), ] - def test_upsert_with_existing_ids(self, vectorstore: VectorStore) -> None: - """Test that upserting with existing IDs is idempotent.""" + def test_add_documents_with_existing_ids(self, vectorstore: VectorStore) -> None: + """Test that adding documents with existing IDs is idempotent.""" documents = [ Document(id="foo", page_content="foo", metadata={"id": 1}), Document(page_content="bar", metadata={"id": 2}), ] - response = vectorstore.upsert(documents) - ids = response["succeeded"] - assert response["failed"] == [] + ids = vectorstore.add_documents(documents) assert "foo" in ids assert vectorstore.get_by_ids(ids) == [ Document(page_content="foo", metadata={"id": 1}, id="foo"), Document(page_content="bar", metadata={"id": 2}, id=ids[1]), ] - def test_upsert_documents_has_no_ids(self, vectorstore: VectorStore) -> None: - """Verify that there is not parameter called ids in upsert""" - signature = inspect.signature(vectorstore.upsert) - assert "ids" not in signature.parameters - class AsyncReadWriteTestSuite(BaseStandardTests): """Test suite for checking the **async** read-write API of a vectorstore. 
@@ -359,35 +350,29 @@ class AsyncReadWriteTestSuite(BaseStandardTests): # This should not raise an exception assert await vectorstore.aget_by_ids(["1", "2", "3"]) == [] - async def test_upsert_documents(self, vectorstore: VectorStore) -> None: - """Run upsert tests.""" + async def test_add_documents_documents(self, vectorstore: VectorStore) -> None: + """Run add_documents tests.""" documents = [ Document(page_content="foo", metadata={"id": 1}), Document(page_content="bar", metadata={"id": 2}), ] - response = await vectorstore.aupsert(documents) - ids = response["succeeded"] + ids = await vectorstore.aadd_documents(documents) assert await vectorstore.aget_by_ids(ids) == [ Document(page_content="foo", metadata={"id": 1}, id=ids[0]), Document(page_content="bar", metadata={"id": 2}, id=ids[1]), ] - async def test_upsert_with_existing_ids(self, vectorstore: VectorStore) -> None: - """Test that upserting with existing IDs is idempotent.""" + async def test_add_documents_with_existing_ids( + self, vectorstore: VectorStore + ) -> None: + """Test that adding documents with existing IDs is idempotent.""" documents = [ Document(id="foo", page_content="foo", metadata={"id": 1}), Document(page_content="bar", metadata={"id": 2}), ] - response = await vectorstore.aupsert(documents) - ids = response["succeeded"] - assert response["failed"] == [] + ids = await vectorstore.aadd_documents(documents) assert "foo" in ids assert await vectorstore.aget_by_ids(ids) == [ Document(page_content="foo", metadata={"id": 1}, id="foo"), Document(page_content="bar", metadata={"id": 2}, id=ids[1]), ] - - async def test_upsert_documents_has_no_ids(self, vectorstore: VectorStore) -> None: - """Verify that there is not parameter called ids in upsert""" - signature = inspect.signature(vectorstore.aupsert) - assert "ids" not in signature.parameters