mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-27 17:08:47 +00:00
tests[patch]: populate API reference for vector stores (#28520)
This commit is contained in:
parent
2b360d6a2f
commit
f459754470
@ -15,18 +15,68 @@ EMBEDDING_SIZE = 6
|
|||||||
|
|
||||||
|
|
||||||
class ReadWriteTestSuite(BaseStandardTests):
|
class ReadWriteTestSuite(BaseStandardTests):
|
||||||
"""Test suite for checking the read-write API of a vectorstore.
|
"""Test suite for checking the synchronous read-write API of a vector store.
|
||||||
|
|
||||||
This test suite verifies the basic read-write API of a vectorstore.
|
|
||||||
|
|
||||||
The test suite is designed for synchronous vectorstores.
|
|
||||||
|
|
||||||
Implementers should subclass this test suite and provide a fixture
|
Implementers should subclass this test suite and provide a fixture
|
||||||
that returns an empty vectorstore for each test.
|
that returns an empty vector store for each test.
|
||||||
|
|
||||||
The fixture should use the `get_embeddings` method to get a pre-defined
|
The fixture should use the ``get_embeddings`` method to get a pre-defined
|
||||||
embeddings model that should be used for this test suite.
|
embeddings model that should be used for this test suite.
|
||||||
"""
|
|
||||||
|
Here is a template:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from typing import Generator
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from langchain_core.vectorstores import VectorStore
|
||||||
|
from langchain_parrot_link.vectorstores import ParrotVectorStore
|
||||||
|
from langchain_tests.integration_tests.vectorstores import ReadWriteTestSuite
|
||||||
|
|
||||||
|
|
||||||
|
class TestSync(ReadWriteTestSuite):
|
||||||
|
@pytest.fixture()
|
||||||
|
def vectorstore(self) -> Generator[VectorStore, None, None]: # type: ignore
|
||||||
|
\"\"\"Get an empty vectorstore.\"\"\"
|
||||||
|
store = ParrotVectorStore(self.get_embeddings())
|
||||||
|
# note: store should be EMPTY at this point
|
||||||
|
# if you need to delete data, you may do so here
|
||||||
|
try:
|
||||||
|
yield store
|
||||||
|
finally:
|
||||||
|
# cleanup operations, or deleting data
|
||||||
|
pass
|
||||||
|
|
||||||
|
In the fixture, before the ``yield`` we instantiate an empty vector store. In the
|
||||||
|
``finally`` block, we call whatever logic is necessary to bring the vector store
|
||||||
|
to a clean state.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from typing import Generator
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from langchain_core.vectorstores import VectorStore
|
||||||
|
from langchain_tests.integration_tests.vectorstores import ReadWriteTestSuite
|
||||||
|
|
||||||
|
from langchain_chroma import Chroma
|
||||||
|
|
||||||
|
|
||||||
|
class TestSync(ReadWriteTestSuite):
|
||||||
|
@pytest.fixture()
|
||||||
|
def vectorstore(self) -> Generator[VectorStore, None, None]: # type: ignore
|
||||||
|
\"\"\"Get an empty vectorstore.\"\"\"
|
||||||
|
store = Chroma(embedding_function=self.get_embeddings())
|
||||||
|
try:
|
||||||
|
yield store
|
||||||
|
finally:
|
||||||
|
store.delete_collection()
|
||||||
|
pass
|
||||||
|
|
||||||
|
""" # noqa: E501
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
@ -38,17 +88,39 @@ class ReadWriteTestSuite(BaseStandardTests):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_embeddings() -> Embeddings:
|
def get_embeddings() -> Embeddings:
|
||||||
"""A pre-defined embeddings model that should be used for this test."""
|
"""A pre-defined embeddings model that should be used for this test.
|
||||||
|
|
||||||
|
This currently uses ``DeterministicFakeEmbedding`` from ``langchain-core``,
|
||||||
|
which uses numpy to generate random numbers based on a hash of the input text.
|
||||||
|
|
||||||
|
The resulting embeddings are not meaningful, but they are deterministic.
|
||||||
|
"""
|
||||||
return DeterministicFakeEmbedding(
|
return DeterministicFakeEmbedding(
|
||||||
size=EMBEDDING_SIZE,
|
size=EMBEDDING_SIZE,
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_vectorstore_is_empty(self, vectorstore: VectorStore) -> None:
|
def test_vectorstore_is_empty(self, vectorstore: VectorStore) -> None:
|
||||||
"""Test that the vectorstore is empty."""
|
"""Test that the vectorstore is empty.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that the test class (i.e., subclass of
|
||||||
|
``ReadWriteTestSuite``) initializes an empty vector store in the
|
||||||
|
``vectorstore`` fixture.
|
||||||
|
"""
|
||||||
assert vectorstore.similarity_search("foo", k=1) == []
|
assert vectorstore.similarity_search("foo", k=1) == []
|
||||||
|
|
||||||
def test_add_documents(self, vectorstore: VectorStore) -> None:
|
def test_add_documents(self, vectorstore: VectorStore) -> None:
|
||||||
"""Test adding documents into the vectorstore."""
|
"""Test adding documents into the vectorstore.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that:
|
||||||
|
|
||||||
|
1. We correctly initialize an empty vector store in the ``vectorstore`` fixture.
|
||||||
|
2. Calling ``.similarity_search`` for the top ``k`` similar documents does not threshold by score.
|
||||||
|
3. We do not mutate the original document object when adding it to the vector store (e.g., by adding an ID).
|
||||||
|
""" # noqa: E501
|
||||||
original_documents = [
|
original_documents = [
|
||||||
Document(page_content="foo", metadata={"id": 1}),
|
Document(page_content="foo", metadata={"id": 1}),
|
||||||
Document(page_content="bar", metadata={"id": 2}),
|
Document(page_content="bar", metadata={"id": 2}),
|
||||||
@ -71,11 +143,24 @@ class ReadWriteTestSuite(BaseStandardTests):
|
|||||||
|
|
||||||
This just verifies that the fixture is set up properly to be empty
|
This just verifies that the fixture is set up properly to be empty
|
||||||
after each test.
|
after each test.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that the test class (i.e., subclass of
|
||||||
|
``ReadWriteTestSuite``) correctly clears the vector store in the
|
||||||
|
``finally`` block.
|
||||||
"""
|
"""
|
||||||
assert vectorstore.similarity_search("foo", k=1) == []
|
assert vectorstore.similarity_search("foo", k=1) == []
|
||||||
|
|
||||||
def test_deleting_documents(self, vectorstore: VectorStore) -> None:
|
def test_deleting_documents(self, vectorstore: VectorStore) -> None:
|
||||||
"""Test deleting documents from the vectorstore."""
|
"""Test deleting documents from the vectorstore.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that ``add_documents`` preserves identifiers
|
||||||
|
passed in through ``ids``, and that ``delete`` correctly removes
|
||||||
|
documents.
|
||||||
|
"""
|
||||||
documents = [
|
documents = [
|
||||||
Document(page_content="foo", metadata={"id": 1}),
|
Document(page_content="foo", metadata={"id": 1}),
|
||||||
Document(page_content="bar", metadata={"id": 2}),
|
Document(page_content="bar", metadata={"id": 2}),
|
||||||
@ -87,7 +172,13 @@ class ReadWriteTestSuite(BaseStandardTests):
|
|||||||
assert documents == [Document(page_content="bar", metadata={"id": 2}, id="2")]
|
assert documents == [Document(page_content="bar", metadata={"id": 2}, id="2")]
|
||||||
|
|
||||||
def test_deleting_bulk_documents(self, vectorstore: VectorStore) -> None:
|
def test_deleting_bulk_documents(self, vectorstore: VectorStore) -> None:
|
||||||
"""Test that we can delete several documents at once."""
|
"""Test that we can delete several documents at once.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that ``delete`` correctly removes multiple
|
||||||
|
documents when given a list of IDs.
|
||||||
|
"""
|
||||||
documents = [
|
documents = [
|
||||||
Document(page_content="foo", metadata={"id": 1}),
|
Document(page_content="foo", metadata={"id": 1}),
|
||||||
Document(page_content="bar", metadata={"id": 2}),
|
Document(page_content="bar", metadata={"id": 2}),
|
||||||
@ -100,14 +191,27 @@ class ReadWriteTestSuite(BaseStandardTests):
|
|||||||
assert documents == [Document(page_content="baz", metadata={"id": 3}, id="3")]
|
assert documents == [Document(page_content="baz", metadata={"id": 3}, id="3")]
|
||||||
|
|
||||||
def test_delete_missing_content(self, vectorstore: VectorStore) -> None:
|
def test_delete_missing_content(self, vectorstore: VectorStore) -> None:
|
||||||
"""Deleting missing content should not raise an exception."""
|
"""Deleting missing content should not raise an exception.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that ``delete`` does not raise an exception
|
||||||
|
when deleting IDs that do not exist.
|
||||||
|
"""
|
||||||
vectorstore.delete(["1"])
|
vectorstore.delete(["1"])
|
||||||
vectorstore.delete(["1", "2", "3"])
|
vectorstore.delete(["1", "2", "3"])
|
||||||
|
|
||||||
def test_add_documents_with_ids_is_idempotent(
|
def test_add_documents_with_ids_is_idempotent(
|
||||||
self, vectorstore: VectorStore
|
self, vectorstore: VectorStore
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Adding by ID should be idempotent."""
|
"""Adding by ID should be idempotent.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that adding the same document twice with the
|
||||||
|
same IDs has the same effect as adding it once (i.e., it does not
|
||||||
|
duplicate the documents).
|
||||||
|
"""
|
||||||
documents = [
|
documents = [
|
||||||
Document(page_content="foo", metadata={"id": 1}),
|
Document(page_content="foo", metadata={"id": 1}),
|
||||||
Document(page_content="bar", metadata={"id": 2}),
|
Document(page_content="bar", metadata={"id": 2}),
|
||||||
@ -121,7 +225,14 @@ class ReadWriteTestSuite(BaseStandardTests):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def test_add_documents_by_id_with_mutation(self, vectorstore: VectorStore) -> None:
|
def test_add_documents_by_id_with_mutation(self, vectorstore: VectorStore) -> None:
|
||||||
"""Test that we can overwrite by ID using add_documents."""
|
"""Test that we can overwrite by ID using add_documents.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that when ``add_documents`` is called with an
|
||||||
|
ID that already exists in the vector store, the content is updated
|
||||||
|
rather than duplicated.
|
||||||
|
"""
|
||||||
documents = [
|
documents = [
|
||||||
Document(page_content="foo", metadata={"id": 1}),
|
Document(page_content="foo", metadata={"id": 1}),
|
||||||
Document(page_content="bar", metadata={"id": 2}),
|
Document(page_content="bar", metadata={"id": 2}),
|
||||||
@ -150,7 +261,26 @@ class ReadWriteTestSuite(BaseStandardTests):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def test_get_by_ids(self, vectorstore: VectorStore) -> None:
|
def test_get_by_ids(self, vectorstore: VectorStore) -> None:
|
||||||
"""Test get by IDs."""
|
"""Test get by IDs.
|
||||||
|
|
||||||
|
This test requires that ``get_by_ids`` be implemented on the vector store.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that ``get_by_ids`` is implemented and returns
|
||||||
|
documents in the same order as the IDs passed in.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
``get_by_ids`` was added to the ``VectorStore`` interface in
|
||||||
|
``langchain-core`` version 0.2.11. If difficult to implement, this
|
||||||
|
test can be skipped using a pytest ``xfail`` on the test class:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
@pytest.mark.xfail(reason=("get_by_ids not implemented."))
|
||||||
|
def test_get_by_ids(self, vectorstore: VectorStore) -> None:
|
||||||
|
super().test_get_by_ids(vectorstore)
|
||||||
|
"""
|
||||||
documents = [
|
documents = [
|
||||||
Document(page_content="foo", metadata={"id": 1}),
|
Document(page_content="foo", metadata={"id": 1}),
|
||||||
Document(page_content="bar", metadata={"id": 2}),
|
Document(page_content="bar", metadata={"id": 2}),
|
||||||
@ -163,13 +293,50 @@ class ReadWriteTestSuite(BaseStandardTests):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def test_get_by_ids_missing(self, vectorstore: VectorStore) -> None:
|
def test_get_by_ids_missing(self, vectorstore: VectorStore) -> None:
|
||||||
"""Test get by IDs with missing IDs."""
|
"""Test get by IDs with missing IDs.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that ``get_by_ids`` is implemented and does not
|
||||||
|
raise an exception when given IDs that do not exist.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
``get_by_ids`` was added to the ``VectorStore`` interface in
|
||||||
|
``langchain-core`` version 0.2.11. If difficult to implement, this
|
||||||
|
test can be skipped using a pytest ``xfail`` on the test class:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
@pytest.mark.xfail(reason=("get_by_ids not implemented."))
|
||||||
|
def test_get_by_ids_missing(self, vectorstore: VectorStore) -> None:
|
||||||
|
super().test_get_by_ids_missing(vectorstore)
|
||||||
|
""" # noqa: E501
|
||||||
# This should not raise an exception
|
# This should not raise an exception
|
||||||
documents = vectorstore.get_by_ids(["1", "2", "3"])
|
documents = vectorstore.get_by_ids(["1", "2", "3"])
|
||||||
assert documents == []
|
assert documents == []
|
||||||
|
|
||||||
def test_add_documents_documents(self, vectorstore: VectorStore) -> None:
|
def test_add_documents_documents(self, vectorstore: VectorStore) -> None:
|
||||||
"""Run add_documents tests."""
|
"""Run add_documents tests.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that ``get_by_ids`` is implemented and returns
|
||||||
|
documents in the same order as the IDs passed in.
|
||||||
|
|
||||||
|
Check also that ``add_documents`` will correctly generate string IDs if
|
||||||
|
none are provided.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
``get_by_ids`` was added to the ``VectorStore`` interface in
|
||||||
|
``langchain-core`` version 0.2.11. If difficult to implement, this
|
||||||
|
test can be skipped using a pytest ``xfail`` on the test class:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
@pytest.mark.xfail(reason=("get_by_ids not implemented."))
|
||||||
|
def test_add_documents_documents(self, vectorstore: VectorStore) -> None:
|
||||||
|
super().test_add_documents_documents(vectorstore)
|
||||||
|
""" # noqa: E501
|
||||||
documents = [
|
documents = [
|
||||||
Document(page_content="foo", metadata={"id": 1}),
|
Document(page_content="foo", metadata={"id": 1}),
|
||||||
Document(page_content="bar", metadata={"id": 2}),
|
Document(page_content="bar", metadata={"id": 2}),
|
||||||
@ -181,7 +348,29 @@ class ReadWriteTestSuite(BaseStandardTests):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def test_add_documents_with_existing_ids(self, vectorstore: VectorStore) -> None:
|
def test_add_documents_with_existing_ids(self, vectorstore: VectorStore) -> None:
|
||||||
"""Test that add_documentsing with existing IDs is idempotent."""
|
"""Test that add_documents with existing IDs is idempotent.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that ``get_by_ids`` is implemented and returns
|
||||||
|
documents in the same order as the IDs passed in.
|
||||||
|
|
||||||
|
This test also verifies that:
|
||||||
|
|
||||||
|
1. IDs specified in the ``Document.id`` field are assigned when adding documents.
|
||||||
|
2. If some documents include IDs and others don't, string IDs are generated for the latter.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
``get_by_ids`` was added to the ``VectorStore`` interface in
|
||||||
|
``langchain-core`` version 0.2.11. If difficult to implement, this
|
||||||
|
test can be skipped using a pytest ``xfail`` on the test class:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
@pytest.mark.xfail(reason=("get_by_ids not implemented."))
|
||||||
|
def test_add_documents_with_existing_ids(self, vectorstore: VectorStore) -> None:
|
||||||
|
super().test_add_documents_with_existing_ids(vectorstore)
|
||||||
|
""" # noqa: E501
|
||||||
documents = [
|
documents = [
|
||||||
Document(id="foo", page_content="foo", metadata={"id": 1}),
|
Document(id="foo", page_content="foo", metadata={"id": 1}),
|
||||||
Document(page_content="bar", metadata={"id": 2}),
|
Document(page_content="bar", metadata={"id": 2}),
|
||||||
@ -195,18 +384,68 @@ class ReadWriteTestSuite(BaseStandardTests):
|
|||||||
|
|
||||||
|
|
||||||
class AsyncReadWriteTestSuite(BaseStandardTests):
|
class AsyncReadWriteTestSuite(BaseStandardTests):
|
||||||
"""Test suite for checking the **async** read-write API of a vectorstore.
|
"""Test suite for checking the async read-write API of a vector store.
|
||||||
|
|
||||||
This test suite verifies the basic read-write API of a vectorstore.
|
|
||||||
|
|
||||||
The test suite is designed for asynchronous vectorstores.
|
|
||||||
|
|
||||||
Implementers should subclass this test suite and provide a fixture
|
Implementers should subclass this test suite and provide a fixture
|
||||||
that returns an empty vectorstore for each test.
|
that returns an empty vector store for each test.
|
||||||
|
|
||||||
The fixture should use the `get_embeddings` method to get a pre-defined
|
The fixture should use the ``get_embeddings`` method to get a pre-defined
|
||||||
embeddings model that should be used for this test suite.
|
embeddings model that should be used for this test suite.
|
||||||
"""
|
|
||||||
|
Here is a template:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from typing import AsyncGenerator
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from langchain_core.vectorstores import VectorStore
|
||||||
|
from langchain_parrot_link.vectorstores import ParrotVectorStore
|
||||||
|
from langchain_tests.integration_tests.vectorstores import AsyncReadWriteTestSuite
|
||||||
|
|
||||||
|
|
||||||
|
class TestAsync(AsyncReadWriteTestSuite):
|
||||||
|
@pytest.fixture()
|
||||||
|
async def vectorstore(self) -> AsyncGenerator[VectorStore, None]: # type: ignore
|
||||||
|
\"\"\"Get an empty vectorstore.\"\"\"
|
||||||
|
store = ParrotVectorStore(self.get_embeddings())
|
||||||
|
# note: store should be EMPTY at this point
|
||||||
|
# if you need to delete data, you may do so here
|
||||||
|
try:
|
||||||
|
yield store
|
||||||
|
finally:
|
||||||
|
# cleanup operations, or deleting data
|
||||||
|
pass
|
||||||
|
|
||||||
|
In the fixture, before the ``yield`` we instantiate an empty vector store. In the
|
||||||
|
``finally`` block, we call whatever logic is necessary to bring the vector store
|
||||||
|
to a clean state.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
from typing import AsyncGenerator
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from langchain_core.vectorstores import VectorStore
|
||||||
|
from langchain_tests.integration_tests.vectorstores import AsyncReadWriteTestSuite
|
||||||
|
|
||||||
|
from langchain_chroma import Chroma
|
||||||
|
|
||||||
|
|
||||||
|
class TestAsync(AsyncReadWriteTestSuite):
|
||||||
|
@pytest.fixture()
|
||||||
|
async def vectorstore(self) -> AsyncGenerator[VectorStore, None]: # type: ignore
|
||||||
|
\"\"\"Get an empty vectorstore for unit tests.\"\"\"
|
||||||
|
store = Chroma(embedding_function=self.get_embeddings())
|
||||||
|
try:
|
||||||
|
yield store
|
||||||
|
finally:
|
||||||
|
store.delete_collection()
|
||||||
|
pass
|
||||||
|
|
||||||
|
""" # noqa: E501
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
@ -218,17 +457,39 @@ class AsyncReadWriteTestSuite(BaseStandardTests):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_embeddings() -> Embeddings:
|
def get_embeddings() -> Embeddings:
|
||||||
"""A pre-defined embeddings model that should be used for this test."""
|
"""A pre-defined embeddings model that should be used for this test.
|
||||||
|
|
||||||
|
This currently uses ``DeterministicFakeEmbedding`` from ``langchain-core``,
|
||||||
|
which uses numpy to generate random numbers based on a hash of the input text.
|
||||||
|
|
||||||
|
The resulting embeddings are not meaningful, but they are deterministic.
|
||||||
|
"""
|
||||||
return DeterministicFakeEmbedding(
|
return DeterministicFakeEmbedding(
|
||||||
size=EMBEDDING_SIZE,
|
size=EMBEDDING_SIZE,
|
||||||
)
|
)
|
||||||
|
|
||||||
async def test_vectorstore_is_empty(self, vectorstore: VectorStore) -> None:
|
async def test_vectorstore_is_empty(self, vectorstore: VectorStore) -> None:
|
||||||
"""Test that the vectorstore is empty."""
|
"""Test that the vectorstore is empty.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that the test class (i.e., subclass of
|
||||||
|
``AsyncReadWriteTestSuite``) initializes an empty vector store in the
|
||||||
|
``vectorstore`` fixture.
|
||||||
|
"""
|
||||||
assert await vectorstore.asimilarity_search("foo", k=1) == []
|
assert await vectorstore.asimilarity_search("foo", k=1) == []
|
||||||
|
|
||||||
async def test_add_documents(self, vectorstore: VectorStore) -> None:
|
async def test_add_documents(self, vectorstore: VectorStore) -> None:
|
||||||
"""Test adding documents into the vectorstore."""
|
"""Test adding documents into the vectorstore.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that:
|
||||||
|
|
||||||
|
1. We correctly initialize an empty vector store in the ``vectorstore`` fixture.
|
||||||
|
2. Calling ``.asimilarity_search`` for the top ``k`` similar documents does not threshold by score.
|
||||||
|
3. We do not mutate the original document object when adding it to the vector store (e.g., by adding an ID).
|
||||||
|
""" # noqa: E501
|
||||||
original_documents = [
|
original_documents = [
|
||||||
Document(page_content="foo", metadata={"id": 1}),
|
Document(page_content="foo", metadata={"id": 1}),
|
||||||
Document(page_content="bar", metadata={"id": 2}),
|
Document(page_content="bar", metadata={"id": 2}),
|
||||||
@ -252,11 +513,24 @@ class AsyncReadWriteTestSuite(BaseStandardTests):
|
|||||||
|
|
||||||
This just verifies that the fixture is set up properly to be empty
|
This just verifies that the fixture is set up properly to be empty
|
||||||
after each test.
|
after each test.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that the test class (i.e., subclass of
|
||||||
|
``AsyncReadWriteTestSuite``) correctly clears the vector store in the
|
||||||
|
``finally`` block.
|
||||||
"""
|
"""
|
||||||
assert await vectorstore.asimilarity_search("foo", k=1) == []
|
assert await vectorstore.asimilarity_search("foo", k=1) == []
|
||||||
|
|
||||||
async def test_deleting_documents(self, vectorstore: VectorStore) -> None:
|
async def test_deleting_documents(self, vectorstore: VectorStore) -> None:
|
||||||
"""Test deleting documents from the vectorstore."""
|
"""Test deleting documents from the vectorstore.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that ``aadd_documents`` preserves identifiers
|
||||||
|
passed in through ``ids``, and that ``adelete`` correctly removes
|
||||||
|
documents.
|
||||||
|
"""
|
||||||
documents = [
|
documents = [
|
||||||
Document(page_content="foo", metadata={"id": 1}),
|
Document(page_content="foo", metadata={"id": 1}),
|
||||||
Document(page_content="bar", metadata={"id": 2}),
|
Document(page_content="bar", metadata={"id": 2}),
|
||||||
@ -268,7 +542,13 @@ class AsyncReadWriteTestSuite(BaseStandardTests):
|
|||||||
assert documents == [Document(page_content="bar", metadata={"id": 2}, id="2")]
|
assert documents == [Document(page_content="bar", metadata={"id": 2}, id="2")]
|
||||||
|
|
||||||
async def test_deleting_bulk_documents(self, vectorstore: VectorStore) -> None:
|
async def test_deleting_bulk_documents(self, vectorstore: VectorStore) -> None:
|
||||||
"""Test that we can delete several documents at once."""
|
"""Test that we can delete several documents at once.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that ``adelete`` correctly removes multiple
|
||||||
|
documents when given a list of IDs.
|
||||||
|
"""
|
||||||
documents = [
|
documents = [
|
||||||
Document(page_content="foo", metadata={"id": 1}),
|
Document(page_content="foo", metadata={"id": 1}),
|
||||||
Document(page_content="bar", metadata={"id": 2}),
|
Document(page_content="bar", metadata={"id": 2}),
|
||||||
@ -281,14 +561,27 @@ class AsyncReadWriteTestSuite(BaseStandardTests):
|
|||||||
assert documents == [Document(page_content="baz", metadata={"id": 3}, id="3")]
|
assert documents == [Document(page_content="baz", metadata={"id": 3}, id="3")]
|
||||||
|
|
||||||
async def test_delete_missing_content(self, vectorstore: VectorStore) -> None:
|
async def test_delete_missing_content(self, vectorstore: VectorStore) -> None:
|
||||||
"""Deleting missing content should not raise an exception."""
|
"""Deleting missing content should not raise an exception.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that ``adelete`` does not raise an exception
|
||||||
|
when deleting IDs that do not exist.
|
||||||
|
"""
|
||||||
await vectorstore.adelete(["1"])
|
await vectorstore.adelete(["1"])
|
||||||
await vectorstore.adelete(["1", "2", "3"])
|
await vectorstore.adelete(["1", "2", "3"])
|
||||||
|
|
||||||
async def test_add_documents_with_ids_is_idempotent(
|
async def test_add_documents_with_ids_is_idempotent(
|
||||||
self, vectorstore: VectorStore
|
self, vectorstore: VectorStore
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Adding by ID should be idempotent."""
|
"""Adding by ID should be idempotent.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that adding the same document twice with the
|
||||||
|
same IDs has the same effect as adding it once (i.e., it does not
|
||||||
|
duplicate the documents).
|
||||||
|
"""
|
||||||
documents = [
|
documents = [
|
||||||
Document(page_content="foo", metadata={"id": 1}),
|
Document(page_content="foo", metadata={"id": 1}),
|
||||||
Document(page_content="bar", metadata={"id": 2}),
|
Document(page_content="bar", metadata={"id": 2}),
|
||||||
@ -304,7 +597,14 @@ class AsyncReadWriteTestSuite(BaseStandardTests):
|
|||||||
async def test_add_documents_by_id_with_mutation(
|
async def test_add_documents_by_id_with_mutation(
|
||||||
self, vectorstore: VectorStore
|
self, vectorstore: VectorStore
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Test that we can overwrite by ID using add_documents."""
|
"""Test that we can overwrite by ID using add_documents.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that when ``aadd_documents`` is called with an
|
||||||
|
ID that already exists in the vector store, the content is updated
|
||||||
|
rather than duplicated.
|
||||||
|
"""
|
||||||
documents = [
|
documents = [
|
||||||
Document(page_content="foo", metadata={"id": 1}),
|
Document(page_content="foo", metadata={"id": 1}),
|
||||||
Document(page_content="bar", metadata={"id": 2}),
|
Document(page_content="bar", metadata={"id": 2}),
|
||||||
@ -333,7 +633,26 @@ class AsyncReadWriteTestSuite(BaseStandardTests):
|
|||||||
]
|
]
|
||||||
|
|
||||||
async def test_get_by_ids(self, vectorstore: VectorStore) -> None:
|
async def test_get_by_ids(self, vectorstore: VectorStore) -> None:
|
||||||
"""Test get by IDs."""
|
"""Test get by IDs.
|
||||||
|
|
||||||
|
This test requires that ``get_by_ids`` be implemented on the vector store.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that ``get_by_ids`` is implemented and returns
|
||||||
|
documents in the same order as the IDs passed in.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
``get_by_ids`` was added to the ``VectorStore`` interface in
|
||||||
|
``langchain-core`` version 0.2.11. If difficult to implement, this
|
||||||
|
test can be skipped using a pytest ``xfail`` on the test class:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
@pytest.mark.xfail(reason=("get_by_ids not implemented."))
|
||||||
|
async def test_get_by_ids(self, vectorstore: VectorStore) -> None:
|
||||||
|
await super().test_get_by_ids(vectorstore)
|
||||||
|
"""
|
||||||
documents = [
|
documents = [
|
||||||
Document(page_content="foo", metadata={"id": 1}),
|
Document(page_content="foo", metadata={"id": 1}),
|
||||||
Document(page_content="bar", metadata={"id": 2}),
|
Document(page_content="bar", metadata={"id": 2}),
|
||||||
@ -346,12 +665,49 @@ class AsyncReadWriteTestSuite(BaseStandardTests):
|
|||||||
]
|
]
|
||||||
|
|
||||||
async def test_get_by_ids_missing(self, vectorstore: VectorStore) -> None:
|
async def test_get_by_ids_missing(self, vectorstore: VectorStore) -> None:
|
||||||
"""Test get by IDs with missing IDs."""
|
"""Test get by IDs with missing IDs.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that ``get_by_ids`` is implemented and does not
|
||||||
|
raise an exception when given IDs that do not exist.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
``get_by_ids`` was added to the ``VectorStore`` interface in
|
||||||
|
``langchain-core`` version 0.2.11. If difficult to implement, this
|
||||||
|
test can be skipped using a pytest ``xfail`` on the test class:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
@pytest.mark.xfail(reason=("get_by_ids not implemented."))
|
||||||
|
async def test_get_by_ids_missing(self, vectorstore: VectorStore) -> None:
|
||||||
|
await super().test_get_by_ids_missing(vectorstore)
|
||||||
|
""" # noqa: E501
|
||||||
# This should not raise an exception
|
# This should not raise an exception
|
||||||
assert await vectorstore.aget_by_ids(["1", "2", "3"]) == []
|
assert await vectorstore.aget_by_ids(["1", "2", "3"]) == []
|
||||||
|
|
||||||
async def test_add_documents_documents(self, vectorstore: VectorStore) -> None:
|
async def test_add_documents_documents(self, vectorstore: VectorStore) -> None:
|
||||||
"""Run add_documents tests."""
|
"""Run add_documents tests.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that ``get_by_ids`` is implemented and returns
|
||||||
|
documents in the same order as the IDs passed in.
|
||||||
|
|
||||||
|
Check also that ``aadd_documents`` will correctly generate string IDs if
|
||||||
|
none are provided.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
``get_by_ids`` was added to the ``VectorStore`` interface in
|
||||||
|
``langchain-core`` version 0.2.11. If difficult to implement, this
|
||||||
|
test can be skipped using a pytest ``xfail`` on the test class:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
@pytest.mark.xfail(reason=("get_by_ids not implemented."))
|
||||||
|
async def test_add_documents_documents(self, vectorstore: VectorStore) -> None:
|
||||||
|
await super().test_add_documents_documents(vectorstore)
|
||||||
|
""" # noqa: E501
|
||||||
documents = [
|
documents = [
|
||||||
Document(page_content="foo", metadata={"id": 1}),
|
Document(page_content="foo", metadata={"id": 1}),
|
||||||
Document(page_content="bar", metadata={"id": 2}),
|
Document(page_content="bar", metadata={"id": 2}),
|
||||||
@ -365,7 +721,29 @@ class AsyncReadWriteTestSuite(BaseStandardTests):
|
|||||||
async def test_add_documents_with_existing_ids(
|
async def test_add_documents_with_existing_ids(
|
||||||
self, vectorstore: VectorStore
|
self, vectorstore: VectorStore
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Test that add_documentsing with existing IDs is idempotent."""
|
"""Test that add_documents with existing IDs is idempotent.
|
||||||
|
|
||||||
|
.. dropdown:: Troubleshooting
|
||||||
|
|
||||||
|
If this test fails, check that ``get_by_ids`` is implemented and returns
|
||||||
|
documents in the same order as the IDs passed in.
|
||||||
|
|
||||||
|
This test also verifies that:
|
||||||
|
|
||||||
|
1. IDs specified in the ``Document.id`` field are assigned when adding documents.
|
||||||
|
2. If some documents include IDs and others don't, string IDs are generated for the latter.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
``get_by_ids`` was added to the ``VectorStore`` interface in
|
||||||
|
``langchain-core`` version 0.2.11. If difficult to implement, this
|
||||||
|
test can be skipped using a pytest ``xfail`` on the test class:
|
||||||
|
|
||||||
|
.. code-block:: python
|
||||||
|
|
||||||
|
@pytest.mark.xfail(reason=("get_by_ids not implemented."))
|
||||||
|
async def test_add_documents_with_existing_ids(self, vectorstore: VectorStore) -> None:
|
||||||
|
await super().test_add_documents_with_existing_ids(vectorstore)
|
||||||
|
""" # noqa: E501
|
||||||
documents = [
|
documents = [
|
||||||
Document(id="foo", page_content="foo", metadata={"id": 1}),
|
Document(id="foo", page_content="foo", metadata={"id": 1}),
|
||||||
Document(page_content="bar", metadata={"id": 2}),
|
Document(page_content="bar", metadata={"id": 2}),
|
||||||
|
Loading…
Reference in New Issue
Block a user