multiple: combine sync/async vector store standard test suites (#28580)

Breaking change in `langchain-tests`.
This commit is contained in:
ccurme 2024-12-06 14:55:06 -05:00 committed by GitHub
parent dda9f90047
commit 2c6bc74cb1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 56 additions and 229 deletions

View File

@ -505,36 +505,21 @@
"source": [
"# title=\"tests/integration_tests/test_vectorstores_sync.py\"\n",
"\n",
"from typing import AsyncGenerator, Generator\n",
"from typing import Generator\n",
"\n",
"import pytest\n",
"from langchain_core.vectorstores import VectorStore\n",
"from langchain_parrot_link.vectorstores import ParrotVectorStore\n",
"from langchain_standard_tests.integration_tests.vectorstores import (\n",
" AsyncReadWriteTestSuite,\n",
" ReadWriteTestSuite,\n",
" VectorStoreIntegrationTests,\n",
")\n",
"\n",
"\n",
"class TestSync(ReadWriteTestSuite):\n",
"class TestParrotVectorStore(VectorStoreIntegrationTests):\n",
" @pytest.fixture()\n",
" def vectorstore(self) -> Generator[VectorStore, None, None]: # type: ignore\n",
" \"\"\"Get an empty vectorstore for unit tests.\"\"\"\n",
" store = ParrotVectorStore()\n",
" # note: store should be EMPTY at this point\n",
" # if you need to delete data, you may do so here\n",
" try:\n",
" yield store\n",
" finally:\n",
" # cleanup operations, or deleting data\n",
" pass\n",
"\n",
"\n",
"class TestAsync(AsyncReadWriteTestSuite):\n",
" @pytest.fixture()\n",
" async def vectorstore(self) -> AsyncGenerator[VectorStore, None]: # type: ignore\n",
" \"\"\"Get an empty vectorstore for unit tests.\"\"\"\n",
" store = ParrotVectorStore()\n",
" store = ParrotVectorStore(self.get_embeddings())\n",
" # note: store should be EMPTY at this point\n",
" # if you need to delete data, you may do so here\n",
" try:\n",
@ -548,11 +533,10 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"There are separate suites for testing synchronous and asynchronous methods.\n",
"Configuring the tests consists of implementing pytest fixtures for setting up an\n",
"empty vector store and tearing down the vector store after the test run ends.\n",
"\n",
"For example, below is the `ReadWriteTestSuite` for the [Chroma](https://python.langchain.com/docs/integrations/vectorstores/chroma/)\n",
"For example, below is the `VectorStoreIntegrationTests` class for the [Chroma](https://python.langchain.com/docs/integrations/vectorstores/chroma/)\n",
"integration:\n",
"\n",
"```python\n",
@ -560,26 +544,27 @@
"\n",
"import pytest\n",
"from langchain_core.vectorstores import VectorStore\n",
"from langchain_tests.integration_tests.vectorstores import ReadWriteTestSuite\n",
"from langchain_tests.integration_tests.vectorstores import VectorStoreIntegrationTests\n",
"\n",
"from langchain_chroma import Chroma\n",
"\n",
"\n",
"class TestSync(ReadWriteTestSuite):\n",
"class TestChromaStandard(VectorStoreIntegrationTests):\n",
" @pytest.fixture()\n",
" def vectorstore(self) -> Generator[VectorStore, None, None]: # type: ignore\n",
" \"\"\"Get an empty vectorstore.\"\"\"\n",
" \"\"\"Get an empty vectorstore for unit tests.\"\"\"\n",
" store = Chroma(embedding_function=self.get_embeddings())\n",
" try:\n",
" yield store\n",
" finally:\n",
" store.delete_collection()\n",
" pass\n",
"\n",
"```\n",
"\n",
"Note that before the initial `yield`, we instantiate the vector store with an\n",
"[embeddings](/docs/concepts/embedding_models/) object. This is a pre-defined\n",
"[\"fake\" embeddings model](https://python.langchain.com/api_reference/standard_tests/integration_tests/langchain_tests.integration_tests.vectorstores.ReadWriteTestSuite.html#langchain_tests.integration_tests.vectorstores.ReadWriteTestSuite.get_embeddings)\n",
"[\"fake\" embeddings model](https://python.langchain.com/api_reference/standard_tests/integration_tests/langchain_tests.integration_tests.vectorstores.VectorStoreIntegrationTests.html#langchain_tests.integration_tests.vectorstores.VectorStoreIntegrationTests.get_embeddings)\n",
"that will generate short, arbitrary vectors for documents. You can use a different\n",
"embeddings object if desired.\n",
"\n",
@ -589,10 +574,7 @@
"\n",
":::note\n",
"\n",
"Details on what tests are run, how each test can be skipped, and troubleshooting tips for each test can be found in the API references. See details:\n",
"\n",
"- [Sync tests API reference](https://python.langchain.com/api_reference/standard_tests/integration_tests/langchain_tests.integration_tests.vectorstores.ReadWriteTestSuite.html)\n",
"- [Async tests API reference](https://python.langchain.com/api_reference/standard_tests/integration_tests/langchain_tests.integration_tests.vectorstores.AsyncReadWriteTestSuite.html)\n",
"Details on what tests are run and troubleshooting tips for each test can be found in the [API reference](https://python.langchain.com/api_reference/standard_tests/integration_tests/langchain_tests.integration_tests.vectorstores.VectorStoreIntegrationTests.html).\n",
"\n",
":::"
]

View File

@ -1,15 +1,12 @@
from typing import AsyncGenerator, Generator
from typing import Generator
import pytest
from __module_name__.vectorstores import __ModuleName__VectorStore
from langchain_core.vectorstores import VectorStore
from langchain_tests.integration_tests import (
AsyncReadWriteTestSuite,
ReadWriteTestSuite,
)
from langchain_tests.integration_tests import VectorStoreIntegrationTests
class Test__ModuleName__VectorStoreSync(ReadWriteTestSuite):
class Test__ModuleName__VectorStore(VectorStoreIntegrationTests):
@pytest.fixture()
def vectorstore(self) -> Generator[VectorStore, None, None]: # type: ignore
"""Get an empty vectorstore for unit tests."""
@ -21,17 +18,3 @@ class Test__ModuleName__VectorStoreSync(ReadWriteTestSuite):
finally:
# cleanup operations, or deleting data
pass
class Test__ModuleName__VectorStoreAsync(AsyncReadWriteTestSuite):
@pytest.fixture()
async def vectorstore(self) -> AsyncGenerator[VectorStore, None]: # type: ignore
"""Get an empty vectorstore for unit tests."""
store = __ModuleName__VectorStore(self.get_embeddings())
# note: store should be EMPTY at this point
# if you need to delete data, you may do so here
try:
yield store
finally:
# cleanup operations, or deleting data
pass

View File

@ -3,27 +3,15 @@
import uuid
import pytest
from langchain_tests.integration_tests.vectorstores import (
AsyncReadWriteTestSuite,
ReadWriteTestSuite,
)
from langchain_tests.integration_tests.vectorstores import VectorStoreIntegrationTests
from langchain_community.vectorstores import ApertureDB
class TestApertureDBReadWriteTestSuite(ReadWriteTestSuite):
class TestApertureStandard(VectorStoreIntegrationTests):
@pytest.fixture
def vectorstore(self) -> ApertureDB:
descriptor_set = uuid.uuid4().hex # Fresh descriptor set for each test
return ApertureDB(
embeddings=self.get_embeddings(), descriptor_set=descriptor_set
)
class TestAsyncApertureDBReadWriteTestSuite(AsyncReadWriteTestSuite):
@pytest.fixture
async def vectorstore(self) -> ApertureDB:
descriptor_set = uuid.uuid4().hex # Fresh descriptor set for each test
return ApertureDB(
embeddings=self.get_embeddings(), descriptor_set=descriptor_set
)

View File

@ -3,10 +3,7 @@ from typing import Any
import pytest
from langchain_core.documents import Document
from langchain_tests.integration_tests.vectorstores import (
AsyncReadWriteTestSuite,
ReadWriteTestSuite,
)
from langchain_tests.integration_tests.vectorstores import VectorStoreIntegrationTests
from langchain_community.vectorstores.inmemory import InMemoryVectorStore
from tests.integration_tests.vectorstores.fake_embeddings import (
@ -26,18 +23,12 @@ def _AnyDocument(**kwargs: Any) -> Document:
return doc
class TestInMemoryReadWriteTestSuite(ReadWriteTestSuite):
class TestInMemoryStandard(VectorStoreIntegrationTests):
@pytest.fixture
def vectorstore(self) -> InMemoryVectorStore:
return InMemoryVectorStore(embedding=self.get_embeddings())
class TestAsyncInMemoryReadWriteTestSuite(AsyncReadWriteTestSuite):
@pytest.fixture
async def vectorstore(self) -> InMemoryVectorStore:
return InMemoryVectorStore(embedding=self.get_embeddings())
async def test_inmemory() -> None:
"""Test end to end construction and search."""
store = await InMemoryVectorStore.afrom_texts(

View File

@ -2,10 +2,7 @@ from pathlib import Path
from unittest.mock import AsyncMock, Mock
import pytest
from langchain_tests.integration_tests.vectorstores import (
AsyncReadWriteTestSuite,
ReadWriteTestSuite,
)
from langchain_tests.integration_tests.vectorstores import VectorStoreIntegrationTests
from langchain_core.documents import Document
from langchain_core.embeddings.fake import DeterministicFakeEmbedding
@ -13,18 +10,12 @@ from langchain_core.vectorstores import InMemoryVectorStore
from tests.unit_tests.stubs import _any_id_document
class TestInMemoryReadWriteTestSuite(ReadWriteTestSuite):
class TestInMemoryStandard(VectorStoreIntegrationTests):
@pytest.fixture
def vectorstore(self) -> InMemoryVectorStore:
return InMemoryVectorStore(embedding=self.get_embeddings())
class TestAsyncInMemoryReadWriteTestSuite(AsyncReadWriteTestSuite):
@pytest.fixture
async def vectorstore(self) -> InMemoryVectorStore:
return InMemoryVectorStore(embedding=self.get_embeddings())
async def test_inmemory_similarity_search() -> None:
"""Test end to end similarity search."""
store = await InMemoryVectorStore.afrom_texts(

View File

@ -1,16 +1,13 @@
from typing import AsyncGenerator, Generator
from typing import Generator
import pytest
from langchain_core.vectorstores import VectorStore
from langchain_tests.integration_tests.vectorstores import (
AsyncReadWriteTestSuite,
ReadWriteTestSuite,
)
from langchain_tests.integration_tests.vectorstores import VectorStoreIntegrationTests
from langchain_chroma import Chroma
class TestSync(ReadWriteTestSuite):
class TestChromaStandard(VectorStoreIntegrationTests):
@pytest.fixture()
def vectorstore(self) -> Generator[VectorStore, None, None]: # type: ignore
"""Get an empty vectorstore for unit tests."""
@ -20,15 +17,3 @@ class TestSync(ReadWriteTestSuite):
finally:
store.delete_collection()
pass
class TestAsync(AsyncReadWriteTestSuite):
@pytest.fixture()
async def vectorstore(self) -> AsyncGenerator[VectorStore, None]: # type: ignore
"""Get an empty vectorstore for unit tests."""
store = Chroma(embedding_function=self.get_embeddings())
try:
yield store
finally:
store.delete_collection()
pass

View File

@ -23,7 +23,7 @@ from .chat_models import ChatModelIntegrationTests
from .embeddings import EmbeddingsIntegrationTests
from .retrievers import RetrieversIntegrationTests
from .tools import ToolsIntegrationTests
from .vectorstores import AsyncReadWriteTestSuite, ReadWriteTestSuite
from .vectorstores import VectorStoreIntegrationTests
__all__ = [
"ChatModelIntegrationTests",
@ -33,7 +33,6 @@ __all__ = [
"BaseStoreSyncTests",
"AsyncCacheTestSuite",
"SyncCacheTestSuite",
"AsyncReadWriteTestSuite",
"ReadWriteTestSuite",
"VectorStoreIntegrationTests",
"RetrieversIntegrationTests",
]

View File

@ -14,8 +14,8 @@ from langchain_tests.base import BaseStandardTests
EMBEDDING_SIZE = 6
class ReadWriteTestSuite(BaseStandardTests):
"""Test suite for checking the synchronous read-write API of a vector store.
class VectorStoreIntegrationTests(BaseStandardTests):
"""Test suite for checking the read-write API of a vector store.
Implementers should subclass this test suite and provide a fixture
that returns an empty vector store for each test.
@ -32,10 +32,10 @@ class ReadWriteTestSuite(BaseStandardTests):
import pytest
from langchain_core.vectorstores import VectorStore
from langchain_parrot_link.vectorstores import ParrotVectorStore
from langchain_tests.integration_tests.vectorstores import ReadWriteTestSuite
from langchain_tests.integration_tests.vectorstores import VectorStoreIntegrationTests
class TestSync(ReadWriteTestSuite):
class TestParrotVectorStore(VectorStoreIntegrationTests):
@pytest.fixture()
def vectorstore(self) -> Generator[VectorStore, None, None]: # type: ignore
\"\"\"Get an empty vectorstore.\"\"\"
@ -60,15 +60,15 @@ class ReadWriteTestSuite(BaseStandardTests):
import pytest
from langchain_core.vectorstores import VectorStore
from langchain_tests.integration_tests.vectorstores import ReadWriteTestSuite
from langchain_tests.integration_tests.vectorstores import VectorStoreIntegrationTests
from langchain_chroma import Chroma
class TestSync(ReadWriteTestSuite):
class TestChromaStandard(VectorStoreIntegrationTests):
@pytest.fixture()
def vectorstore(self) -> Generator[VectorStore, None, None]: # type: ignore
\"\"\"Get an empty vectorstore.\"\"\"
\"\"\"Get an empty vectorstore for unit tests.\"\"\"
store = Chroma(embedding_function=self.get_embeddings())
try:
yield store
@ -107,7 +107,7 @@ class ReadWriteTestSuite(BaseStandardTests):
.. dropdown:: Troubleshooting
If this test fails, check that the test class (i.e., subclass of
``ReadWriteTestSuite``) initializes an empty vector store in the
``VectorStoreIntegrationTests``) initializes an empty vector store in the
``vectorstore`` fixture.
"""
assert vectorstore.similarity_search("foo", k=1) == []
@ -149,7 +149,7 @@ class ReadWriteTestSuite(BaseStandardTests):
.. dropdown:: Troubleshooting
If this test fails, check that the test class (i.e., subclass of
``ReadWriteTestSuite``) correctly clears the vector store in the
``VectorStoreIntegrationTests``) correctly clears the vector store in the
``finally`` block.
"""
assert vectorstore.similarity_search("foo", k=1) == []
@ -384,106 +384,18 @@ class ReadWriteTestSuite(BaseStandardTests):
Document(page_content="bar", metadata={"id": 2}, id=ids[1]),
]
class AsyncReadWriteTestSuite(BaseStandardTests):
"""Test suite for checking the async read-write API of a vector store.
Implementers should subclass this test suite and provide a fixture
that returns an empty vector store for each test.
The fixture should use the ``get_embeddings`` method to get a pre-defined
embeddings model that should be used for this test suite.
Here is a template:
.. code-block:: python
from typing import AsyncGenerator
import pytest
from langchain_core.vectorstores import VectorStore
from langchain_parrot_link.vectorstores import ParrotVectorStore
from langchain_tests.integration_tests.vectorstores import AsyncReadWriteTestSuite
class TestAsync(AsyncReadWriteTestSuite):
@pytest.fixture()
def vectorstore(self) -> AsyncGenerator[VectorStore, None]: # type: ignore
\"\"\"Get an empty vectorstore.\"\"\"
store = ParrotVectorStore(self.get_embeddings())
# note: store should be EMPTY at this point
# if you need to delete data, you may do so here
try:
yield store
finally:
# cleanup operations, or deleting data
pass
In the fixture, before the ``yield`` we instantiate an empty vector store. In the
``finally`` block, we call whatever logic is necessary to bring the vector store
to a clean state.
Example:
.. code-block:: python
from typing import AsyncGenerator, Generator
import pytest
from langchain_core.vectorstores import VectorStore
from langchain_tests.integration_tests.vectorstores import AsyncReadWriteTestSuite
from langchain_chroma import Chroma
class TestAsync(AsyncReadWriteTestSuite):
@pytest.fixture()
async def vectorstore(self) -> AsyncGenerator[VectorStore, None]: # type: ignore
\"\"\"Get an empty vectorstore for unit tests.\"\"\"
store = Chroma(embedding_function=self.get_embeddings())
try:
yield store
finally:
store.delete_collection()
pass
.. note::
API references for individual test methods include troubleshooting tips.
""" # noqa: E501
@abstractmethod
@pytest.fixture
async def vectorstore(self) -> VectorStore:
"""Get the vectorstore class to test.
The returned vectorstore should be EMPTY.
"""
@staticmethod
def get_embeddings() -> Embeddings:
"""A pre-defined embeddings model that should be used for this test.
This currently uses ``DeterministicFakeEmbedding`` from ``langchain-core``,
which uses numpy to generate random numbers based on a hash of the input text.
The resulting embeddings are not meaningful, but they are deterministic.
"""
return DeterministicFakeEmbedding(
size=EMBEDDING_SIZE,
)
async def test_vectorstore_is_empty(self, vectorstore: VectorStore) -> None:
async def test_vectorstore_is_empty_async(self, vectorstore: VectorStore) -> None:
"""Test that the vectorstore is empty.
.. dropdown:: Troubleshooting
If this test fails, check that the test class (i.e., subclass of
``AsyncReadWriteTestSuite``) initializes an empty vector store in the
``VectorStoreIntegrationTests``) initializes an empty vector store in the
``vectorstore`` fixture.
"""
assert await vectorstore.asimilarity_search("foo", k=1) == []
async def test_add_documents(self, vectorstore: VectorStore) -> None:
async def test_add_documents_async(self, vectorstore: VectorStore) -> None:
"""Test adding documents into the vectorstore.
.. dropdown:: Troubleshooting
@ -512,7 +424,9 @@ class AsyncReadWriteTestSuite(BaseStandardTests):
Document(page_content="bar", metadata={"id": 2}),
]
async def test_vectorstore_still_empty(self, vectorstore: VectorStore) -> None:
async def test_vectorstore_still_empty_async(
self, vectorstore: VectorStore
) -> None:
"""This test should follow a test that adds documents.
This just verifies that the fixture is set up properly to be empty
@ -521,12 +435,12 @@ class AsyncReadWriteTestSuite(BaseStandardTests):
.. dropdown:: Troubleshooting
If this test fails, check that the test class (i.e., subclass of
``AsyncReadWriteTestSuite``) correctly clears the vector store in the
``VectorStoreIntegrationTests``) correctly clears the vector store in the
``finally`` block.
"""
assert await vectorstore.asimilarity_search("foo", k=1) == []
async def test_deleting_documents(self, vectorstore: VectorStore) -> None:
async def test_deleting_documents_async(self, vectorstore: VectorStore) -> None:
"""Test deleting documents from the vectorstore.
.. dropdown:: Troubleshooting
@ -545,7 +459,9 @@ class AsyncReadWriteTestSuite(BaseStandardTests):
documents = await vectorstore.asimilarity_search("foo", k=1)
assert documents == [Document(page_content="bar", metadata={"id": 2}, id="2")]
async def test_deleting_bulk_documents(self, vectorstore: VectorStore) -> None:
async def test_deleting_bulk_documents_async(
self, vectorstore: VectorStore
) -> None:
"""Test that we can delete several documents at once.
.. dropdown:: Troubleshooting
@ -564,7 +480,7 @@ class AsyncReadWriteTestSuite(BaseStandardTests):
documents = await vectorstore.asimilarity_search("foo", k=1)
assert documents == [Document(page_content="baz", metadata={"id": 3}, id="3")]
async def test_delete_missing_content(self, vectorstore: VectorStore) -> None:
async def test_delete_missing_content_async(self, vectorstore: VectorStore) -> None:
"""Deleting missing content should not raise an exception.
.. dropdown:: Troubleshooting
@ -575,7 +491,7 @@ class AsyncReadWriteTestSuite(BaseStandardTests):
await vectorstore.adelete(["1"])
await vectorstore.adelete(["1", "2", "3"])
async def test_add_documents_with_ids_is_idempotent(
async def test_add_documents_with_ids_is_idempotent_async(
self, vectorstore: VectorStore
) -> None:
"""Adding by ID should be idempotent.
@ -598,7 +514,7 @@ class AsyncReadWriteTestSuite(BaseStandardTests):
Document(page_content="foo", metadata={"id": 1}, id="1"),
]
async def test_add_documents_by_id_with_mutation(
async def test_add_documents_by_id_with_mutation_async(
self, vectorstore: VectorStore
) -> None:
"""Test that we can overwrite by ID using add_documents.
@ -636,7 +552,7 @@ class AsyncReadWriteTestSuite(BaseStandardTests):
Document(id="2", page_content="bar", metadata={"id": 2}),
]
async def test_get_by_ids(self, vectorstore: VectorStore) -> None:
async def test_get_by_ids_async(self, vectorstore: VectorStore) -> None:
"""Test get by IDs.
This test requires that ``get_by_ids`` be implemented on the vector store.
@ -668,7 +584,7 @@ class AsyncReadWriteTestSuite(BaseStandardTests):
Document(page_content="bar", metadata={"id": 2}, id=ids[1]),
]
async def test_get_by_ids_missing(self, vectorstore: VectorStore) -> None:
async def test_get_by_ids_missing_async(self, vectorstore: VectorStore) -> None:
"""Test get by IDs with missing IDs.
.. dropdown:: Troubleshooting
@ -690,7 +606,9 @@ class AsyncReadWriteTestSuite(BaseStandardTests):
# This should not raise an exception
assert await vectorstore.aget_by_ids(["1", "2", "3"]) == []
async def test_add_documents_documents(self, vectorstore: VectorStore) -> None:
async def test_add_documents_documents_async(
self, vectorstore: VectorStore
) -> None:
"""Run add_documents tests.
.. dropdown:: Troubleshooting
@ -722,7 +640,7 @@ class AsyncReadWriteTestSuite(BaseStandardTests):
Document(page_content="bar", metadata={"id": 2}, id=ids[1]),
]
async def test_add_documents_with_existing_ids(
async def test_add_documents_with_existing_ids_async(
self, vectorstore: VectorStore
) -> None:
"""Test that add_documents with existing IDs is idempotent.

View File

@ -4,21 +4,11 @@ from langchain_core.vectorstores import (
VectorStore,
)
from langchain_tests.integration_tests.vectorstores import (
AsyncReadWriteTestSuite,
ReadWriteTestSuite,
)
from langchain_tests.integration_tests.vectorstores import VectorStoreIntegrationTests
class TestInMemoryVectorStore(ReadWriteTestSuite):
class TestInMemoryVectorStore(VectorStoreIntegrationTests):
@pytest.fixture
def vectorstore(self) -> VectorStore:
embeddings = self.get_embeddings()
return InMemoryVectorStore(embedding=embeddings)
class TestAsyncInMemoryVectorStore(AsyncReadWriteTestSuite):
@pytest.fixture
async def vectorstore(self) -> VectorStore:
embeddings = self.get_embeddings()
return InMemoryVectorStore(embedding=embeddings)