From e0889384d96843db96b0663a56b714d138c4190f Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Fri, 5 Jul 2024 15:11:54 -0400 Subject: [PATCH] standard-tests[minor]: add unit tests for testing get_by_ids, aget_by_ids, upsert, aupsert_by_ids (#23919) These standard unit tests provide standard tests for functionality introduced in these PRs: * https://github.com/langchain-ai/langchain/pull/23774 * https://github.com/langchain-ai/langchain/pull/23594 --- .../integration_tests/vectorstores.py | 104 ++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/libs/standard-tests/langchain_standard_tests/integration_tests/vectorstores.py b/libs/standard-tests/langchain_standard_tests/integration_tests/vectorstores.py index 81c3e828857..4e9dc905054 100644 --- a/libs/standard-tests/langchain_standard_tests/integration_tests/vectorstores.py +++ b/libs/standard-tests/langchain_standard_tests/integration_tests/vectorstores.py @@ -1,4 +1,5 @@ """Test suite to test vectostores.""" +import inspect from abc import ABC, abstractmethod import pytest @@ -146,6 +147,58 @@ class ReadWriteTestSuite(ABC): Document(id="2", page_content="bar", metadata={"id": 2}), ] + def test_get_by_ids(self, vectorstore: VectorStore) -> None: + """Test get by IDs.""" + documents = [ + Document(page_content="foo", metadata={"id": 1}), + Document(page_content="bar", metadata={"id": 2}), + ] + ids = vectorstore.add_documents(documents, ids=["1", "2"]) + retrieved_documents = vectorstore.get_by_ids(ids) + assert retrieved_documents == [ + Document(page_content="foo", metadata={"id": 1}, id=ids[0]), + Document(page_content="bar", metadata={"id": 2}, id=ids[1]), + ] + + def test_get_by_ids_missing(self, vectorstore: VectorStore) -> None: + """Test get by IDs with missing IDs.""" + # This should not raise an exception + documents = vectorstore.get_by_ids(["1", "2", "3"]) + assert documents == [] + + def test_upsert_documents(self, vectorstore: VectorStore) -> None: + """Run upsert tests.""" + documents = [ + Document(page_content="foo", metadata={"id": 1}), + Document(page_content="bar", metadata={"id": 2}), + ] + response = vectorstore.upsert(documents) + ids = response["succeeded"] + assert vectorstore.get_by_ids(ids) == [ + Document(page_content="foo", metadata={"id": 1}, id=ids[0]), + Document(page_content="bar", metadata={"id": 2}, id=ids[1]), + ] + + def test_upsert_with_existing_ids(self, vectorstore: VectorStore) -> None: + """Test that upserting with existing IDs is idempotent.""" + documents = [ + Document(id="foo", page_content="foo", metadata={"id": 1}), + Document(page_content="bar", metadata={"id": 2}), + ] + response = vectorstore.upsert(documents) + ids = response["succeeded"] + assert response["failed"] == [] + assert "foo" in ids + assert vectorstore.get_by_ids(ids) == [ + Document(page_content="foo", metadata={"id": 1}, id="foo"), + Document(page_content="bar", metadata={"id": 2}, id=ids[1]), + ] + + def test_upsert_documents_has_no_ids(self, vectorstore: VectorStore) -> None: + """Verify that there is not parameter called ids in upsert""" + signature = inspect.signature(vectorstore.upsert) + assert "ids" not in signature.parameters + class AsyncReadWriteTestSuite(ABC): """Test suite for checking the **async** read-write API of a vectorstore. @@ -284,3 +337,54 @@ class AsyncReadWriteTestSuite(ABC): ), Document(id="2", page_content="bar", metadata={"id": 2}), ] + + async def test_get_by_ids(self, vectorstore: VectorStore) -> None: + """Test get by IDs.""" + documents = [ + Document(page_content="foo", metadata={"id": 1}), + Document(page_content="bar", metadata={"id": 2}), + ] + ids = await vectorstore.aadd_documents(documents, ids=["1", "2"]) + retrieved_documents = await vectorstore.aget_by_ids(ids) + assert retrieved_documents == [ + Document(page_content="foo", metadata={"id": 1}, id=ids[0]), + Document(page_content="bar", metadata={"id": 2}, id=ids[1]), + ] + + async def test_get_by_ids_missing(self, vectorstore: VectorStore) -> None: + """Test get by IDs with missing IDs.""" + # This should not raise an exception + assert await vectorstore.aget_by_ids(["1", "2", "3"]) == [] + + async def test_upsert_documents(self, vectorstore: VectorStore) -> None: + """Run upsert tests.""" + documents = [ + Document(page_content="foo", metadata={"id": 1}), + Document(page_content="bar", metadata={"id": 2}), + ] + response = await vectorstore.aupsert(documents) + ids = response["succeeded"] + assert await vectorstore.aget_by_ids(ids) == [ + Document(page_content="foo", metadata={"id": 1}, id=ids[0]), + Document(page_content="bar", metadata={"id": 2}, id=ids[1]), + ] + + async def test_upsert_with_existing_ids(self, vectorstore: VectorStore) -> None: + """Test that upserting with existing IDs is idempotent.""" + documents = [ + Document(id="foo", page_content="foo", metadata={"id": 1}), + Document(page_content="bar", metadata={"id": 2}), + ] + response = await vectorstore.aupsert(documents) + ids = response["succeeded"] + assert response["failed"] == [] + assert "foo" in ids + assert await vectorstore.aget_by_ids(ids) == [ + Document(page_content="foo", metadata={"id": 1}, id="foo"), + Document(page_content="bar", metadata={"id": 2}, id=ids[1]), + ] + + async def test_upsert_documents_has_no_ids(self, vectorstore: VectorStore) -> None: + """Verify that there is not parameter called ids in upsert""" + signature = inspect.signature(vectorstore.aupsert) + assert "ids" not in signature.parameters