standard-tests[minor]: add unit tests for testing get_by_ids, aget_by_ids, upsert, aupsert_by_ids (#23919)

These standard unit tests provide standard tests for functionality
introduced in these PRs:

* https://github.com/langchain-ai/langchain/pull/23774
* https://github.com/langchain-ai/langchain/pull/23594
This commit is contained in:
Eugene Yurtsev 2024-07-05 15:11:54 -04:00 committed by GitHub
parent 74c7198906
commit e0889384d9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,4 +1,5 @@
"""Test suite to test vectostores."""
import inspect
from abc import ABC, abstractmethod
import pytest
@ -146,6 +147,58 @@ class ReadWriteTestSuite(ABC):
Document(id="2", page_content="bar", metadata={"id": 2}),
]
def test_get_by_ids(self, vectorstore: VectorStore) -> None:
"""Test get by IDs."""
documents = [
Document(page_content="foo", metadata={"id": 1}),
Document(page_content="bar", metadata={"id": 2}),
]
ids = vectorstore.add_documents(documents, ids=["1", "2"])
retrieved_documents = vectorstore.get_by_ids(ids)
assert retrieved_documents == [
Document(page_content="foo", metadata={"id": 1}, id=ids[0]),
Document(page_content="bar", metadata={"id": 2}, id=ids[1]),
]
def test_get_by_ids_missing(self, vectorstore: VectorStore) -> None:
"""Test get by IDs with missing IDs."""
# This should not raise an exception
documents = vectorstore.get_by_ids(["1", "2", "3"])
assert documents == []
def test_upsert_documents(self, vectorstore: VectorStore) -> None:
"""Run upsert tests."""
documents = [
Document(page_content="foo", metadata={"id": 1}),
Document(page_content="bar", metadata={"id": 2}),
]
response = vectorstore.upsert(documents)
ids = response["succeeded"]
assert vectorstore.get_by_ids(ids) == [
Document(page_content="foo", metadata={"id": 1}, id=ids[0]),
Document(page_content="bar", metadata={"id": 2}, id=ids[1]),
]
def test_upsert_with_existing_ids(self, vectorstore: VectorStore) -> None:
"""Test that upserting with existing IDs is idempotent."""
documents = [
Document(id="foo", page_content="foo", metadata={"id": 1}),
Document(page_content="bar", metadata={"id": 2}),
]
response = vectorstore.upsert(documents)
ids = response["succeeded"]
assert response["failed"] == []
assert "foo" in ids
assert vectorstore.get_by_ids(ids) == [
Document(page_content="foo", metadata={"id": 1}, id="foo"),
Document(page_content="bar", metadata={"id": 2}, id=ids[1]),
]
def test_upsert_documents_has_no_ids(self, vectorstore: VectorStore) -> None:
"""Verify that there is not parameter called ids in upsert"""
signature = inspect.signature(vectorstore.upsert)
assert "ids" not in signature.parameters
class AsyncReadWriteTestSuite(ABC):
"""Test suite for checking the **async** read-write API of a vectorstore.
@ -284,3 +337,54 @@ class AsyncReadWriteTestSuite(ABC):
),
Document(id="2", page_content="bar", metadata={"id": 2}),
]
async def test_get_by_ids(self, vectorstore: VectorStore) -> None:
"""Test get by IDs."""
documents = [
Document(page_content="foo", metadata={"id": 1}),
Document(page_content="bar", metadata={"id": 2}),
]
ids = await vectorstore.aadd_documents(documents, ids=["1", "2"])
retrieved_documents = await vectorstore.aget_by_ids(ids)
assert retrieved_documents == [
Document(page_content="foo", metadata={"id": 1}, id=ids[0]),
Document(page_content="bar", metadata={"id": 2}, id=ids[1]),
]
async def test_get_by_ids_missing(self, vectorstore: VectorStore) -> None:
"""Test get by IDs with missing IDs."""
# This should not raise an exception
assert await vectorstore.aget_by_ids(["1", "2", "3"]) == []
async def test_upsert_documents(self, vectorstore: VectorStore) -> None:
"""Run upsert tests."""
documents = [
Document(page_content="foo", metadata={"id": 1}),
Document(page_content="bar", metadata={"id": 2}),
]
response = await vectorstore.aupsert(documents)
ids = response["succeeded"]
assert await vectorstore.aget_by_ids(ids) == [
Document(page_content="foo", metadata={"id": 1}, id=ids[0]),
Document(page_content="bar", metadata={"id": 2}, id=ids[1]),
]
async def test_upsert_with_existing_ids(self, vectorstore: VectorStore) -> None:
"""Test that upserting with existing IDs is idempotent."""
documents = [
Document(id="foo", page_content="foo", metadata={"id": 1}),
Document(page_content="bar", metadata={"id": 2}),
]
response = await vectorstore.aupsert(documents)
ids = response["succeeded"]
assert response["failed"] == []
assert "foo" in ids
assert await vectorstore.aget_by_ids(ids) == [
Document(page_content="foo", metadata={"id": 1}, id="foo"),
Document(page_content="bar", metadata={"id": 2}, id=ids[1]),
]
async def test_upsert_documents_has_no_ids(self, vectorstore: VectorStore) -> None:
"""Verify that there is not parameter called ids in upsert"""
signature = inspect.signature(vectorstore.aupsert)
assert "ids" not in signature.parameters