core[patch]: Deprecating beta upsert APIs in vectorstore (#25069)

This PR deprecates the beta upsert APIs in vectorstore.

We'll introduce them in a V2 abstraction instead to keep the existing
vectorstore implementations lighter weight.

The main problem with the existing APIs is that they make it a bit more
challenging to implement the correct behavior with respect to IDs, since
an ID can be present both in the function signature and as an optional
attribute on the document object.

But VectorStores that pass the standard tests should have implemented
the semantics properly!

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
Eugene Yurtsev
2024-08-09 17:17:36 -04:00
committed by GitHub
parent ca9dcee940
commit 6dd9f053e3
5 changed files with 283 additions and 421 deletions

View File

@@ -1,6 +1,5 @@
"""Test suite to test vectostores."""
import inspect
from abc import abstractmethod
import pytest
@@ -169,39 +168,31 @@ class ReadWriteTestSuite(BaseStandardTests):
documents = vectorstore.get_by_ids(["1", "2", "3"])
assert documents == []
def test_upsert_documents(self, vectorstore: VectorStore) -> None:
"""Run upsert tests."""
def test_add_documents_documents(self, vectorstore: VectorStore) -> None:
"""Run add_documents tests."""
documents = [
Document(page_content="foo", metadata={"id": 1}),
Document(page_content="bar", metadata={"id": 2}),
]
response = vectorstore.upsert(documents)
ids = response["succeeded"]
ids = vectorstore.add_documents(documents)
assert vectorstore.get_by_ids(ids) == [
Document(page_content="foo", metadata={"id": 1}, id=ids[0]),
Document(page_content="bar", metadata={"id": 2}, id=ids[1]),
]
def test_upsert_with_existing_ids(self, vectorstore: VectorStore) -> None:
"""Test that upserting with existing IDs is idempotent."""
def test_add_documents_with_existing_ids(self, vectorstore: VectorStore) -> None:
"""Test that add_documentsing with existing IDs is idempotent."""
documents = [
Document(id="foo", page_content="foo", metadata={"id": 1}),
Document(page_content="bar", metadata={"id": 2}),
]
response = vectorstore.upsert(documents)
ids = response["succeeded"]
assert response["failed"] == []
ids = vectorstore.add_documents(documents)
assert "foo" in ids
assert vectorstore.get_by_ids(ids) == [
Document(page_content="foo", metadata={"id": 1}, id="foo"),
Document(page_content="bar", metadata={"id": 2}, id=ids[1]),
]
def test_upsert_documents_has_no_ids(self, vectorstore: VectorStore) -> None:
"""Verify that there is not parameter called ids in upsert"""
signature = inspect.signature(vectorstore.upsert)
assert "ids" not in signature.parameters
class AsyncReadWriteTestSuite(BaseStandardTests):
"""Test suite for checking the **async** read-write API of a vectorstore.
@@ -359,35 +350,29 @@ class AsyncReadWriteTestSuite(BaseStandardTests):
# This should not raise an exception
assert await vectorstore.aget_by_ids(["1", "2", "3"]) == []
async def test_upsert_documents(self, vectorstore: VectorStore) -> None:
"""Run upsert tests."""
async def test_add_documents_documents(self, vectorstore: VectorStore) -> None:
"""Run add_documents tests."""
documents = [
Document(page_content="foo", metadata={"id": 1}),
Document(page_content="bar", metadata={"id": 2}),
]
response = await vectorstore.aupsert(documents)
ids = response["succeeded"]
ids = await vectorstore.aadd_documents(documents)
assert await vectorstore.aget_by_ids(ids) == [
Document(page_content="foo", metadata={"id": 1}, id=ids[0]),
Document(page_content="bar", metadata={"id": 2}, id=ids[1]),
]
async def test_upsert_with_existing_ids(self, vectorstore: VectorStore) -> None:
"""Test that upserting with existing IDs is idempotent."""
async def test_add_documents_with_existing_ids(
self, vectorstore: VectorStore
) -> None:
"""Test that add_documentsing with existing IDs is idempotent."""
documents = [
Document(id="foo", page_content="foo", metadata={"id": 1}),
Document(page_content="bar", metadata={"id": 2}),
]
response = await vectorstore.aupsert(documents)
ids = response["succeeded"]
assert response["failed"] == []
ids = await vectorstore.aadd_documents(documents)
assert "foo" in ids
assert await vectorstore.aget_by_ids(ids) == [
Document(page_content="foo", metadata={"id": 1}, id="foo"),
Document(page_content="bar", metadata={"id": 2}, id=ids[1]),
]
async def test_upsert_documents_has_no_ids(self, vectorstore: VectorStore) -> None:
"""Verify that there is not parameter called ids in upsert"""
signature = inspect.signature(vectorstore.aupsert)
assert "ids" not in signature.parameters