mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-05 04:55:14 +00:00
core[patch]: Deprecating beta upsert APIs in vectorstore (#25069)
This PR deprecates the beta upsert APIs in vectorstore. We'll introduce them in a V2 abstraction instead to keep the existing vectorstore implementations lighter weight. The main problem with the existing APIs is that it's a bit more challenging to implement the correct behavior with respect to IDs, since an ID can be present both in the function signature and as an optional attribute on the document object. But VectorStores that pass the standard tests should have implemented the semantics properly! --------- Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
@@ -1,6 +1,5 @@
|
||||
"""Test suite to test vectorstores."""
|
||||
|
||||
import inspect
|
||||
from abc import abstractmethod
|
||||
|
||||
import pytest
|
||||
@@ -169,39 +168,31 @@ class ReadWriteTestSuite(BaseStandardTests):
|
||||
documents = vectorstore.get_by_ids(["1", "2", "3"])
|
||||
assert documents == []
|
||||
|
||||
def test_upsert_documents(self, vectorstore: VectorStore) -> None:
|
||||
"""Run upsert tests."""
|
||||
def test_add_documents_documents(self, vectorstore: VectorStore) -> None:
|
||||
"""Run add_documents tests."""
|
||||
documents = [
|
||||
Document(page_content="foo", metadata={"id": 1}),
|
||||
Document(page_content="bar", metadata={"id": 2}),
|
||||
]
|
||||
response = vectorstore.upsert(documents)
|
||||
ids = response["succeeded"]
|
||||
ids = vectorstore.add_documents(documents)
|
||||
assert vectorstore.get_by_ids(ids) == [
|
||||
Document(page_content="foo", metadata={"id": 1}, id=ids[0]),
|
||||
Document(page_content="bar", metadata={"id": 2}, id=ids[1]),
|
||||
]
|
||||
|
||||
def test_upsert_with_existing_ids(self, vectorstore: VectorStore) -> None:
|
||||
"""Test that upserting with existing IDs is idempotent."""
|
||||
def test_add_documents_with_existing_ids(self, vectorstore: VectorStore) -> None:
|
||||
"""Test that add_documents with existing IDs is idempotent."""
|
||||
documents = [
|
||||
Document(id="foo", page_content="foo", metadata={"id": 1}),
|
||||
Document(page_content="bar", metadata={"id": 2}),
|
||||
]
|
||||
response = vectorstore.upsert(documents)
|
||||
ids = response["succeeded"]
|
||||
assert response["failed"] == []
|
||||
ids = vectorstore.add_documents(documents)
|
||||
assert "foo" in ids
|
||||
assert vectorstore.get_by_ids(ids) == [
|
||||
Document(page_content="foo", metadata={"id": 1}, id="foo"),
|
||||
Document(page_content="bar", metadata={"id": 2}, id=ids[1]),
|
||||
]
|
||||
|
||||
def test_upsert_documents_has_no_ids(self, vectorstore: VectorStore) -> None:
|
||||
"""Verify that there is no parameter called ids in upsert"""
|
||||
signature = inspect.signature(vectorstore.upsert)
|
||||
assert "ids" not in signature.parameters
|
||||
|
||||
|
||||
class AsyncReadWriteTestSuite(BaseStandardTests):
|
||||
"""Test suite for checking the **async** read-write API of a vectorstore.
|
||||
@@ -359,35 +350,29 @@ class AsyncReadWriteTestSuite(BaseStandardTests):
|
||||
# This should not raise an exception
|
||||
assert await vectorstore.aget_by_ids(["1", "2", "3"]) == []
|
||||
|
||||
async def test_upsert_documents(self, vectorstore: VectorStore) -> None:
|
||||
"""Run upsert tests."""
|
||||
async def test_add_documents_documents(self, vectorstore: VectorStore) -> None:
|
||||
"""Run add_documents tests."""
|
||||
documents = [
|
||||
Document(page_content="foo", metadata={"id": 1}),
|
||||
Document(page_content="bar", metadata={"id": 2}),
|
||||
]
|
||||
response = await vectorstore.aupsert(documents)
|
||||
ids = response["succeeded"]
|
||||
ids = await vectorstore.aadd_documents(documents)
|
||||
assert await vectorstore.aget_by_ids(ids) == [
|
||||
Document(page_content="foo", metadata={"id": 1}, id=ids[0]),
|
||||
Document(page_content="bar", metadata={"id": 2}, id=ids[1]),
|
||||
]
|
||||
|
||||
async def test_upsert_with_existing_ids(self, vectorstore: VectorStore) -> None:
|
||||
"""Test that upserting with existing IDs is idempotent."""
|
||||
async def test_add_documents_with_existing_ids(
|
||||
self, vectorstore: VectorStore
|
||||
) -> None:
|
||||
"""Test that add_documents with existing IDs is idempotent."""
|
||||
documents = [
|
||||
Document(id="foo", page_content="foo", metadata={"id": 1}),
|
||||
Document(page_content="bar", metadata={"id": 2}),
|
||||
]
|
||||
response = await vectorstore.aupsert(documents)
|
||||
ids = response["succeeded"]
|
||||
assert response["failed"] == []
|
||||
ids = await vectorstore.aadd_documents(documents)
|
||||
assert "foo" in ids
|
||||
assert await vectorstore.aget_by_ids(ids) == [
|
||||
Document(page_content="foo", metadata={"id": 1}, id="foo"),
|
||||
Document(page_content="bar", metadata={"id": 2}, id=ids[1]),
|
||||
]
|
||||
|
||||
async def test_upsert_documents_has_no_ids(self, vectorstore: VectorStore) -> None:
|
||||
"""Verify that there is no parameter called ids in upsert"""
|
||||
signature = inspect.signature(vectorstore.aupsert)
|
||||
assert "ids" not in signature.parameters
|
||||
|
Reference in New Issue
Block a user