feat(core): add id field to Document passed to filter for InMemoryVectorStore similarity search (#32688)

Added an id field to the Document passed to filter for
InMemoryVectorStore similarity search. This allows filtering by Document
id and brings the input to the filter in line with the result returned
by the vector similarity search.

---------

Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
This commit is contained in:
PieterKok-jaam
2025-09-08 22:39:18 +02:00
committed by GitHub
parent 97dd7628d2
commit 33c7f230e0
2 changed files with 39 additions and 1 deletions

View File

@@ -376,7 +376,11 @@ class InMemoryVectorStore(VectorStore):
docs = [
doc
for doc in docs
if filter(Document(page_content=doc["text"], metadata=doc["metadata"]))
if filter(
Document(
id=doc["id"], page_content=doc["text"], metadata=doc["metadata"]
)
)
]
if not docs:

View File

@@ -117,6 +117,40 @@ async def test_inmemory_filter() -> None:
assert output == []
async def test_inmemory_filter_by_document_id() -> None:
    """Verify that a filter callable can select results by ``Document.id``.

    Covers the sync search, the async search, and an id that matches
    no stored document.
    """
    vector_store = InMemoryVectorStore(
        embedding=DeterministicFakeEmbedding(size=6)
    )

    # Store three documents, each with an explicit id.
    vector_store.add_documents(
        [
            Document(page_content="first document", id="doc_1"),
            Document(page_content="second document", id="doc_2"),
            Document(page_content="third document", id="doc_3"),
        ]
    )

    # Sync search: a single id should yield exactly that document.
    single_match = vector_store.similarity_search(
        "document",
        filter=lambda candidate: candidate.id == "doc_2",
    )
    assert len(single_match) == 1
    assert single_match[0].page_content == "second document"
    assert single_match[0].id == "doc_2"

    # Async search: membership filter over two ids.
    pair_match = await vector_store.asimilarity_search(
        "document",
        filter=lambda candidate: candidate.id in ["doc_1", "doc_3"],
    )
    assert len(pair_match) == 2
    assert {found.id for found in pair_match} == {"doc_1", "doc_3"}

    # An id that was never stored filters everything out.
    no_match = vector_store.similarity_search(
        "document",
        filter=lambda candidate: candidate.id == "non_existent",
    )
    assert no_match == []
async def test_inmemory_upsert() -> None:
"""Test upsert documents."""
embedding = DeterministicFakeEmbedding(size=2)