mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-11 16:01:33 +00:00
feat(core): add `id` field to `Document` passed to `filter` for `InMemoryVectorStore` similarity search (#32688)
Added an id field to the Document passed to filter for InMemoryVectorStore similarity search. This allows filtering by Document id and brings the input to the filter in line with the result returned by the vector similarity search. --------- Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
This commit is contained in:
@@ -376,7 +376,11 @@ class InMemoryVectorStore(VectorStore):
|
||||
docs = [
|
||||
doc
|
||||
for doc in docs
|
||||
if filter(Document(page_content=doc["text"], metadata=doc["metadata"]))
|
||||
if filter(
|
||||
Document(
|
||||
id=doc["id"], page_content=doc["text"], metadata=doc["metadata"]
|
||||
)
|
||||
)
|
||||
]
|
||||
|
||||
if not docs:
|
||||
|
@@ -117,6 +117,40 @@ async def test_inmemory_filter() -> None:
|
||||
assert output == []
|
||||
|
||||
|
||||
async def test_inmemory_filter_by_document_id() -> None:
    """Verify that similarity-search filters can select on the document ``id``."""
    vector_store = InMemoryVectorStore(embedding=DeterministicFakeEmbedding(size=6))

    # Insert three documents carrying explicit IDs via add_documents.
    vector_store.add_documents(
        [
            Document(page_content="first document", id="doc_1"),
            Document(page_content="second document", id="doc_2"),
            Document(page_content="third document", id="doc_3"),
        ]
    )

    # Sync search: a filter on one specific ID returns exactly that document.
    single_match = vector_store.similarity_search(
        "document", filter=lambda candidate: candidate.id == "doc_2"
    )
    assert len(single_match) == 1
    matched = single_match[0]
    assert matched.page_content == "second document"
    assert matched.id == "doc_2"

    # Async search: a membership filter returns both listed documents.
    pair_match = await vector_store.asimilarity_search(
        "document", filter=lambda candidate: candidate.id in ["doc_1", "doc_3"]
    )
    assert len(pair_match) == 2
    assert {found.id for found in pair_match} == {"doc_1", "doc_3"}

    # An ID that was never stored matches nothing.
    no_match = vector_store.similarity_search(
        "document", filter=lambda candidate: candidate.id == "non_existent"
    )
    assert no_match == []
|
||||
|
||||
|
||||
async def test_inmemory_upsert() -> None:
|
||||
"""Test upsert documents."""
|
||||
embedding = DeterministicFakeEmbedding(size=2)
|
||||
|
Reference in New Issue
Block a user