community[patch]: fixed vector similarity filtering (#16967)

**Description:** Changed the filtering so that a document that fails the
filter is no longer added to the results. Currently, filtering is entirely
broken: all documents are returned whether or not they pass the filter.

Fixes the issue introduced in
https://github.com/langchain-ai/langchain/pull/16190
This commit is contained in:
Spencer Kelly
2024-02-12 14:52:57 -08:00
committed by GitHub
parent a23c719c8b
commit 54fa78c887
2 changed files with 9 additions and 6 deletions

View File

@@ -333,8 +333,9 @@ class FAISS(VectorStore):
doc = self.docstore.search(_id)
if not isinstance(doc, Document):
raise ValueError(f"Could not find document for id {_id}, got {doc}")
if filter is not None and filter_func(doc.metadata):
docs.append((doc, scores[0][j]))
if filter is not None:
if filter_func(doc.metadata):
docs.append((doc, scores[0][j]))
else:
docs.append((doc, scores[0][j]))

View File

@@ -438,8 +438,9 @@ def test_faiss_with_metadatas_and_filter() -> None:
)
assert docsearch.docstore.__dict__ == expected_docstore.__dict__
output = docsearch.similarity_search("foo", k=1, filter={"page": 1})
assert output == [Document(page_content="foo", metadata={"page": 0})]
assert output != [Document(page_content="bar", metadata={"page": 1})]
# Make sure it returns the result that matches the filter,
# not the one whose text matches better.
assert output == [Document(page_content="bar", metadata={"page": 1})]
assert output == docsearch.similarity_search(
"foo", k=1, filter=lambda di: di["page"] == 1
)
@@ -465,8 +466,9 @@ async def test_faiss_async_with_metadatas_and_filter() -> None:
)
assert docsearch.docstore.__dict__ == expected_docstore.__dict__
output = await docsearch.asimilarity_search("foo", k=1, filter={"page": 1})
assert output == [Document(page_content="foo", metadata={"page": 0})]
assert output != [Document(page_content="bar", metadata={"page": 1})]
# Make sure it returns the result that matches the filter,
# not the one whose text matches better.
assert output == [Document(page_content="bar", metadata={"page": 1})]
assert output == await docsearch.asimilarity_search(
"foo", k=1, filter=lambda di: di["page"] == 1
)