mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-17 23:41:46 +00:00
community[patch]: fixed vector similarity filtering (#16967)
**Description:** Changed the filtering so that a document that fails the filter is no longer added to the results. Before this change, filtering was entirely broken: all documents were returned whether or not they passed the filter. Fixes the issue introduced in https://github.com/langchain-ai/langchain/pull/16190
This commit is contained in:
@@ -333,8 +333,9 @@ class FAISS(VectorStore):
|
||||
doc = self.docstore.search(_id)
|
||||
if not isinstance(doc, Document):
|
||||
raise ValueError(f"Could not find document for id {_id}, got {doc}")
|
||||
if filter is not None and filter_func(doc.metadata):
|
||||
docs.append((doc, scores[0][j]))
|
||||
if filter is not None:
|
||||
if filter_func(doc.metadata):
|
||||
docs.append((doc, scores[0][j]))
|
||||
else:
|
||||
docs.append((doc, scores[0][j]))
|
||||
|
||||
|
@@ -438,8 +438,9 @@ def test_faiss_with_metadatas_and_filter() -> None:
|
||||
)
|
||||
assert docsearch.docstore.__dict__ == expected_docstore.__dict__
|
||||
output = docsearch.similarity_search("foo", k=1, filter={"page": 1})
|
||||
assert output == [Document(page_content="foo", metadata={"page": 0})]
|
||||
assert output != [Document(page_content="bar", metadata={"page": 1})]
|
||||
# make sure it returns the result that matches the filter.
|
||||
# Not the one whose text matches better.
|
||||
assert output == [Document(page_content="bar", metadata={"page": 1})]
|
||||
assert output == docsearch.similarity_search(
|
||||
"foo", k=1, filter=lambda di: di["page"] == 1
|
||||
)
|
||||
@@ -465,8 +466,9 @@ async def test_faiss_async_with_metadatas_and_filter() -> None:
|
||||
)
|
||||
assert docsearch.docstore.__dict__ == expected_docstore.__dict__
|
||||
output = await docsearch.asimilarity_search("foo", k=1, filter={"page": 1})
|
||||
assert output == [Document(page_content="foo", metadata={"page": 0})]
|
||||
assert output != [Document(page_content="bar", metadata={"page": 1})]
|
||||
# make sure it returns the result that matches the filter.
|
||||
# Not the one whose text matches better.
|
||||
assert output == [Document(page_content="bar", metadata={"page": 1})]
|
||||
assert output == await docsearch.asimilarity_search(
|
||||
"foo", k=1, filter=lambda di: di["page"] == 1
|
||||
)
|
||||
|
Reference in New Issue
Block a user