diff --git a/langchain/vectorstores/faiss.py b/langchain/vectorstores/faiss.py
index 6dd7fd544e7..1489bf8ff06 100644
--- a/langchain/vectorstores/faiss.py
+++ b/langchain/vectorstores/faiss.py
@@ -192,7 +192,8 @@ class FAISS(VectorStore):
         Args:
             embedding: Embedding vector to look up documents similar to.
             k: Number of Documents to return. Defaults to 4.
-            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
+            filter (Optional[Dict[str, Any]]): Filter by metadata. A list value
+                matches any of its elements. Defaults to None.
             fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
                       Defaults to 20.
             **kwargs: kwargs to be passed to similarity search. Can include:
@@ -218,7 +219,13 @@ class FAISS(VectorStore):
             if not isinstance(doc, Document):
                 raise ValueError(f"Could not find document for id {_id}, got {doc}")
             if filter is not None:
-                if all(doc.metadata.get(key) == value for key, value in filter.items()):
+                # A scalar filter value must equal the metadata value; a list
+                # filter value matches when it contains the metadata value.
+                if all(
+                    doc.metadata.get(key)
+                    in (value if isinstance(value, list) else [value])
+                    for key, value in filter.items()
+                ):
                     docs.append((doc, scores[0][j]))
             else:
                 docs.append((doc, scores[0][j]))
diff --git a/tests/integration_tests/vectorstores/test_faiss.py b/tests/integration_tests/vectorstores/test_faiss.py
index d33e6d7827e..37a66e8eb5b 100644
--- a/tests/integration_tests/vectorstores/test_faiss.py
+++ b/tests/integration_tests/vectorstores/test_faiss.py
@@ -96,6 +96,35 @@ def test_faiss_with_metadatas_and_filter() -> None:
     assert output == [Document(page_content="bar", metadata={"page": 1})]
 
 
+def test_faiss_with_metadatas_and_list_filter() -> None:
+    """Test that a list-valued filter matches any of the listed metadata values."""
+    texts = ["foo", "bar", "baz", "foo", "qux"]
+    metadatas = [{"page": min(i, 3)} for i in range(len(texts))]
+    docsearch = FAISS.from_texts(texts, FakeEmbeddings(), metadatas=metadatas)
+    expected_docstore = InMemoryDocstore(
+        {
+            docsearch.index_to_docstore_id[0]: Document(
+                page_content="foo", metadata={"page": 0}
+            ),
+            docsearch.index_to_docstore_id[1]: Document(
+                page_content="bar", metadata={"page": 1}
+            ),
+            docsearch.index_to_docstore_id[2]: Document(
+                page_content="baz", metadata={"page": 2}
+            ),
+            docsearch.index_to_docstore_id[3]: Document(
+                page_content="foo", metadata={"page": 3}
+            ),
+            docsearch.index_to_docstore_id[4]: Document(
+                page_content="qux", metadata={"page": 3}
+            ),
+        }
+    )
+    assert docsearch.docstore.__dict__ == expected_docstore.__dict__
+    output = docsearch.similarity_search("foor", k=1, filter={"page": [0, 1, 2]})
+    assert output == [Document(page_content="foo", metadata={"page": 0})]
+
+
 def test_faiss_search_not_found() -> None:
     """Test what happens when document is not found."""
     texts = ["foo", "bar", "baz"]