community[minor]: Add metadata filtering support for neo4j vector (#20001)

2025-09-25 04:49:17 +00:00 · 2024-04-04 17:37:06 +02:00
parent b52b78478f
commit df25829f33
2 changed files with 367 additions and 6 deletions
--- a/libs/community/tests/integration_tests/vectorstores/test_neo4jvector.py
+++ b/libs/community/tests/integration_tests/vectorstores/test_neo4jvector.py
@@ -1,6 +1,6 @@
 """Test Neo4jVector functionality."""
 import os
-from typing import List
+from typing import Any, Dict, List, cast

 from langchain_core.documents import Document

@@ -11,6 +11,13 @@ from langchain_community.vectorstores.neo4j_vector import (
 )
 from langchain_community.vectorstores.utils import DistanceStrategy
 from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
+from tests.integration_tests.vectorstores.fixtures.filtering_test_cases import (
+    DOCUMENTS,
+    TYPE_1_FILTERING_TEST_CASES,
+    TYPE_2_FILTERING_TEST_CASES,
+    TYPE_3_FILTERING_TEST_CASES,
+    TYPE_4_FILTERING_TEST_CASES,
+)

 url = os.environ.get("NEO4J_URL", "bolt://localhost:7687")
 username = os.environ.get("NEO4J_USERNAME", "neo4j")
@@ -721,6 +728,8 @@ def test_index_fetching() -> None:

    index_0_store = fetch_store(index_0_str)
    assert index_0_store.index_name == index_0_str
+    drop_vector_indexes(index_1_store)
+    drop_vector_indexes(index_0_store)


 def test_retrieval_params() -> None:
@@ -741,6 +750,7 @@ def test_retrieval_params() -> None:
        Document(page_content="test", metadata={"test": "test1"}),
        Document(page_content="test", metadata={"test": "test1"}),
    ]
+    drop_vector_indexes(docsearch)


 def test_retrieval_dictionary() -> None:
@@ -767,3 +777,38 @@ def test_retrieval_dictionary() -> None:
    ]
    output = docsearch.similarity_search("Foo", k=1)
    assert output == expected_output
+    drop_vector_indexes(docsearch)
+
+
+def test_metadata_filters_type1() -> None:
+    """Test metadata filters of types 1-4 against the shared fixtures."""
+    docsearch = Neo4jVector.from_documents(
+        DOCUMENTS,
+        embedding=FakeEmbeddings(),
+        pre_delete_collection=True,
+    )
+    # We don't test type 5, because LIKE has very SQL specific examples
+    for example in (
+        TYPE_1_FILTERING_TEST_CASES
+        + TYPE_2_FILTERING_TEST_CASES
+        + TYPE_3_FILTERING_TEST_CASES
+        + TYPE_4_FILTERING_TEST_CASES
+    ):
+        filter_dict = cast(Dict[str, Any], example[0])
+        output = docsearch.similarity_search("Foo", filter=filter_dict)
+        # Indices are 1-based. Ids (not returned by similarity search by default)
+        # and None values are left out of copies, so DOCUMENTS is never mutated
+        sources = [DOCUMENTS[index - 1] for index in cast(List[int], example[1])]
+        expected_output = [
+            Document(
+                page_content=doc.page_content,
+                metadata={
+                    key: value
+                    for key, value in doc.metadata.items()
+                    if key != "id" and value is not None
+                },
+            )
+            for doc in sources
+        ]
+        assert output == expected_output
+    drop_vector_indexes(docsearch)