community[minor]: Add metadata filtering support for neo4j vector (#20001)

This commit is contained in:
Tomaz Bratanic
2024-04-04 17:37:06 +02:00
committed by GitHub
parent b52b78478f
commit df25829f33
2 changed files with 367 additions and 6 deletions

View File

@@ -1,6 +1,6 @@
"""Test Neo4jVector functionality."""
import os
from typing import List
from typing import Any, Dict, List, cast
from langchain_core.documents import Document
@@ -11,6 +11,13 @@ from langchain_community.vectorstores.neo4j_vector import (
)
from langchain_community.vectorstores.utils import DistanceStrategy
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
from tests.integration_tests.vectorstores.fixtures.filtering_test_cases import (
DOCUMENTS,
TYPE_1_FILTERING_TEST_CASES,
TYPE_2_FILTERING_TEST_CASES,
TYPE_3_FILTERING_TEST_CASES,
TYPE_4_FILTERING_TEST_CASES,
)
url = os.environ.get("NEO4J_URL", "bolt://localhost:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
@@ -721,6 +728,8 @@ def test_index_fetching() -> None:
index_0_store = fetch_store(index_0_str)
assert index_0_store.index_name == index_0_str
drop_vector_indexes(index_1_store)
drop_vector_indexes(index_0_store)
def test_retrieval_params() -> None:
@@ -741,6 +750,7 @@ def test_retrieval_params() -> None:
Document(page_content="test", metadata={"test": "test1"}),
Document(page_content="test", metadata={"test": "test1"}),
]
drop_vector_indexes(docsearch)
def test_retrieval_dictionary() -> None:
@@ -767,3 +777,38 @@ def test_retrieval_dictionary() -> None:
]
output = docsearch.similarity_search("Foo", k=1)
assert output == expected_output
drop_vector_indexes(docsearch)
def test_metadata_filters_type1() -> None:
"""Test metadata filters"""
docsearch = Neo4jVector.from_documents(
DOCUMENTS,
embedding=FakeEmbeddings(),
pre_delete_collection=True,
)
# We don't test type 5, because LIKE has very SQL specific examples
for example in (
TYPE_1_FILTERING_TEST_CASES
+ TYPE_2_FILTERING_TEST_CASES
+ TYPE_3_FILTERING_TEST_CASES
+ TYPE_4_FILTERING_TEST_CASES
):
filter_dict = cast(Dict[str, Any], example[0])
output = docsearch.similarity_search("Foo", filter=filter_dict)
indices = cast(List[int], example[1])
adjusted_indices = [index - 1 for index in indices]
expected_output = [DOCUMENTS[index] for index in adjusted_indices]
# We don't return id properties from similarity search by default
# Also remove any key where the value is None
for doc in expected_output:
if "id" in doc.metadata:
del doc.metadata["id"]
keys_with_none = [
key for key, value in doc.metadata.items() if value is None
]
for key in keys_with_none:
del doc.metadata[key]
assert output == expected_output
drop_vector_indexes(docsearch)