mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-26 16:43:35 +00:00
community: Add support for relationship indexes in neo4j vector (#20657)
Neo4j has added relationship vector indexes. LangChain cannot populate these indexes, but it can use existing relationship vector indexes for retrieval.
This commit is contained in:
parent
940242c1ec
commit
8c08cf4619
@ -68,10 +68,24 @@ class SearchType(str, enum.Enum):
|
||||
DEFAULT_SEARCH_TYPE = SearchType.VECTOR
|
||||
|
||||
|
||||
def _get_search_index_query(search_type: SearchType) -> str:
|
||||
class IndexType(str, enum.Enum):
    """Kind of graph entity that a vector index is defined over.

    Members are also ``str`` instances, so they compare equal to their
    plain-string values (``IndexType.NODE == "NODE"``).
    """

    # Vector index over nodes.
    NODE = "NODE"
    # Vector index over relationships.
    RELATIONSHIP = "RELATIONSHIP"


# Index type assumed when a caller does not specify one.
DEFAULT_INDEX_TYPE = IndexType.NODE
|
||||
|
||||
|
||||
def _get_search_index_query(
|
||||
search_type: SearchType, index_type: IndexType = DEFAULT_INDEX_TYPE
|
||||
) -> str:
|
||||
if index_type == IndexType.NODE:
|
||||
type_to_query_map = {
|
||||
SearchType.VECTOR: (
|
||||
"CALL db.index.vector.queryNodes($index, $k, $embedding) YIELD node, score "
|
||||
"CALL db.index.vector.queryNodes($index, $k, $embedding) "
|
||||
"YIELD node, score "
|
||||
),
|
||||
SearchType.HYBRID: (
|
||||
"CALL { "
|
||||
@ -81,8 +95,8 @@ def _get_search_index_query(search_type: SearchType) -> str:
|
||||
"UNWIND nodes AS n "
|
||||
# We use 0 as min
|
||||
"RETURN n.node AS node, (n.score / max) AS score UNION "
|
||||
"CALL db.index.fulltext.queryNodes($keyword_index, $query, {limit: $k}) "
|
||||
"YIELD node, score "
|
||||
"CALL db.index.fulltext.queryNodes($keyword_index, $query, "
|
||||
"{limit: $k}) YIELD node, score "
|
||||
"WITH collect({node:node, score:score}) AS nodes, max(score) AS max "
|
||||
"UNWIND nodes AS n "
|
||||
# We use 0 as min
|
||||
@ -93,6 +107,11 @@ def _get_search_index_query(search_type: SearchType) -> str:
|
||||
),
|
||||
}
|
||||
return type_to_query_map[search_type]
|
||||
else:
|
||||
return (
|
||||
"CALL db.index.vector.queryRelationships($index, $k, $embedding) "
|
||||
"YIELD relationship, score "
|
||||
)
|
||||
|
||||
|
||||
def check_if_not_null(props: List[str], values: List[Any]) -> None:
|
||||
@ -463,6 +482,7 @@ class Neo4jVector(VectorStore):
|
||||
pre_delete_collection: bool = False,
|
||||
retrieval_query: str = "",
|
||||
relevance_score_fn: Optional[Callable[[float], float]] = None,
|
||||
index_type: IndexType = DEFAULT_INDEX_TYPE,
|
||||
) -> None:
|
||||
try:
|
||||
import neo4j
|
||||
@ -541,6 +561,7 @@ class Neo4jVector(VectorStore):
|
||||
self.override_relevance_score_fn = relevance_score_fn
|
||||
self.retrieval_query = retrieval_query
|
||||
self.search_type = search_type
|
||||
self._index_type = index_type
|
||||
# Calculate embedding dimension
|
||||
self.embedding_dimension = len(embedding.embed_query("foo"))
|
||||
|
||||
@ -615,7 +636,7 @@ class Neo4jVector(VectorStore):
|
||||
# Flag for enterprise
|
||||
self._is_enterprise = True if db_data[0]["edition"] == "enterprise" else False
|
||||
|
||||
def retrieve_existing_index(self) -> Optional[int]:
|
||||
def retrieve_existing_index(self) -> Tuple[Optional[int], Optional[str]]:
|
||||
"""
|
||||
Check if the vector index exists in the Neo4j database
|
||||
and returns its embedding dimension.
|
||||
@ -630,11 +651,11 @@ class Neo4jVector(VectorStore):
|
||||
"""
|
||||
|
||||
index_information = self.query(
|
||||
"SHOW INDEXES YIELD name, type, labelsOrTypes, properties, options "
|
||||
"WHERE type = 'VECTOR' AND (name = $index_name "
|
||||
"SHOW INDEXES YIELD name, type, entityType, labelsOrTypes, "
|
||||
"properties, options WHERE type = 'VECTOR' AND (name = $index_name "
|
||||
"OR (labelsOrTypes[0] = $node_label AND "
|
||||
"properties[0] = $embedding_node_property)) "
|
||||
"RETURN name, labelsOrTypes, properties, options ",
|
||||
"RETURN name, entityType, labelsOrTypes, properties, options ",
|
||||
params={
|
||||
"index_name": self.index_name,
|
||||
"node_label": self.node_label,
|
||||
@ -647,13 +668,14 @@ class Neo4jVector(VectorStore):
|
||||
self.index_name = index_information[0]["name"]
|
||||
self.node_label = index_information[0]["labelsOrTypes"][0]
|
||||
self.embedding_node_property = index_information[0]["properties"][0]
|
||||
self._index_type = index_information[0]["entityType"]
|
||||
embedding_dimension = index_information[0]["options"]["indexConfig"][
|
||||
"vector.dimensions"
|
||||
]
|
||||
|
||||
return embedding_dimension
|
||||
return embedding_dimension, index_information[0]["entityType"]
|
||||
except IndexError:
|
||||
return None
|
||||
return None, None
|
||||
|
||||
def retrieve_existing_fts_index(
|
||||
self, text_node_properties: List[str] = []
|
||||
@ -754,7 +776,13 @@ class Neo4jVector(VectorStore):
|
||||
**kwargs,
|
||||
)
|
||||
# Check if the vector index already exists
|
||||
embedding_dimension = store.retrieve_existing_index()
|
||||
embedding_dimension, index_type = store.retrieve_existing_index()
|
||||
|
||||
# Raise error if relationship index type
|
||||
if index_type == "RELATIONSHIP":
|
||||
raise ValueError(
|
||||
"Data ingestion is not supported with relationship vector index."
|
||||
)
|
||||
|
||||
# If the vector index doesn't exist yet
|
||||
if not embedding_dimension:
|
||||
@ -976,9 +1004,16 @@ class Neo4jVector(VectorStore):
|
||||
index_query = base_index_query + filter_snippets + base_cosine_query
|
||||
|
||||
else:
|
||||
index_query = _get_search_index_query(self.search_type)
|
||||
index_query = _get_search_index_query(self.search_type, self._index_type)
|
||||
filter_params = {}
|
||||
|
||||
if self._index_type == IndexType.RELATIONSHIP:
|
||||
default_retrieval = (
|
||||
f"RETURN relationship.`{self.text_node_property}` AS text, score, "
|
||||
f"relationship {{.*, `{self.text_node_property}`: Null, "
|
||||
f"`{self.embedding_node_property}`: Null, id: Null }} AS metadata"
|
||||
)
|
||||
else:
|
||||
default_retrieval = (
|
||||
f"RETURN node.`{self.text_node_property}` AS text, score, "
|
||||
f"node {{.*, `{self.text_node_property}`: Null, "
|
||||
@ -1141,7 +1176,15 @@ class Neo4jVector(VectorStore):
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
embedding_dimension = store.retrieve_existing_index()
|
||||
embedding_dimension, index_type = store.retrieve_existing_index()
|
||||
|
||||
# Raise error if relationship index type
|
||||
if index_type == "RELATIONSHIP":
|
||||
raise ValueError(
|
||||
"Relationship vector index is not supported with "
|
||||
"`from_existing_index` method. Please use the "
|
||||
"`from_existing_relationship_index` method."
|
||||
)
|
||||
|
||||
if not embedding_dimension:
|
||||
raise ValueError(
|
||||
@ -1174,6 +1217,61 @@ class Neo4jVector(VectorStore):
|
||||
|
||||
return store
|
||||
|
||||
@classmethod
def from_existing_relationship_index(
    cls: Type[Neo4jVector],
    embedding: Embeddings,
    index_name: str,
    search_type: SearchType = DEFAULT_SEARCH_TYPE,
    **kwargs: Any,
) -> Neo4jVector:
    """
    Get instance of an existing Neo4j relationship vector index.

    This method will return the instance of the store without
    inserting any new embeddings.
    Neo4j credentials are required in the form of `url`, `username`,
    and `password` and optional `database` parameters along with
    the `index_name` definition.

    Args:
        embedding: Embedding function handed to the store constructor.
        index_name: Name of the existing relationship vector index.
        search_type: Requested search type. Hybrid search is rejected;
            the value is only validated here and is not forwarded to
            the constructor.
        **kwargs: Extra keyword arguments forwarded unchanged to the
            store constructor (e.g. the connection settings above).

    Returns:
        The store instance bound to the existing relationship index.

    Raises:
        ValueError: If hybrid search is requested, if no matching vector
            index is found, if the matching index is a node index, or if
            the embedding function and index dimensions differ.
    """

    # Reject the unsupported combination before the store is constructed.
    if search_type == SearchType.HYBRID:
        raise ValueError(
            "Hybrid search is not supported in combination "
            "with relationship vector index"
        )

    store = cls(
        embedding=embedding,
        index_name=index_name,
        **kwargs,
    )

    embedding_dimension, index_type = store.retrieve_existing_index()

    # A falsy dimension means no matching vector index was found.
    if not embedding_dimension:
        raise ValueError(
            "The specified vector index name does not exist. "
            "Make sure to check if you spelled it correctly"
        )
    # Raise error if node index type: this constructor only serves
    # relationship indexes. IndexType is a str-based enum, so the
    # comparison also holds for the plain string the database reports.
    if index_type == IndexType.NODE:
        raise ValueError(
            "Node vector index is not supported with "
            "`from_existing_relationship_index` method. Please use the "
            "`from_existing_index` method."
        )

    # Check if embedding function and vector index dimensions match
    if store.embedding_dimension != embedding_dimension:
        raise ValueError(
            "The provided embedding function and vector index "
            "dimensions do not match.\n"
            f"Embedding function dimension: {store.embedding_dimension}\n"
            f"Vector index dimension: {embedding_dimension}"
        )

    return store
|
||||
|
||||
@classmethod
|
||||
def from_documents(
|
||||
cls: Type[Neo4jVector],
|
||||
@ -1266,7 +1364,15 @@ class Neo4jVector(VectorStore):
|
||||
)
|
||||
|
||||
# Check if the vector index already exists
|
||||
embedding_dimension = store.retrieve_existing_index()
|
||||
embedding_dimension, index_type = store.retrieve_existing_index()
|
||||
|
||||
# Raise error if relationship index type
|
||||
if index_type == "RELATIONSHIP":
|
||||
raise ValueError(
|
||||
"`from_existing_graph` method does not support "
|
||||
" existing relationship vector index. "
|
||||
"Please use `from_existing_relationship_index` method"
|
||||
)
|
||||
|
||||
# If the vector index doesn't exist yet
|
||||
if not embedding_dimension:
|
||||
|
@ -43,7 +43,9 @@ def drop_vector_indexes(store: Neo4jVector) -> None:
|
||||
"""
|
||||
)
|
||||
for index in all_indexes:
|
||||
store.query(f"DROP INDEX {index['name']}")
|
||||
store.query(f"DROP INDEX `{index['name']}`")
|
||||
|
||||
store.query("MATCH (n) DETACH DELETE n;")
|
||||
|
||||
|
||||
class FakeEmbeddingsWithOsDimension(FakeEmbeddings):
|
||||
@ -812,3 +814,91 @@ def test_metadata_filters_type1() -> None:
|
||||
|
||||
assert output == expected_output
|
||||
drop_vector_indexes(docsearch)
|
||||
|
||||
|
||||
def test_neo4jvector_relationship_index() -> None:
    """Test similarity search against an existing relationship vector index."""
    embeddings = FakeEmbeddingsWithOsDimension()
    # The store returned by from_texts is used below to run the setup
    # Cypher statements through its query() method.
    docsearch = Neo4jVector.from_texts(
        texts=texts,
        embedding=embeddings,
        url=url,
        username=username,
        password=password,
        pre_delete_collection=True,
    )
    # Always drop the indexes afterwards: the next test creates an index
    # with the same `relationship` name.
    try:
        # Ingest data
        # NOTE(review): the second relationship stores text 'far' with the
        # embedding of "bar" - presumably a typo; it does not affect the
        # assertion below.
        docsearch.query(
            (
                "CREATE ()-[:REL {text: 'foo', embedding: $e1}]->()"
                ", ()-[:REL {text: 'far', embedding: $e2}]->()"
            ),
            params={
                "e1": embeddings.embed_query("foo"),
                "e2": embeddings.embed_query("bar"),
            },
        )
        # Create relationship index
        # NOTE(review): 1536 presumably matches the dimension produced by
        # FakeEmbeddingsWithOsDimension - confirm.
        docsearch.query(
            """CREATE VECTOR INDEX `relationship`
FOR ()-[r:REL]-() ON (r.embedding)
OPTIONS {indexConfig: {
 `vector.dimensions`: 1536,
 `vector.similarity_function`: 'cosine'
}}
"""
        )
        # Pass the same connection settings as the from_texts call above so
        # both stores are built from identical configuration.
        relationship_index = Neo4jVector.from_existing_relationship_index(
            embeddings,
            index_name="relationship",
            url=url,
            username=username,
            password=password,
        )

        output = relationship_index.similarity_search("foo", k=1)
        assert output == [Document(page_content="foo")]
    finally:
        drop_vector_indexes(docsearch)
|
||||
|
||||
|
||||
def test_neo4jvector_relationship_index_retrieval() -> None:
    """Test a custom retrieval query on a relationship vector index."""
    embeddings = FakeEmbeddingsWithOsDimension()
    # The store returned by from_texts is used below to run the setup
    # Cypher statements through its query() method.
    docsearch = Neo4jVector.from_texts(
        texts=texts,
        embedding=embeddings,
        url=url,
        username=username,
        password=password,
        pre_delete_collection=True,
    )
    # Always drop the indexes afterwards so a failure here does not leave
    # the `relationship` index behind for other tests.
    try:
        # Ingest data: start nodes carry a `node` property that the custom
        # retrieval query reads through startNode(relationship).
        docsearch.query(
            (
                "CREATE ({node:'text'})-[:REL {text: 'foo', embedding: $e1}]->()"
                ", ({node:'text'})-[:REL {text: 'far', embedding: $e2}]->()"
            ),
            params={
                "e1": embeddings.embed_query("foo"),
                "e2": embeddings.embed_query("bar"),
            },
        )
        # Create relationship index
        # NOTE(review): 1536 presumably matches the dimension produced by
        # FakeEmbeddingsWithOsDimension - confirm.
        docsearch.query(
            """CREATE VECTOR INDEX `relationship`
FOR ()-[r:REL]-() ON (r.embedding)
OPTIONS {indexConfig: {
 `vector.dimensions`: 1536,
 `vector.similarity_function`: 'cosine'
}}
"""
        )
        retrieval_query = (
            "RETURN relationship.text + '-' + startNode(relationship).node "
            "AS text, score, {foo:'bar'} AS metadata"
        )
        # Pass the same connection settings as the from_texts call above so
        # both stores are built from identical configuration.
        relationship_index = Neo4jVector.from_existing_relationship_index(
            embeddings,
            index_name="relationship",
            retrieval_query=retrieval_query,
            url=url,
            username=username,
            password=password,
        )

        output = relationship_index.similarity_search("foo", k=1)
        assert output == [
            Document(page_content="foo-text", metadata={"foo": "bar"})
        ]
    finally:
        drop_vector_indexes(docsearch)
|
||||
|
Loading…
Reference in New Issue
Block a user