mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-26 16:43:35 +00:00
community: Add support for relationship indexes in neo4j vector (#20657)
Neo4j has added relationship vector indexes. This integration cannot populate them, but existing relationship vector indexes can be used for retrieval.
This commit is contained in:
parent
940242c1ec
commit
8c08cf4619
@ -68,10 +68,24 @@ class SearchType(str, enum.Enum):
|
|||||||
DEFAULT_SEARCH_TYPE = SearchType.VECTOR
|
DEFAULT_SEARCH_TYPE = SearchType.VECTOR
|
||||||
|
|
||||||
|
|
||||||
def _get_search_index_query(search_type: SearchType) -> str:
|
class IndexType(str, enum.Enum):
|
||||||
|
"""Enumerator of the index types."""
|
||||||
|
|
||||||
|
NODE = "NODE"
|
||||||
|
RELATIONSHIP = "RELATIONSHIP"
|
||||||
|
|
||||||
|
|
||||||
|
DEFAULT_INDEX_TYPE = IndexType.NODE
|
||||||
|
|
||||||
|
|
||||||
|
def _get_search_index_query(
|
||||||
|
search_type: SearchType, index_type: IndexType = DEFAULT_INDEX_TYPE
|
||||||
|
) -> str:
|
||||||
|
if index_type == IndexType.NODE:
|
||||||
type_to_query_map = {
|
type_to_query_map = {
|
||||||
SearchType.VECTOR: (
|
SearchType.VECTOR: (
|
||||||
"CALL db.index.vector.queryNodes($index, $k, $embedding) YIELD node, score "
|
"CALL db.index.vector.queryNodes($index, $k, $embedding) "
|
||||||
|
"YIELD node, score "
|
||||||
),
|
),
|
||||||
SearchType.HYBRID: (
|
SearchType.HYBRID: (
|
||||||
"CALL { "
|
"CALL { "
|
||||||
@ -81,8 +95,8 @@ def _get_search_index_query(search_type: SearchType) -> str:
|
|||||||
"UNWIND nodes AS n "
|
"UNWIND nodes AS n "
|
||||||
# We use 0 as min
|
# We use 0 as min
|
||||||
"RETURN n.node AS node, (n.score / max) AS score UNION "
|
"RETURN n.node AS node, (n.score / max) AS score UNION "
|
||||||
"CALL db.index.fulltext.queryNodes($keyword_index, $query, {limit: $k}) "
|
"CALL db.index.fulltext.queryNodes($keyword_index, $query, "
|
||||||
"YIELD node, score "
|
"{limit: $k}) YIELD node, score "
|
||||||
"WITH collect({node:node, score:score}) AS nodes, max(score) AS max "
|
"WITH collect({node:node, score:score}) AS nodes, max(score) AS max "
|
||||||
"UNWIND nodes AS n "
|
"UNWIND nodes AS n "
|
||||||
# We use 0 as min
|
# We use 0 as min
|
||||||
@ -93,6 +107,11 @@ def _get_search_index_query(search_type: SearchType) -> str:
|
|||||||
),
|
),
|
||||||
}
|
}
|
||||||
return type_to_query_map[search_type]
|
return type_to_query_map[search_type]
|
||||||
|
else:
|
||||||
|
return (
|
||||||
|
"CALL db.index.vector.queryRelationships($index, $k, $embedding) "
|
||||||
|
"YIELD relationship, score "
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def check_if_not_null(props: List[str], values: List[Any]) -> None:
|
def check_if_not_null(props: List[str], values: List[Any]) -> None:
|
||||||
@ -463,6 +482,7 @@ class Neo4jVector(VectorStore):
|
|||||||
pre_delete_collection: bool = False,
|
pre_delete_collection: bool = False,
|
||||||
retrieval_query: str = "",
|
retrieval_query: str = "",
|
||||||
relevance_score_fn: Optional[Callable[[float], float]] = None,
|
relevance_score_fn: Optional[Callable[[float], float]] = None,
|
||||||
|
index_type: IndexType = DEFAULT_INDEX_TYPE,
|
||||||
) -> None:
|
) -> None:
|
||||||
try:
|
try:
|
||||||
import neo4j
|
import neo4j
|
||||||
@ -541,6 +561,7 @@ class Neo4jVector(VectorStore):
|
|||||||
self.override_relevance_score_fn = relevance_score_fn
|
self.override_relevance_score_fn = relevance_score_fn
|
||||||
self.retrieval_query = retrieval_query
|
self.retrieval_query = retrieval_query
|
||||||
self.search_type = search_type
|
self.search_type = search_type
|
||||||
|
self._index_type = index_type
|
||||||
# Calculate embedding dimension
|
# Calculate embedding dimension
|
||||||
self.embedding_dimension = len(embedding.embed_query("foo"))
|
self.embedding_dimension = len(embedding.embed_query("foo"))
|
||||||
|
|
||||||
@ -615,7 +636,7 @@ class Neo4jVector(VectorStore):
|
|||||||
# Flag for enterprise
|
# Flag for enterprise
|
||||||
self._is_enterprise = True if db_data[0]["edition"] == "enterprise" else False
|
self._is_enterprise = True if db_data[0]["edition"] == "enterprise" else False
|
||||||
|
|
||||||
def retrieve_existing_index(self) -> Optional[int]:
|
def retrieve_existing_index(self) -> Tuple[Optional[int], Optional[str]]:
|
||||||
"""
|
"""
|
||||||
Check if the vector index exists in the Neo4j database
|
Check if the vector index exists in the Neo4j database
|
||||||
and returns its embedding dimension.
|
and returns its embedding dimension.
|
||||||
@ -630,11 +651,11 @@ class Neo4jVector(VectorStore):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
index_information = self.query(
|
index_information = self.query(
|
||||||
"SHOW INDEXES YIELD name, type, labelsOrTypes, properties, options "
|
"SHOW INDEXES YIELD name, type, entityType, labelsOrTypes, "
|
||||||
"WHERE type = 'VECTOR' AND (name = $index_name "
|
"properties, options WHERE type = 'VECTOR' AND (name = $index_name "
|
||||||
"OR (labelsOrTypes[0] = $node_label AND "
|
"OR (labelsOrTypes[0] = $node_label AND "
|
||||||
"properties[0] = $embedding_node_property)) "
|
"properties[0] = $embedding_node_property)) "
|
||||||
"RETURN name, labelsOrTypes, properties, options ",
|
"RETURN name, entityType, labelsOrTypes, properties, options ",
|
||||||
params={
|
params={
|
||||||
"index_name": self.index_name,
|
"index_name": self.index_name,
|
||||||
"node_label": self.node_label,
|
"node_label": self.node_label,
|
||||||
@ -647,13 +668,14 @@ class Neo4jVector(VectorStore):
|
|||||||
self.index_name = index_information[0]["name"]
|
self.index_name = index_information[0]["name"]
|
||||||
self.node_label = index_information[0]["labelsOrTypes"][0]
|
self.node_label = index_information[0]["labelsOrTypes"][0]
|
||||||
self.embedding_node_property = index_information[0]["properties"][0]
|
self.embedding_node_property = index_information[0]["properties"][0]
|
||||||
|
self._index_type = index_information[0]["entityType"]
|
||||||
embedding_dimension = index_information[0]["options"]["indexConfig"][
|
embedding_dimension = index_information[0]["options"]["indexConfig"][
|
||||||
"vector.dimensions"
|
"vector.dimensions"
|
||||||
]
|
]
|
||||||
|
|
||||||
return embedding_dimension
|
return embedding_dimension, index_information[0]["entityType"]
|
||||||
except IndexError:
|
except IndexError:
|
||||||
return None
|
return None, None
|
||||||
|
|
||||||
def retrieve_existing_fts_index(
|
def retrieve_existing_fts_index(
|
||||||
self, text_node_properties: List[str] = []
|
self, text_node_properties: List[str] = []
|
||||||
@ -754,7 +776,13 @@ class Neo4jVector(VectorStore):
|
|||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
# Check if the vector index already exists
|
# Check if the vector index already exists
|
||||||
embedding_dimension = store.retrieve_existing_index()
|
embedding_dimension, index_type = store.retrieve_existing_index()
|
||||||
|
|
||||||
|
# Raise error if relationship index type
|
||||||
|
if index_type == "RELATIONSHIP":
|
||||||
|
raise ValueError(
|
||||||
|
"Data ingestion is not supported with relationship vector index."
|
||||||
|
)
|
||||||
|
|
||||||
# If the vector index doesn't exist yet
|
# If the vector index doesn't exist yet
|
||||||
if not embedding_dimension:
|
if not embedding_dimension:
|
||||||
@ -976,9 +1004,16 @@ class Neo4jVector(VectorStore):
|
|||||||
index_query = base_index_query + filter_snippets + base_cosine_query
|
index_query = base_index_query + filter_snippets + base_cosine_query
|
||||||
|
|
||||||
else:
|
else:
|
||||||
index_query = _get_search_index_query(self.search_type)
|
index_query = _get_search_index_query(self.search_type, self._index_type)
|
||||||
filter_params = {}
|
filter_params = {}
|
||||||
|
|
||||||
|
if self._index_type == IndexType.RELATIONSHIP:
|
||||||
|
default_retrieval = (
|
||||||
|
f"RETURN relationship.`{self.text_node_property}` AS text, score, "
|
||||||
|
f"relationship {{.*, `{self.text_node_property}`: Null, "
|
||||||
|
f"`{self.embedding_node_property}`: Null, id: Null }} AS metadata"
|
||||||
|
)
|
||||||
|
else:
|
||||||
default_retrieval = (
|
default_retrieval = (
|
||||||
f"RETURN node.`{self.text_node_property}` AS text, score, "
|
f"RETURN node.`{self.text_node_property}` AS text, score, "
|
||||||
f"node {{.*, `{self.text_node_property}`: Null, "
|
f"node {{.*, `{self.text_node_property}`: Null, "
|
||||||
@ -1141,7 +1176,15 @@ class Neo4jVector(VectorStore):
|
|||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
embedding_dimension = store.retrieve_existing_index()
|
embedding_dimension, index_type = store.retrieve_existing_index()
|
||||||
|
|
||||||
|
# Raise error if relationship index type
|
||||||
|
if index_type == "RELATIONSHIP":
|
||||||
|
raise ValueError(
|
||||||
|
"Relationship vector index is not supported with "
|
||||||
|
"`from_existing_index` method. Please use the "
|
||||||
|
"`from_existing_relationship_index` method."
|
||||||
|
)
|
||||||
|
|
||||||
if not embedding_dimension:
|
if not embedding_dimension:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
@ -1174,6 +1217,61 @@ class Neo4jVector(VectorStore):
|
|||||||
|
|
||||||
return store
|
return store
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_existing_relationship_index(
|
||||||
|
cls: Type[Neo4jVector],
|
||||||
|
embedding: Embeddings,
|
||||||
|
index_name: str,
|
||||||
|
search_type: SearchType = DEFAULT_SEARCH_TYPE,
|
||||||
|
**kwargs: Any,
|
||||||
|
) -> Neo4jVector:
|
||||||
|
"""
|
||||||
|
Get instance of an existing Neo4j relationship vector index.
|
||||||
|
This method will return the instance of the store without
|
||||||
|
inserting any new embeddings.
|
||||||
|
Neo4j credentials are required in the form of `url`, `username`,
|
||||||
|
and `password` and optional `database` parameters along with
|
||||||
|
the `index_name` definition.
|
||||||
|
"""
|
||||||
|
|
||||||
|
if search_type == SearchType.HYBRID:
|
||||||
|
raise ValueError(
|
||||||
|
"Hybrid search is not supported in combination "
|
||||||
|
"with relationship vector index"
|
||||||
|
)
|
||||||
|
|
||||||
|
store = cls(
|
||||||
|
embedding=embedding,
|
||||||
|
index_name=index_name,
|
||||||
|
**kwargs,
|
||||||
|
)
|
||||||
|
|
||||||
|
embedding_dimension, index_type = store.retrieve_existing_index()
|
||||||
|
|
||||||
|
if not embedding_dimension:
|
||||||
|
raise ValueError(
|
||||||
|
"The specified vector index name does not exist. "
|
||||||
|
"Make sure to check if you spelled it correctly"
|
||||||
|
)
|
||||||
|
# Raise error if the existing index is a node index
|
||||||
|
if index_type == "NODE":
|
||||||
|
raise ValueError(
|
||||||
|
"Node vector index is not supported with "
|
||||||
|
"`from_existing_relationship_index` method. Please use the "
|
||||||
|
"`from_existing_index` method."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check if embedding function and vector index dimensions match
|
||||||
|
if not store.embedding_dimension == embedding_dimension:
|
||||||
|
raise ValueError(
|
||||||
|
"The provided embedding function and vector index "
|
||||||
|
"dimensions do not match.\n"
|
||||||
|
f"Embedding function dimension: {store.embedding_dimension}\n"
|
||||||
|
f"Vector index dimension: {embedding_dimension}"
|
||||||
|
)
|
||||||
|
|
||||||
|
return store
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_documents(
|
def from_documents(
|
||||||
cls: Type[Neo4jVector],
|
cls: Type[Neo4jVector],
|
||||||
@ -1266,7 +1364,15 @@ class Neo4jVector(VectorStore):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Check if the vector index already exists
|
# Check if the vector index already exists
|
||||||
embedding_dimension = store.retrieve_existing_index()
|
embedding_dimension, index_type = store.retrieve_existing_index()
|
||||||
|
|
||||||
|
# Raise error if relationship index type
|
||||||
|
if index_type == "RELATIONSHIP":
|
||||||
|
raise ValueError(
|
||||||
|
"`from_existing_graph` method does not support "
|
||||||
|
" existing relationship vector index. "
|
||||||
|
"Please use `from_existing_relationship_index` method"
|
||||||
|
)
|
||||||
|
|
||||||
# If the vector index doesn't exist yet
|
# If the vector index doesn't exist yet
|
||||||
if not embedding_dimension:
|
if not embedding_dimension:
|
||||||
|
@ -43,7 +43,9 @@ def drop_vector_indexes(store: Neo4jVector) -> None:
|
|||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
for index in all_indexes:
|
for index in all_indexes:
|
||||||
store.query(f"DROP INDEX {index['name']}")
|
store.query(f"DROP INDEX `{index['name']}`")
|
||||||
|
|
||||||
|
store.query("MATCH (n) DETACH DELETE n;")
|
||||||
|
|
||||||
|
|
||||||
class FakeEmbeddingsWithOsDimension(FakeEmbeddings):
|
class FakeEmbeddingsWithOsDimension(FakeEmbeddings):
|
||||||
@ -812,3 +814,91 @@ def test_metadata_filters_type1() -> None:
|
|||||||
|
|
||||||
assert output == expected_output
|
assert output == expected_output
|
||||||
drop_vector_indexes(docsearch)
|
drop_vector_indexes(docsearch)
|
||||||
|
|
||||||
|
|
||||||
|
def test_neo4jvector_relationship_index() -> None:
|
||||||
|
"""Test end to end construction and search."""
|
||||||
|
embeddings = FakeEmbeddingsWithOsDimension()
|
||||||
|
docsearch = Neo4jVector.from_texts(
|
||||||
|
texts=texts,
|
||||||
|
embedding=embeddings,
|
||||||
|
url=url,
|
||||||
|
username=username,
|
||||||
|
password=password,
|
||||||
|
pre_delete_collection=True,
|
||||||
|
)
|
||||||
|
# Ingest data
|
||||||
|
docsearch.query(
|
||||||
|
(
|
||||||
|
"CREATE ()-[:REL {text: 'foo', embedding: $e1}]->()"
|
||||||
|
", ()-[:REL {text: 'far', embedding: $e2}]->()"
|
||||||
|
),
|
||||||
|
params={
|
||||||
|
"e1": embeddings.embed_query("foo"),
|
||||||
|
"e2": embeddings.embed_query("bar"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
# Create relationship index
|
||||||
|
docsearch.query(
|
||||||
|
"""CREATE VECTOR INDEX `relationship`
|
||||||
|
FOR ()-[r:REL]-() ON (r.embedding)
|
||||||
|
OPTIONS {indexConfig: {
|
||||||
|
`vector.dimensions`: 1536,
|
||||||
|
`vector.similarity_function`: 'cosine'
|
||||||
|
}}
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
relationship_index = Neo4jVector.from_existing_relationship_index(
|
||||||
|
embeddings, index_name="relationship"
|
||||||
|
)
|
||||||
|
|
||||||
|
output = relationship_index.similarity_search("foo", k=1)
|
||||||
|
assert output == [Document(page_content="foo")]
|
||||||
|
|
||||||
|
drop_vector_indexes(docsearch)
|
||||||
|
|
||||||
|
|
||||||
|
def test_neo4jvector_relationship_index_retrieval() -> None:
|
||||||
|
"""Test end to end construction and search."""
|
||||||
|
embeddings = FakeEmbeddingsWithOsDimension()
|
||||||
|
docsearch = Neo4jVector.from_texts(
|
||||||
|
texts=texts,
|
||||||
|
embedding=embeddings,
|
||||||
|
url=url,
|
||||||
|
username=username,
|
||||||
|
password=password,
|
||||||
|
pre_delete_collection=True,
|
||||||
|
)
|
||||||
|
# Ingest data
|
||||||
|
docsearch.query(
|
||||||
|
(
|
||||||
|
"CREATE ({node:'text'})-[:REL {text: 'foo', embedding: $e1}]->()"
|
||||||
|
", ({node:'text'})-[:REL {text: 'far', embedding: $e2}]->()"
|
||||||
|
),
|
||||||
|
params={
|
||||||
|
"e1": embeddings.embed_query("foo"),
|
||||||
|
"e2": embeddings.embed_query("bar"),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
# Create relationship index
|
||||||
|
docsearch.query(
|
||||||
|
"""CREATE VECTOR INDEX `relationship`
|
||||||
|
FOR ()-[r:REL]-() ON (r.embedding)
|
||||||
|
OPTIONS {indexConfig: {
|
||||||
|
`vector.dimensions`: 1536,
|
||||||
|
`vector.similarity_function`: 'cosine'
|
||||||
|
}}
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
retrieval_query = (
|
||||||
|
"RETURN relationship.text + '-' + startNode(relationship).node "
|
||||||
|
"AS text, score, {foo:'bar'} AS metadata"
|
||||||
|
)
|
||||||
|
relationship_index = Neo4jVector.from_existing_relationship_index(
|
||||||
|
embeddings, index_name="relationship", retrieval_query=retrieval_query
|
||||||
|
)
|
||||||
|
|
||||||
|
output = relationship_index.similarity_search("foo", k=1)
|
||||||
|
assert output == [Document(page_content="foo-text", metadata={"foo": "bar"})]
|
||||||
|
|
||||||
|
drop_vector_indexes(docsearch)
|
||||||
|
Loading…
Reference in New Issue
Block a user