mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-10 07:21:03 +00:00
community: Apache AGE wrapper. Ensure Node Uniqueness by ID. (#28759)
**Description:** The Apache AGE graph integration incorrectly handled node merging, allowing duplicate nodes with different IDs but the same type and other properties. Unlike [Neo4j](cdf6202156/libs/community/langchain_community/graphs/neo4j_graph.py#L47), [Memgraph](cdf6202156/libs/community/langchain_community/graphs/memgraph_graph.py#L50), [Kuzu](cdf6202156/libs/community/langchain_community/graphs/kuzu_graph.py#L253), and [Gremlin](cdf6202156/libs/community/langchain_community/graphs/gremlin_graph.py#L165), it did not use the node ID as the primary identifier for merging. This inconsistency caused data integrity issues and unexpected behavior when users expected updates to specific nodes by ID. **Solution:** This PR modifies the `node_insert_query` to `MERGE` nodes based on label and ID *only* and updates properties with `SET`, aligning the behavior with other graph database integrations. The `_format_properties` method was also modified to handle id overrides. **Impact:** This fix ensures data integrity by preventing duplicate nodes and provides consistent behavior across graph database integrations.
This commit is contained in:
@@ -697,7 +697,8 @@ class AGEGraph(GraphStore):
|
|||||||
# query for inserting nodes
|
# query for inserting nodes
|
||||||
node_insert_query = (
|
node_insert_query = (
|
||||||
"""
|
"""
|
||||||
MERGE (n:`{label}` {properties})
|
MERGE (n:`{label}` {{`id`: "{id}"}})
|
||||||
|
SET n = {properties}
|
||||||
"""
|
"""
|
||||||
if not include_source
|
if not include_source
|
||||||
else """
|
else """
|
||||||
@@ -735,6 +736,7 @@ class AGEGraph(GraphStore):
|
|||||||
query = node_insert_query.format(
|
query = node_insert_query.format(
|
||||||
label=AGEGraph.clean_graph_labels(node.type),
|
label=AGEGraph.clean_graph_labels(node.type),
|
||||||
properties=self._format_properties(node.properties),
|
properties=self._format_properties(node.properties),
|
||||||
|
id=node.id,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.query(query)
|
self.query(query)
|
||||||
|
@@ -10,7 +10,11 @@ from langchain_community.graphs.graph_document import GraphDocument, Node, Relat
|
|||||||
|
|
||||||
test_data = [
|
test_data = [
|
||||||
GraphDocument(
|
GraphDocument(
|
||||||
nodes=[Node(id="foo", type="foo"), Node(id="bar", type="bar")],
|
nodes=[
|
||||||
|
Node(id="foo", type="foo"),
|
||||||
|
Node(id="bar", type="bar"),
|
||||||
|
Node(id="foo", type="foo", properties={"property_a": "a"}),
|
||||||
|
],
|
||||||
relationships=[
|
relationships=[
|
||||||
Relationship(
|
Relationship(
|
||||||
source=Node(id="foo", type="foo"),
|
source=Node(id="foo", type="foo"),
|
||||||
|
Reference in New Issue
Block a user