Diffbot Graph Transformer / Neo4j Graph document ingestion (#9979)

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
Tomaz Bratanic
2023-09-06 22:32:59 +02:00
committed by GitHub
parent ccb9e3ee2d
commit db73c9d5b5
7 changed files with 761 additions and 1 deletions

View File

@@ -0,0 +1,51 @@
from __future__ import annotations
from typing import List, Union
from langchain.load.serializable import Serializable
from langchain.pydantic_v1 import Field
from langchain.schema import Document
class Node(Serializable):
    """A single node of a graph, together with its label and properties.

    Attributes:
        id (Union[str, int]): Identifier of the node.
        type (str): Type (label) of the node; "Node" when not specified.
        properties (dict): Additional key/value data attached to the node;
            a fresh empty dict is created per instance when not specified.
    """

    id: Union[str, int]
    type: str = "Node"
    properties: dict = Field(default_factory=dict)
class Relationship(Serializable):
    """A directed, typed edge that connects one graph node to another.

    Attributes:
        source (Node): Node the relationship starts from.
        target (Node): Node the relationship points to.
        type (str): Type of the relationship (required, no default).
        properties (dict): Additional key/value data attached to the
            relationship; a fresh empty dict is created per instance when
            not specified.
    """

    source: Node
    target: Node
    type: str
    properties: dict = Field(default_factory=dict)
class GraphDocument(Serializable):
    """Graph data (nodes and relationships) paired with its source document.

    Attributes:
        nodes (List[Node]): Nodes contained in the graph.
        relationships (List[Relationship]): Relationships contained in the
            graph.
        source (Document): Document the graph information was derived from.
    """

    nodes: List[Node]
    relationships: List[Relationship]
    source: Document

View File

@@ -1,5 +1,7 @@
from typing import Any, Dict, List
from langchain.graphs.graph_document import GraphDocument
node_properties_query = """
CALL apoc.meta.data()
YIELD label, other, elementType, type, property
@@ -99,3 +101,56 @@ class Neo4jGraph:
The relationships are the following:
{[el['output'] for el in relationships]}
"""
def add_graph_documents(
    self, graph_documents: List[GraphDocument], include_source: bool = False
) -> None:
    """Take graph documents as input and use them to construct a graph.

    Nodes and relationships are written with the APOC procedures
    ``apoc.merge.node`` and ``apoc.merge.relationship``, so those procedures
    must be available on the target database. Each document is imported
    with its own queries, in the order given.

    Args:
        graph_documents: Graph documents whose nodes and relationships are
            merged into the database.
        include_source: When True, a ``Document`` node holding the source
            text (``text`` property) and the source metadata is created for
            every graph document and linked to each of the document's nodes
            through a ``MENTIONS`` relationship.
    """
    # The Cypher text does not depend on the document being imported, so it
    # is assembled once here instead of on every loop iteration.
    if include_source:
        source_prefix = (
            "CREATE (d:Document) "
            "SET d.text = $document.page_content "
            "SET d += $document.metadata "
            "WITH d "
        )
        mentions_clause = "MERGE (d)-[:MENTIONS]->(node) "
    else:
        source_prefix = ""
        mentions_clause = ""

    node_import_query = (
        f"{source_prefix}"
        "UNWIND $data AS row "
        "CALL apoc.merge.node([row.type], {id: row.id}, "
        "row.properties, {}) YIELD node "
        f"{mentions_clause}"
        "RETURN distinct 'done' AS result"
    )
    relationship_import_query = (
        "UNWIND $data AS row "
        "CALL apoc.merge.node([row.source_label], {id: row.source},"
        "{}, {}) YIELD node as source "
        "CALL apoc.merge.node([row.target_label], {id: row.target},"
        "{}, {}) YIELD node as target "
        "CALL apoc.merge.relationship(source, row.type, "
        "{}, row.properties, target) YIELD rel "
        "RETURN distinct 'done'"
    )

    for document in graph_documents:
        # Import nodes. With no nodes the query only has an effect when the
        # source Document node still has to be created, so the round trip
        # is skipped otherwise.
        if document.nodes or include_source:
            self.query(
                node_import_query,
                {
                    "data": [el.__dict__ for el in document.nodes],
                    "document": document.source.__dict__,
                },
            )

        # Import relationships. UNWIND over an empty list yields no rows, so
        # there is nothing to write when the document has no relationships.
        if document.relationships:
            self.query(
                relationship_import_query,
                {
                    "data": [
                        {
                            "source": el.source.id,
                            "source_label": el.source.type,
                            "target": el.target.id,
                            "target_label": el.target.type,
                            # Relationship types are stored upper-cased with
                            # spaces replaced by underscores.
                            "type": el.type.replace(" ", "_").upper(),
                            "properties": el.properties,
                        }
                        for el in document.relationships
                    ]
                },
            )