experimental: LLMGraphTransformer - added relationship properties. (#21856)

- **Description:** The generated relationships in the graph had no properties, but the Relationship class was properly defined with properties. This made it very difficult to transform conditional sentences into a graph. Adding properties to relationships can solve this issue elegantly. The changes expand on the existing LLMGraphTransformer implementation but add the possibility to define allowed relationship properties like this:
  LLMGraphTransformer(llm=llm, relationship_properties=["Condition", "Time"],)
- **Issue:** no issue found
- **Dependencies:** n/a
- **Twitter handle:** @IstvanSpace

-Quick Test
=================================================================

from dotenv import load_dotenv
import os
from langchain_community.graphs import Neo4jGraph
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.documents import Document

load_dotenv()
os.environ["NEO4J_URI"] = os.getenv("NEO4J_URI")
os.environ["NEO4J_USERNAME"] = os.getenv("NEO4J_USERNAME")
os.environ["NEO4J_PASSWORD"] = os.getenv("NEO4J_PASSWORD")
graph = Neo4jGraph()
llm = ChatOpenAI(temperature=0, model_name="gpt-4o")
llm_transformer = LLMGraphTransformer(llm=llm)
#text = "Harry potter likes pies, but only if it rains outside"
text = "Jack has a dog named Max. Jack only walks Max if it is sunny outside."
documents = [Document(page_content=text)]
llm_transformer_props = LLMGraphTransformer(
    llm=llm,
    relationship_properties=["Condition"],
)
graph_documents_props = llm_transformer_props.convert_to_graph_documents(documents)
print(f"Nodes:{graph_documents_props[0].nodes}")
print(f"Relationships:{graph_documents_props[0].relationships}")
graph.add_graph_documents(graph_documents_props)

---------

Co-authored-by: Istvan Lorincz <istvan.lorincz@pm.me>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
2025-07-03 11:47:49 +00:00 · 2024-06-14 11:41:04 -07:00 · 2024-06-14 11:41:04 -07:00 · 513e491ce9
commit 513e491ce9
parent 694ae87748
1 changed files with 128 additions and 42 deletions
--- a/libs/experimental/langchain_experimental/graph_transformers/llm.py
+++ b/libs/experimental/langchain_experimental/graph_transformers/llm.py
@ -72,8 +72,8 @@ system_prompt = (
    "You are a top-tier algorithm designed for extracting information in structured "
    "formats to build a knowledge graph.\n"
    "Try to capture as much information from the text as possible without "
-    "sacrifing accuracy. Do not add any information that is not explicitly "
+    "sacrificing accuracy. Do not add any information that is not explicitly "
-    "mentioned in the text\n"
+    "mentioned in the text.\n"
    "- **Nodes** represent entities and concepts.\n"
    "- The aim is to achieve simplicity and clarity in the knowledge graph, making it\n"
    "accessible for a vast audience.\n"
@ -82,7 +82,7 @@ system_prompt = (
    "Ensure you use basic or elementary types for node labels.\n"
    "- For example, when you identify an entity representing a person, "
    "always label it as **'person'**. Avoid using more specific terms "
-    "like 'mathematician' or 'scientist'"
+    "like 'mathematician' or 'scientist'."
    "- **Node IDs**: Never utilize integers as node IDs. Node IDs should be "
    "names or human-readable identifiers found in the text.\n"
    "- **Relationships** represent connections between entities or concepts.\n"
@ -138,8 +138,8 @@ def _get_additional_info(input_type: str) -> str:
    elif input_type == "relationship":
        return (
            "Instead of using specific and momentary types such as "
-            "'BECAME_PROFESSOR', use more general and timeless relationship types like "
+            "'BECAME_PROFESSOR', use more general and timeless relationship types "
-            "'PROFESSOR'. However, do not sacrifice any accuracy for generality"
+            "like 'PROFESSOR'. However, do not sacrifice any accuracy for generality"
        )
    elif input_type == "property":
        return ""
@ -280,10 +280,32 @@ def create_simple_model(
    rel_types: Optional[List[str]] = None,
    node_properties: Union[bool, List[str]] = False,
    llm_type: Optional[str] = None,
    relationship_properties: Union[bool, List[str]] = False,
 ) -> Type[_Graph]:
    """
-    Simple model allows to limit node and/or relationship types.
+    Create a simple graph model with optional constraints on node
-    Doesn't have any node or relationship properties.
+    and relationship types.
    Args:
        node_labels (Optional[List[str]]): Specifies the allowed node types.
            Defaults to None, allowing all node types.
        rel_types (Optional[List[str]]): Specifies the allowed relationship types.
            Defaults to None, allowing all relationship types.
        node_properties (Union[bool, List[str]]): Specifies if node properties should
            be included. If a list is provided, only properties with keys in the list
            will be included. If True, all properties are included. Defaults to False.
        relationship_properties (Union[bool, List[str]]): Specifies if relationship
            properties should be included. If a list is provided, only properties with
            keys in the list will be included. If True, all properties are included.
            Defaults to False.
        llm_type (Optional[str]): The type of the language model. Defaults to None.
            Only openai supports enum param: openai-chat.
    Returns:
        Type[_Graph]: A graph model with the specified constraints.
    Raises:
        ValueError: If 'id' is included in the node or relationship properties list.
    """
    node_fields: Dict[str, Tuple[Any, Any]] = {
@ -325,39 +347,80 @@ def create_simple_model(
        )
    SimpleNode = create_model("SimpleNode", **node_fields)  # type: ignore
-    class SimpleRelationship(BaseModel):
+    relationship_fields: Dict[str, Tuple[Any, Any]] = {
-        """Represents a directed relationship between two nodes in a graph."""
+        "source_node_id": (
-
+            str,
-        source_node_id: str = Field(
+            Field(
-            description="Name or human-readable unique identifier of source node"
+                ...,
-        )
+                description="Name or human-readable unique identifier of source node",
-        source_node_type: str = optional_enum_field(
+            ),
        ),
        "source_node_type": (
            str,
            optional_enum_field(
                node_labels,
                description="The type or label of the source node.",
                input_type="node",
-            llm_type=llm_type,
+            ),
-        )
+        ),
-        target_node_id: str = Field(
+        "target_node_id": (
-            description="Name or human-readable unique identifier of target node"
+            str,
-        )
+            Field(
-        target_node_type: str = optional_enum_field(
+                ...,
                description="Name or human-readable unique identifier of target node",
            ),
        ),
        "target_node_type": (
            str,
            optional_enum_field(
                node_labels,
                description="The type or label of the target node.",
                input_type="node",
-            llm_type=llm_type,
+            ),
-        )
+        ),
-        type: str = optional_enum_field(
+        "type": (
            str,
            optional_enum_field(
                rel_types,
                description="The type of the relationship.",
                input_type="relationship",
-            llm_type=llm_type,
+            ),
        ),
    }
    if relationship_properties:
        if (
            isinstance(relationship_properties, list)
            and "id" in relationship_properties
        ):
            raise ValueError(
                "The relationship property 'id' is reserved and cannot be used."
            )
        # Map True to empty array
        relationship_properties_mapped: List[str] = (
            [] if relationship_properties is True else relationship_properties
        )
        class RelationshipProperty(BaseModel):
            """A single property consisting of key and value"""
            key: str = optional_enum_field(
                relationship_properties_mapped,
                description="Property key.",
                input_type="property",
            )
            value: str = Field(..., description="value")
        relationship_fields["properties"] = (
            Optional[List[RelationshipProperty]],
            Field(None, description="List of relationship properties"),
        )
    SimpleRelationship = create_model("SimpleRelationship", **relationship_fields)  # type: ignore
    class DynamicGraph(_Graph):
        """Represents a graph document consisting of nodes and relationships."""
        nodes: Optional[List[SimpleNode]] = Field(description="List of nodes")  # type: ignore
-        relationships: Optional[List[SimpleRelationship]] = Field(
+        relationships: Optional[List[SimpleRelationship]] = Field(  # type: ignore
            description="List of relationships"
        )
@ -377,7 +440,13 @@ def map_to_base_relationship(rel: Any) -> Relationship:
    """Map the SimpleRelationship to the base Relationship."""
    source = Node(id=rel.source_node_id, type=rel.source_node_type)
    target = Node(id=rel.target_node_id, type=rel.target_node_type)
-    return Relationship(source=source, target=target, type=rel.type)
+    properties = {}
    if hasattr(rel, "properties") and rel.properties:
        for p in rel.properties:
            properties[format_property_key(p.key)] = p.value
    return Relationship(
        source=source, target=target, type=rel.type, properties=properties
    )
 def _parse_and_clean_json(
@ -387,10 +456,15 @@ def _parse_and_clean_json(
    for node in argument_json["nodes"]:
        if not node.get("id"):  # Id is mandatory, skip this node
            continue
        node_properties = {}
        if "properties" in node and node["properties"]:
            for p in node["properties"]:
                node_properties[format_property_key(p["key"])] = p["value"]
        nodes.append(
            Node(
                id=node["id"],
                type=node.get("type"),
                properties=node_properties,
            )
        )
    relationships = []
@ -423,6 +497,11 @@ def _parse_and_clean_json(
            except IndexError:
                rel["target_node_type"] = None
        rel_properties = {}
        if "properties" in rel and rel["properties"]:
            for p in rel["properties"]:
                rel_properties[format_property_key(p["key"])] = p["value"]
        source_node = Node(
            id=rel["source_node_id"],
            type=rel["source_node_type"],
@ -436,6 +515,7 @@ def _parse_and_clean_json(
                source=source_node,
                target=target_node,
                type=rel["type"],
                properties=rel_properties,
            )
        )
    return nodes, relationships
@ -458,6 +538,7 @@ def _format_relationships(rels: List[Relationship]) -> List[Relationship]:
            source=_format_nodes([el.source])[0],
            target=_format_nodes([el.target])[0],
            type=el.type.replace(" ", "_").upper(),
            properties=el.properties,
        )
        for el in rels
    ]
@ -513,8 +594,8 @@ class LLMGraphTransformer:
    """Transform documents into graph-based documents using a LLM.
    It allows specifying constraints on the types of nodes and relationships to include
-    in the output graph. The class doesn't support neither extract and node or
+    in the output graph. The class supports extracting properties for both nodes and
-    relationship properties
+    relationships.
    Args:
        llm (BaseLanguageModel): An instance of a language model supporting structured
@ -553,6 +634,7 @@ class LLMGraphTransformer:
        prompt: Optional[ChatPromptTemplate] = None,
        strict_mode: bool = True,
        node_properties: Union[bool, List[str]] = False,
        relationship_properties: Union[bool, List[str]] = False,
    ) -> None:
        self.allowed_nodes = allowed_nodes
        self.allowed_relationships = allowed_relationships
@ -564,14 +646,14 @@ class LLMGraphTransformer:
        except NotImplementedError:
            self._function_call = False
        if not self._function_call:
-            if node_properties:
+            if node_properties or relationship_properties:
                raise ValueError(
-                    "The 'node_properties' parameter cannot be used "
+                    "The 'node_properties' and 'relationship_properties' parameters "
-                    "in combination with a LLM that doesn't support "
+                    "cannot be used in combination with a LLM that doesn't support "
                    "native function calling."
                )
            try:
-                import json_repair
+                import json_repair  # type: ignore
                self.json_repair = json_repair
            except ImportError:
@ -590,7 +672,11 @@ class LLMGraphTransformer:
            except AttributeError:
                llm_type = None
            schema = create_simple_model(
-                allowed_nodes, allowed_relationships, node_properties, llm_type
+                allowed_nodes,
                allowed_relationships,
                node_properties,
                llm_type,
                relationship_properties,
            )
            structured_llm = llm.with_structured_output(schema, include_raw=True)
            prompt = prompt or default_prompt