experimental: LLMGraphTransformer - added relationship properties. (#21856)

- **Description:** 
The generated relationships in the graph had no properties, but the
Relationship class was properly defined with properties. This made it
very difficult to transform conditional sentences into a graph. Adding
properties to relationships can solve this issue elegantly.
The changes expand on the existing LLMGraphTransformer implementation
and add the option to define allowed relationship properties, like
this: `LLMGraphTransformer(llm=llm, relationship_properties=["Condition", "Time"])`
- **Issue:** 
    no issue found
- **Dependencies:**
    n/a
- **Twitter handle:** 
    @IstvanSpace


- **Quick Test:**
=================================================================
# Quick test: extract a graph whose relationships carry properties from a
# short conditional sentence, print the result, and store it in Neo4j.
from dotenv import load_dotenv
import os
from langchain_community.graphs import Neo4jGraph
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.documents import Document

load_dotenv()

# Fail early with a clear message: assigning the None returned by
# os.getenv() to os.environ would otherwise raise an opaque TypeError.
for name in ("NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD"):
    value = os.getenv(name)
    if value is None:
        raise RuntimeError(f"Environment variable {name} is not set")
    os.environ[name] = value

graph = Neo4jGraph()
llm = ChatOpenAI(temperature=0, model_name="gpt-4o")

# Transformer without relationship properties; not used further in this test.
llm_transformer = LLMGraphTransformer(llm=llm)

# Alternative sample sentence:
# text = "Harry potter likes pies, but only if it rains outside"
text = "Jack has a dog named Max. Jack only walks Max if it is sunny outside."
documents = [Document(page_content=text)]

# Only the "Condition" key is allowed as a relationship property.
llm_transformer_props = LLMGraphTransformer(
    llm=llm,
    relationship_properties=["Condition"],
)
graph_documents_props = llm_transformer_props.convert_to_graph_documents(documents)
print(f"Nodes:{graph_documents_props[0].nodes}")
print(f"Relationships:{graph_documents_props[0].relationships}")
graph.add_graph_documents(graph_documents_props)

---------

Co-authored-by: Istvan Lorincz <istvan.lorincz@pm.me>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
This commit is contained in:
Istvan/Nebulinq 2024-06-14 11:41:04 -07:00 committed by GitHub
parent 694ae87748
commit 513e491ce9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -72,8 +72,8 @@ system_prompt = (
"You are a top-tier algorithm designed for extracting information in structured " "You are a top-tier algorithm designed for extracting information in structured "
"formats to build a knowledge graph.\n" "formats to build a knowledge graph.\n"
"Try to capture as much information from the text as possible without " "Try to capture as much information from the text as possible without "
"sacrifing accuracy. Do not add any information that is not explicitly " "sacrificing accuracy. Do not add any information that is not explicitly "
"mentioned in the text\n" "mentioned in the text.\n"
"- **Nodes** represent entities and concepts.\n" "- **Nodes** represent entities and concepts.\n"
"- The aim is to achieve simplicity and clarity in the knowledge graph, making it\n" "- The aim is to achieve simplicity and clarity in the knowledge graph, making it\n"
"accessible for a vast audience.\n" "accessible for a vast audience.\n"
@ -82,7 +82,7 @@ system_prompt = (
"Ensure you use basic or elementary types for node labels.\n" "Ensure you use basic or elementary types for node labels.\n"
"- For example, when you identify an entity representing a person, " "- For example, when you identify an entity representing a person, "
"always label it as **'person'**. Avoid using more specific terms " "always label it as **'person'**. Avoid using more specific terms "
"like 'mathematician' or 'scientist'" "like 'mathematician' or 'scientist'."
"- **Node IDs**: Never utilize integers as node IDs. Node IDs should be " "- **Node IDs**: Never utilize integers as node IDs. Node IDs should be "
"names or human-readable identifiers found in the text.\n" "names or human-readable identifiers found in the text.\n"
"- **Relationships** represent connections between entities or concepts.\n" "- **Relationships** represent connections between entities or concepts.\n"
@ -138,8 +138,8 @@ def _get_additional_info(input_type: str) -> str:
elif input_type == "relationship": elif input_type == "relationship":
return ( return (
"Instead of using specific and momentary types such as " "Instead of using specific and momentary types such as "
"'BECAME_PROFESSOR', use more general and timeless relationship types like " "'BECAME_PROFESSOR', use more general and timeless relationship types "
"'PROFESSOR'. However, do not sacrifice any accuracy for generality" "like 'PROFESSOR'. However, do not sacrifice any accuracy for generality"
) )
elif input_type == "property": elif input_type == "property":
return "" return ""
@ -280,10 +280,32 @@ def create_simple_model(
rel_types: Optional[List[str]] = None, rel_types: Optional[List[str]] = None,
node_properties: Union[bool, List[str]] = False, node_properties: Union[bool, List[str]] = False,
llm_type: Optional[str] = None, llm_type: Optional[str] = None,
relationship_properties: Union[bool, List[str]] = False,
) -> Type[_Graph]: ) -> Type[_Graph]:
""" """
Simple model allows to limit node and/or relationship types. Create a simple graph model with optional constraints on node
Doesn't have any node or relationship properties. and relationship types.
Args:
node_labels (Optional[List[str]]): Specifies the allowed node types.
Defaults to None, allowing all node types.
rel_types (Optional[List[str]]): Specifies the allowed relationship types.
Defaults to None, allowing all relationship types.
node_properties (Union[bool, List[str]]): Specifies if node properties should
be included. If a list is provided, only properties with keys in the list
will be included. If True, all properties are included. Defaults to False.
relationship_properties (Union[bool, List[str]]): Specifies if relationship
properties should be included. If a list is provided, only properties with
keys in the list will be included. If True, all properties are included.
Defaults to False.
llm_type (Optional[str]): The type of the language model. Defaults to None.
Only openai supports enum param: openai-chat.
Returns:
Type[_Graph]: A graph model with the specified constraints.
Raises:
ValueError: If 'id' is included in the node or relationship properties list.
""" """
node_fields: Dict[str, Tuple[Any, Any]] = { node_fields: Dict[str, Tuple[Any, Any]] = {
@ -325,39 +347,80 @@ def create_simple_model(
) )
SimpleNode = create_model("SimpleNode", **node_fields) # type: ignore SimpleNode = create_model("SimpleNode", **node_fields) # type: ignore
class SimpleRelationship(BaseModel): relationship_fields: Dict[str, Tuple[Any, Any]] = {
"""Represents a directed relationship between two nodes in a graph.""" "source_node_id": (
str,
source_node_id: str = Field( Field(
description="Name or human-readable unique identifier of source node" ...,
) description="Name or human-readable unique identifier of source node",
source_node_type: str = optional_enum_field( ),
),
"source_node_type": (
str,
optional_enum_field(
node_labels, node_labels,
description="The type or label of the source node.", description="The type or label of the source node.",
input_type="node", input_type="node",
llm_type=llm_type, ),
) ),
target_node_id: str = Field( "target_node_id": (
description="Name or human-readable unique identifier of target node" str,
) Field(
target_node_type: str = optional_enum_field( ...,
description="Name or human-readable unique identifier of target node",
),
),
"target_node_type": (
str,
optional_enum_field(
node_labels, node_labels,
description="The type or label of the target node.", description="The type or label of the target node.",
input_type="node", input_type="node",
llm_type=llm_type, ),
) ),
type: str = optional_enum_field( "type": (
str,
optional_enum_field(
rel_types, rel_types,
description="The type of the relationship.", description="The type of the relationship.",
input_type="relationship", input_type="relationship",
llm_type=llm_type, ),
),
}
if relationship_properties:
if (
isinstance(relationship_properties, list)
and "id" in relationship_properties
):
raise ValueError(
"The relationship property 'id' is reserved and cannot be used."
) )
# Map True to empty array
relationship_properties_mapped: List[str] = (
[] if relationship_properties is True else relationship_properties
)
class RelationshipProperty(BaseModel):
"""A single property consisting of key and value"""
key: str = optional_enum_field(
relationship_properties_mapped,
description="Property key.",
input_type="property",
)
value: str = Field(..., description="value")
relationship_fields["properties"] = (
Optional[List[RelationshipProperty]],
Field(None, description="List of relationship properties"),
)
SimpleRelationship = create_model("SimpleRelationship", **relationship_fields) # type: ignore
class DynamicGraph(_Graph): class DynamicGraph(_Graph):
"""Represents a graph document consisting of nodes and relationships.""" """Represents a graph document consisting of nodes and relationships."""
nodes: Optional[List[SimpleNode]] = Field(description="List of nodes") # type: ignore nodes: Optional[List[SimpleNode]] = Field(description="List of nodes") # type: ignore
relationships: Optional[List[SimpleRelationship]] = Field( relationships: Optional[List[SimpleRelationship]] = Field( # type: ignore
description="List of relationships" description="List of relationships"
) )
@ -377,7 +440,13 @@ def map_to_base_relationship(rel: Any) -> Relationship:
"""Map the SimpleRelationship to the base Relationship.""" """Map the SimpleRelationship to the base Relationship."""
source = Node(id=rel.source_node_id, type=rel.source_node_type) source = Node(id=rel.source_node_id, type=rel.source_node_type)
target = Node(id=rel.target_node_id, type=rel.target_node_type) target = Node(id=rel.target_node_id, type=rel.target_node_type)
return Relationship(source=source, target=target, type=rel.type) properties = {}
if hasattr(rel, "properties") and rel.properties:
for p in rel.properties:
properties[format_property_key(p.key)] = p.value
return Relationship(
source=source, target=target, type=rel.type, properties=properties
)
def _parse_and_clean_json( def _parse_and_clean_json(
@ -387,10 +456,15 @@ def _parse_and_clean_json(
for node in argument_json["nodes"]: for node in argument_json["nodes"]:
if not node.get("id"): # Id is mandatory, skip this node if not node.get("id"): # Id is mandatory, skip this node
continue continue
node_properties = {}
if "properties" in node and node["properties"]:
for p in node["properties"]:
node_properties[format_property_key(p["key"])] = p["value"]
nodes.append( nodes.append(
Node( Node(
id=node["id"], id=node["id"],
type=node.get("type"), type=node.get("type"),
properties=node_properties,
) )
) )
relationships = [] relationships = []
@ -423,6 +497,11 @@ def _parse_and_clean_json(
except IndexError: except IndexError:
rel["target_node_type"] = None rel["target_node_type"] = None
rel_properties = {}
if "properties" in rel and rel["properties"]:
for p in rel["properties"]:
rel_properties[format_property_key(p["key"])] = p["value"]
source_node = Node( source_node = Node(
id=rel["source_node_id"], id=rel["source_node_id"],
type=rel["source_node_type"], type=rel["source_node_type"],
@ -436,6 +515,7 @@ def _parse_and_clean_json(
source=source_node, source=source_node,
target=target_node, target=target_node,
type=rel["type"], type=rel["type"],
properties=rel_properties,
) )
) )
return nodes, relationships return nodes, relationships
@ -458,6 +538,7 @@ def _format_relationships(rels: List[Relationship]) -> List[Relationship]:
source=_format_nodes([el.source])[0], source=_format_nodes([el.source])[0],
target=_format_nodes([el.target])[0], target=_format_nodes([el.target])[0],
type=el.type.replace(" ", "_").upper(), type=el.type.replace(" ", "_").upper(),
properties=el.properties,
) )
for el in rels for el in rels
] ]
@ -513,8 +594,8 @@ class LLMGraphTransformer:
"""Transform documents into graph-based documents using a LLM. """Transform documents into graph-based documents using a LLM.
It allows specifying constraints on the types of nodes and relationships to include It allows specifying constraints on the types of nodes and relationships to include
in the output graph. The class doesn't support neither extract and node or in the output graph. The class supports extracting properties for both nodes and
relationship properties relationships.
Args: Args:
llm (BaseLanguageModel): An instance of a language model supporting structured llm (BaseLanguageModel): An instance of a language model supporting structured
@ -553,6 +634,7 @@ class LLMGraphTransformer:
prompt: Optional[ChatPromptTemplate] = None, prompt: Optional[ChatPromptTemplate] = None,
strict_mode: bool = True, strict_mode: bool = True,
node_properties: Union[bool, List[str]] = False, node_properties: Union[bool, List[str]] = False,
relationship_properties: Union[bool, List[str]] = False,
) -> None: ) -> None:
self.allowed_nodes = allowed_nodes self.allowed_nodes = allowed_nodes
self.allowed_relationships = allowed_relationships self.allowed_relationships = allowed_relationships
@ -564,14 +646,14 @@ class LLMGraphTransformer:
except NotImplementedError: except NotImplementedError:
self._function_call = False self._function_call = False
if not self._function_call: if not self._function_call:
if node_properties: if node_properties or relationship_properties:
raise ValueError( raise ValueError(
"The 'node_properties' parameter cannot be used " "The 'node_properties' and 'relationship_properties' parameters "
"in combination with a LLM that doesn't support " "cannot be used in combination with a LLM that doesn't support "
"native function calling." "native function calling."
) )
try: try:
import json_repair import json_repair # type: ignore
self.json_repair = json_repair self.json_repair = json_repair
except ImportError: except ImportError:
@ -590,7 +672,11 @@ class LLMGraphTransformer:
except AttributeError: except AttributeError:
llm_type = None llm_type = None
schema = create_simple_model( schema = create_simple_model(
allowed_nodes, allowed_relationships, node_properties, llm_type allowed_nodes,
allowed_relationships,
node_properties,
llm_type,
relationship_properties,
) )
structured_llm = llm.with_structured_output(schema, include_raw=True) structured_llm = llm.with_structured_output(schema, include_raw=True)
prompt = prompt or default_prompt prompt = prompt or default_prompt