mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-03 11:47:49 +00:00
experimental: LLMGraphTransformer - added relationship properties. (#21856)
- **Description:** The generated relationships in the graph had no properties, even though the Relationship class was already properly defined with properties. This made it very difficult to transform conditional sentences into a graph. Adding properties to relationships solves this issue elegantly. The changes build on the existing LLMGraphTransformer implementation and add the ability to define allowed relationship properties like this: `LLMGraphTransformer(llm=llm, relationship_properties=["Condition", "Time"],)`
- **Issue:** no issue found
- **Dependencies:** n/a
- **Twitter handle:** @IstvanSpace

Quick Test
=================================================================

from dotenv import load_dotenv
import os
from langchain_community.graphs import Neo4jGraph
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.documents import Document

load_dotenv()
os.environ["NEO4J_URI"] = os.getenv("NEO4J_URI")
os.environ["NEO4J_USERNAME"] = os.getenv("NEO4J_USERNAME")
os.environ["NEO4J_PASSWORD"] = os.getenv("NEO4J_PASSWORD")
graph = Neo4jGraph()

llm = ChatOpenAI(temperature=0, model_name="gpt-4o")
llm_transformer = LLMGraphTransformer(llm=llm)

#text = "Harry potter likes pies, but only if it rains outside"
text = "Jack has a dog named Max. Jack only walks Max if it is sunny outside."
documents = [Document(page_content=text)]

llm_transformer_props = LLMGraphTransformer(
    llm=llm,
    relationship_properties=["Condition"],
)
graph_documents_props = llm_transformer_props.convert_to_graph_documents(documents)
print(f"Nodes:{graph_documents_props[0].nodes}")
print(f"Relationships:{graph_documents_props[0].relationships}")
graph.add_graph_documents(graph_documents_props)

---------

Co-authored-by: Istvan Lorincz <istvan.lorincz@pm.me>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
This commit is contained in:
parent
694ae87748
commit
513e491ce9
@ -72,8 +72,8 @@ system_prompt = (
|
|||||||
"You are a top-tier algorithm designed for extracting information in structured "
|
"You are a top-tier algorithm designed for extracting information in structured "
|
||||||
"formats to build a knowledge graph.\n"
|
"formats to build a knowledge graph.\n"
|
||||||
"Try to capture as much information from the text as possible without "
|
"Try to capture as much information from the text as possible without "
|
||||||
"sacrifing accuracy. Do not add any information that is not explicitly "
|
"sacrificing accuracy. Do not add any information that is not explicitly "
|
||||||
"mentioned in the text\n"
|
"mentioned in the text.\n"
|
||||||
"- **Nodes** represent entities and concepts.\n"
|
"- **Nodes** represent entities and concepts.\n"
|
||||||
"- The aim is to achieve simplicity and clarity in the knowledge graph, making it\n"
|
"- The aim is to achieve simplicity and clarity in the knowledge graph, making it\n"
|
||||||
"accessible for a vast audience.\n"
|
"accessible for a vast audience.\n"
|
||||||
@ -82,7 +82,7 @@ system_prompt = (
|
|||||||
"Ensure you use basic or elementary types for node labels.\n"
|
"Ensure you use basic or elementary types for node labels.\n"
|
||||||
"- For example, when you identify an entity representing a person, "
|
"- For example, when you identify an entity representing a person, "
|
||||||
"always label it as **'person'**. Avoid using more specific terms "
|
"always label it as **'person'**. Avoid using more specific terms "
|
||||||
"like 'mathematician' or 'scientist'"
|
"like 'mathematician' or 'scientist'."
|
||||||
"- **Node IDs**: Never utilize integers as node IDs. Node IDs should be "
|
"- **Node IDs**: Never utilize integers as node IDs. Node IDs should be "
|
||||||
"names or human-readable identifiers found in the text.\n"
|
"names or human-readable identifiers found in the text.\n"
|
||||||
"- **Relationships** represent connections between entities or concepts.\n"
|
"- **Relationships** represent connections between entities or concepts.\n"
|
||||||
@ -138,8 +138,8 @@ def _get_additional_info(input_type: str) -> str:
|
|||||||
elif input_type == "relationship":
|
elif input_type == "relationship":
|
||||||
return (
|
return (
|
||||||
"Instead of using specific and momentary types such as "
|
"Instead of using specific and momentary types such as "
|
||||||
"'BECAME_PROFESSOR', use more general and timeless relationship types like "
|
"'BECAME_PROFESSOR', use more general and timeless relationship types "
|
||||||
"'PROFESSOR'. However, do not sacrifice any accuracy for generality"
|
"like 'PROFESSOR'. However, do not sacrifice any accuracy for generality"
|
||||||
)
|
)
|
||||||
elif input_type == "property":
|
elif input_type == "property":
|
||||||
return ""
|
return ""
|
||||||
@ -280,10 +280,32 @@ def create_simple_model(
|
|||||||
rel_types: Optional[List[str]] = None,
|
rel_types: Optional[List[str]] = None,
|
||||||
node_properties: Union[bool, List[str]] = False,
|
node_properties: Union[bool, List[str]] = False,
|
||||||
llm_type: Optional[str] = None,
|
llm_type: Optional[str] = None,
|
||||||
|
relationship_properties: Union[bool, List[str]] = False,
|
||||||
) -> Type[_Graph]:
|
) -> Type[_Graph]:
|
||||||
"""
|
"""
|
||||||
Simple model allows to limit node and/or relationship types.
|
Create a simple graph model with optional constraints on node
|
||||||
Doesn't have any node or relationship properties.
|
and relationship types.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
node_labels (Optional[List[str]]): Specifies the allowed node types.
|
||||||
|
Defaults to None, allowing all node types.
|
||||||
|
rel_types (Optional[List[str]]): Specifies the allowed relationship types.
|
||||||
|
Defaults to None, allowing all relationship types.
|
||||||
|
node_properties (Union[bool, List[str]]): Specifies if node properties should
|
||||||
|
be included. If a list is provided, only properties with keys in the list
|
||||||
|
will be included. If True, all properties are included. Defaults to False.
|
||||||
|
relationship_properties (Union[bool, List[str]]): Specifies if relationship
|
||||||
|
properties should be included. If a list is provided, only properties with
|
||||||
|
keys in the list will be included. If True, all properties are included.
|
||||||
|
Defaults to False.
|
||||||
|
llm_type (Optional[str]): The type of the language model. Defaults to None.
|
||||||
|
Only openai supports enum param: openai-chat.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Type[_Graph]: A graph model with the specified constraints.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If 'id' is included in the node or relationship properties list.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
node_fields: Dict[str, Tuple[Any, Any]] = {
|
node_fields: Dict[str, Tuple[Any, Any]] = {
|
||||||
@ -325,39 +347,80 @@ def create_simple_model(
|
|||||||
)
|
)
|
||||||
SimpleNode = create_model("SimpleNode", **node_fields) # type: ignore
|
SimpleNode = create_model("SimpleNode", **node_fields) # type: ignore
|
||||||
|
|
||||||
class SimpleRelationship(BaseModel):
|
relationship_fields: Dict[str, Tuple[Any, Any]] = {
|
||||||
"""Represents a directed relationship between two nodes in a graph."""
|
"source_node_id": (
|
||||||
|
str,
|
||||||
source_node_id: str = Field(
|
Field(
|
||||||
description="Name or human-readable unique identifier of source node"
|
...,
|
||||||
)
|
description="Name or human-readable unique identifier of source node",
|
||||||
source_node_type: str = optional_enum_field(
|
),
|
||||||
|
),
|
||||||
|
"source_node_type": (
|
||||||
|
str,
|
||||||
|
optional_enum_field(
|
||||||
node_labels,
|
node_labels,
|
||||||
description="The type or label of the source node.",
|
description="The type or label of the source node.",
|
||||||
input_type="node",
|
input_type="node",
|
||||||
llm_type=llm_type,
|
),
|
||||||
)
|
),
|
||||||
target_node_id: str = Field(
|
"target_node_id": (
|
||||||
description="Name or human-readable unique identifier of target node"
|
str,
|
||||||
)
|
Field(
|
||||||
target_node_type: str = optional_enum_field(
|
...,
|
||||||
|
description="Name or human-readable unique identifier of target node",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
"target_node_type": (
|
||||||
|
str,
|
||||||
|
optional_enum_field(
|
||||||
node_labels,
|
node_labels,
|
||||||
description="The type or label of the target node.",
|
description="The type or label of the target node.",
|
||||||
input_type="node",
|
input_type="node",
|
||||||
llm_type=llm_type,
|
),
|
||||||
)
|
),
|
||||||
type: str = optional_enum_field(
|
"type": (
|
||||||
|
str,
|
||||||
|
optional_enum_field(
|
||||||
rel_types,
|
rel_types,
|
||||||
description="The type of the relationship.",
|
description="The type of the relationship.",
|
||||||
input_type="relationship",
|
input_type="relationship",
|
||||||
llm_type=llm_type,
|
),
|
||||||
|
),
|
||||||
|
}
|
||||||
|
if relationship_properties:
|
||||||
|
if (
|
||||||
|
isinstance(relationship_properties, list)
|
||||||
|
and "id" in relationship_properties
|
||||||
|
):
|
||||||
|
raise ValueError(
|
||||||
|
"The relationship property 'id' is reserved and cannot be used."
|
||||||
)
|
)
|
||||||
|
# Map True to empty array
|
||||||
|
relationship_properties_mapped: List[str] = (
|
||||||
|
[] if relationship_properties is True else relationship_properties
|
||||||
|
)
|
||||||
|
|
||||||
|
class RelationshipProperty(BaseModel):
|
||||||
|
"""A single property consisting of key and value"""
|
||||||
|
|
||||||
|
key: str = optional_enum_field(
|
||||||
|
relationship_properties_mapped,
|
||||||
|
description="Property key.",
|
||||||
|
input_type="property",
|
||||||
|
)
|
||||||
|
value: str = Field(..., description="value")
|
||||||
|
|
||||||
|
relationship_fields["properties"] = (
|
||||||
|
Optional[List[RelationshipProperty]],
|
||||||
|
Field(None, description="List of relationship properties"),
|
||||||
|
)
|
||||||
|
SimpleRelationship = create_model("SimpleRelationship", **relationship_fields) # type: ignore
|
||||||
|
|
||||||
class DynamicGraph(_Graph):
|
class DynamicGraph(_Graph):
|
||||||
"""Represents a graph document consisting of nodes and relationships."""
|
"""Represents a graph document consisting of nodes and relationships."""
|
||||||
|
|
||||||
nodes: Optional[List[SimpleNode]] = Field(description="List of nodes") # type: ignore
|
nodes: Optional[List[SimpleNode]] = Field(description="List of nodes") # type: ignore
|
||||||
relationships: Optional[List[SimpleRelationship]] = Field(
|
relationships: Optional[List[SimpleRelationship]] = Field( # type: ignore
|
||||||
description="List of relationships"
|
description="List of relationships"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -377,7 +440,13 @@ def map_to_base_relationship(rel: Any) -> Relationship:
|
|||||||
"""Map the SimpleRelationship to the base Relationship."""
|
"""Map the SimpleRelationship to the base Relationship."""
|
||||||
source = Node(id=rel.source_node_id, type=rel.source_node_type)
|
source = Node(id=rel.source_node_id, type=rel.source_node_type)
|
||||||
target = Node(id=rel.target_node_id, type=rel.target_node_type)
|
target = Node(id=rel.target_node_id, type=rel.target_node_type)
|
||||||
return Relationship(source=source, target=target, type=rel.type)
|
properties = {}
|
||||||
|
if hasattr(rel, "properties") and rel.properties:
|
||||||
|
for p in rel.properties:
|
||||||
|
properties[format_property_key(p.key)] = p.value
|
||||||
|
return Relationship(
|
||||||
|
source=source, target=target, type=rel.type, properties=properties
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _parse_and_clean_json(
|
def _parse_and_clean_json(
|
||||||
@ -387,10 +456,15 @@ def _parse_and_clean_json(
|
|||||||
for node in argument_json["nodes"]:
|
for node in argument_json["nodes"]:
|
||||||
if not node.get("id"): # Id is mandatory, skip this node
|
if not node.get("id"): # Id is mandatory, skip this node
|
||||||
continue
|
continue
|
||||||
|
node_properties = {}
|
||||||
|
if "properties" in node and node["properties"]:
|
||||||
|
for p in node["properties"]:
|
||||||
|
node_properties[format_property_key(p["key"])] = p["value"]
|
||||||
nodes.append(
|
nodes.append(
|
||||||
Node(
|
Node(
|
||||||
id=node["id"],
|
id=node["id"],
|
||||||
type=node.get("type"),
|
type=node.get("type"),
|
||||||
|
properties=node_properties,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
relationships = []
|
relationships = []
|
||||||
@ -423,6 +497,11 @@ def _parse_and_clean_json(
|
|||||||
except IndexError:
|
except IndexError:
|
||||||
rel["target_node_type"] = None
|
rel["target_node_type"] = None
|
||||||
|
|
||||||
|
rel_properties = {}
|
||||||
|
if "properties" in rel and rel["properties"]:
|
||||||
|
for p in rel["properties"]:
|
||||||
|
rel_properties[format_property_key(p["key"])] = p["value"]
|
||||||
|
|
||||||
source_node = Node(
|
source_node = Node(
|
||||||
id=rel["source_node_id"],
|
id=rel["source_node_id"],
|
||||||
type=rel["source_node_type"],
|
type=rel["source_node_type"],
|
||||||
@ -436,6 +515,7 @@ def _parse_and_clean_json(
|
|||||||
source=source_node,
|
source=source_node,
|
||||||
target=target_node,
|
target=target_node,
|
||||||
type=rel["type"],
|
type=rel["type"],
|
||||||
|
properties=rel_properties,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
return nodes, relationships
|
return nodes, relationships
|
||||||
@ -458,6 +538,7 @@ def _format_relationships(rels: List[Relationship]) -> List[Relationship]:
|
|||||||
source=_format_nodes([el.source])[0],
|
source=_format_nodes([el.source])[0],
|
||||||
target=_format_nodes([el.target])[0],
|
target=_format_nodes([el.target])[0],
|
||||||
type=el.type.replace(" ", "_").upper(),
|
type=el.type.replace(" ", "_").upper(),
|
||||||
|
properties=el.properties,
|
||||||
)
|
)
|
||||||
for el in rels
|
for el in rels
|
||||||
]
|
]
|
||||||
@ -513,8 +594,8 @@ class LLMGraphTransformer:
|
|||||||
"""Transform documents into graph-based documents using a LLM.
|
"""Transform documents into graph-based documents using a LLM.
|
||||||
|
|
||||||
It allows specifying constraints on the types of nodes and relationships to include
|
It allows specifying constraints on the types of nodes and relationships to include
|
||||||
in the output graph. The class doesn't support neither extract and node or
|
in the output graph. The class supports extracting properties for both nodes and
|
||||||
relationship properties
|
relationships.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
llm (BaseLanguageModel): An instance of a language model supporting structured
|
llm (BaseLanguageModel): An instance of a language model supporting structured
|
||||||
@ -553,6 +634,7 @@ class LLMGraphTransformer:
|
|||||||
prompt: Optional[ChatPromptTemplate] = None,
|
prompt: Optional[ChatPromptTemplate] = None,
|
||||||
strict_mode: bool = True,
|
strict_mode: bool = True,
|
||||||
node_properties: Union[bool, List[str]] = False,
|
node_properties: Union[bool, List[str]] = False,
|
||||||
|
relationship_properties: Union[bool, List[str]] = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
self.allowed_nodes = allowed_nodes
|
self.allowed_nodes = allowed_nodes
|
||||||
self.allowed_relationships = allowed_relationships
|
self.allowed_relationships = allowed_relationships
|
||||||
@ -564,14 +646,14 @@ class LLMGraphTransformer:
|
|||||||
except NotImplementedError:
|
except NotImplementedError:
|
||||||
self._function_call = False
|
self._function_call = False
|
||||||
if not self._function_call:
|
if not self._function_call:
|
||||||
if node_properties:
|
if node_properties or relationship_properties:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"The 'node_properties' parameter cannot be used "
|
"The 'node_properties' and 'relationship_properties' parameters "
|
||||||
"in combination with a LLM that doesn't support "
|
"cannot be used in combination with a LLM that doesn't support "
|
||||||
"native function calling."
|
"native function calling."
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
import json_repair
|
import json_repair # type: ignore
|
||||||
|
|
||||||
self.json_repair = json_repair
|
self.json_repair = json_repair
|
||||||
except ImportError:
|
except ImportError:
|
||||||
@ -590,7 +672,11 @@ class LLMGraphTransformer:
|
|||||||
except AttributeError:
|
except AttributeError:
|
||||||
llm_type = None
|
llm_type = None
|
||||||
schema = create_simple_model(
|
schema = create_simple_model(
|
||||||
allowed_nodes, allowed_relationships, node_properties, llm_type
|
allowed_nodes,
|
||||||
|
allowed_relationships,
|
||||||
|
node_properties,
|
||||||
|
llm_type,
|
||||||
|
relationship_properties,
|
||||||
)
|
)
|
||||||
structured_llm = llm.with_structured_output(schema, include_raw=True)
|
structured_llm = llm.with_structured_output(schema, include_raw=True)
|
||||||
prompt = prompt or default_prompt
|
prompt = prompt or default_prompt
|
||||||
|
Loading…
Reference in New Issue
Block a user