Templates (#12294)

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> Co-authored-by: Lance Martin <lance@langchain.dev> Co-authored-by: Jacob Lee <jacoblee93@gmail.com>
2025-09-16 15:04:13 +00:00 · 2023-10-25 18:47:42 -07:00
parent 43257a295c
commit ebf998acb6
242 changed files with 53432 additions and 31 deletions
--- a/templates/neo4j-generation/README.md
+++ b/templates/neo4j-generation/README.md
@@ -0,0 +1,25 @@
+# Graph Generation Chain for Neo4j Knowledge Graph
+
+Harness the power of natural language understanding and convert plain text into structured knowledge graphs with the Graph Generation Chain.
+This system integrates with the Neo4j graph database using OpenAI's LLM.
+By leveraging OpenAI Functions capabilities, the Graph Generation Chain efficiently extracts graph structure from text.
+
+## Set up Environment
+
+You need to define the following environment variables:
+
+```
+OPENAI_API_KEY=<YOUR_OPENAI_API_KEY>
+NEO4J_URI=<YOUR_NEO4J_URI>
+NEO4J_USERNAME=<YOUR_NEO4J_USERNAME>
+NEO4J_PASSWORD=<YOUR_NEO4J_PASSWORD>
+```
+
+## Installation
+
+To get started with the Graph Generation Chain:
+
+```bash
+# from inside your LangServe instance
+poe add neo4j-generation
+```
--- a/templates/neo4j-generation/main.py
+++ b/templates/neo4j-generation/main.py
@@ -0,0 +1,14 @@
+from neo4j_generation.chain import chain
+
+
+if __name__ == "__main__":
+    text = "Harrison works at LangChain, which is located in San Francisco"
+    allowed_nodes = ["Person", "Organization", "Location"]
+    allowed_relationships = ["WORKS_AT", "LOCATED_IN"]
+    print(
+        chain(
+            text,
+            allowed_nodes=allowed_nodes,
+            allowed_relationships=allowed_relationships,
+        )
+    )
--- a/templates/neo4j-generation/neo4j_generation/__init__.py
+++ b/templates/neo4j-generation/neo4j_generation/__init__.py
@@ -0,0 +1,3 @@
+from neo4j_generation.chain import chain
+
+__all__ = ["chain"]
--- a/templates/neo4j-generation/neo4j_generation/chain.py
+++ b/templates/neo4j-generation/neo4j_generation/chain.py
@@ -0,0 +1,109 @@
+from typing import Optional, List
+from langchain.chains.openai_functions import (
+    create_structured_output_chain,
+)
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts import ChatPromptTemplate
+from langchain.graphs import Neo4jGraph
+from langchain.graphs.graph_document import GraphDocument
+from langchain.schema import Document
+
+from neo4j_generation.utils import (
+    KnowledgeGraph,
+    map_to_base_node,
+    map_to_base_relationship,
+)
+
+# Shared graph handle, created once at import time with no explicit arguments.
+# NOTE(review): presumably the connection settings come from the NEO4J_URI /
+# NEO4J_USERNAME / NEO4J_PASSWORD environment variables listed in the
+# template README — confirm.
+graph = Neo4jGraph()
+
+
+# Chat model shared by every extraction chain built in this module;
+# temperature is pinned to 0.
+llm = ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=0)
+
+
+def get_extraction_chain(
+    allowed_nodes: Optional[List[str]] = None, allowed_rels: Optional[List[str]] = None
+):
+    """
+    Constructs and returns an extraction chain for building a knowledge graph based on specified parameters.
+
+    The function generates a chat prompt template, outlining the instructions for an LLM to extract information
+    and construct a knowledge graph. It primarily focuses on consistency in labeling nodes, handling numerical data
+    and dates, coreference resolution, and strict compliance with the provided rules.
+
+    Parameters:
+    - allowed_nodes (Optional[List[str]]): A list of node labels that are allowed to be used in the knowledge graph.
+                                           If not provided (or empty), no "Allowed Node Labels" line is added to
+                                           the prompt, so there is no specific restriction on node labels.
+    - allowed_rels (Optional[List[str]]): A list of relationship types that are allowed in the knowledge graph.
+                                         If not provided (or empty), no "Allowed Relationship Types" line is added
+                                         to the prompt, so there is no specific restriction on relationship types.
+
+    Returns:
+    The chain created by `create_structured_output_chain` for the `KnowledgeGraph` schema, the module-level
+    `llm` and the prompt built here.
+    """
+    # The system message is an f-string, so the allowed labels/types are baked
+    # into the text right here; `{input}` in the first human message is a plain
+    # string and is left for the prompt template to fill in.
+    # NOTE(review): because the labels are interpolated before templating, a
+    # label containing curly braces would presumably be read as a template
+    # variable — confirm if labels can come from untrusted input.
+    prompt = ChatPromptTemplate.from_messages(
+        [
+            (
+                "system",
+                f"""# Knowledge Graph Instructions for GPT-4
+## 1. Overview
+You are a top-tier algorithm designed for extracting information in structured formats to build a knowledge graph.
+- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
+- The aim is to achieve simplicity and clarity in the knowledge graph, making it accessible for a vast audience.
+## 2. Labeling Nodes
+- **Consistency**: Ensure you use basic or elementary types for node labels.
+  - For example, when you identify an entity representing a person, always label it as **"person"**. Avoid using more specific terms like "mathematician" or "scientist".
+- **Node IDs**: Never utilize integers as node IDs. Node IDs should be names or human-readable identifiers found in the text.
+{'- **Allowed Node Labels:**' + ", ".join(allowed_nodes) if allowed_nodes else ""}
+{'- **Allowed Relationship Types**:' + ", ".join(allowed_rels) if allowed_rels else ""}
+## 3. Handling Numerical Data and Dates
+- Numerical data, like age or other related information, should be incorporated as attributes or properties of the respective nodes.
+- **No Separate Nodes for Dates/Numbers**: Do not create separate nodes for dates or numerical values. Always attach them as attributes or properties of nodes.
+- **Property Format**: Properties must be in a key-value format.
+- **Quotation Marks**: Never use escaped single or double quotes within property values.
+- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
+## 4. Coreference Resolution
+- **Maintain Entity Consistency**: When extracting entities, it's vital to ensure consistency.
+If an entity, such as "John Doe", is mentioned multiple times in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
+always use the most complete identifier for that entity throughout the knowledge graph. In this example, use "John Doe" as the entity ID.
+Remember, the knowledge graph should be coherent and easily understandable, so maintaining consistency in entity references is crucial.
+## 5. Strict Compliance
+Adhere to the rules strictly. Non-compliance will result in termination.
+          """,
+            ),
+            (
+                "human",
+                "Use the given format to extract information from the following input: {input}",
+            ),
+            ("human", "Tip: Make sure to answer in the correct format"),
+        ]
+    )
+    # KnowledgeGraph is the output schema the chain is asked to produce.
+    return create_structured_output_chain(KnowledgeGraph, llm, prompt, verbose=False)
+
+
+def chain(
+    text: str,
+    allowed_nodes: Optional[List[str]] = None,
+    allowed_relationships: Optional[List[str]] = None,
+) -> str:
+    """
+    Process the given text to extract graph data and constructs a graph document from the extracted information.
+    The constructed graph document is then added to the graph.
+
+    Parameters:
+    - text (str): The input text from which the information will be extracted to construct the graph.
+    - allowed_nodes (Optional[List[str]]): A list of node labels to guide the extraction process.
+                                   If not provided, extraction won't have specific restriction on node labels.
+    - allowed_relationships (Optional[List[str]]): A list of relationship types to guide the extraction process.
+                                  If not provided, extraction won't have specific restriction on relationship types.
+
+    Returns:
+    str: A confirmation message indicating the completion of the graph construction.
+    """
+    # Extract graph data using OpenAI functions
+    extract_chain = get_extraction_chain(allowed_nodes, allowed_relationships)
+    data = extract_chain.run(text)
+    # Construct a graph document
+    graph_document = GraphDocument(
+        nodes=[map_to_base_node(node) for node in data.nodes],
+        relationships=[map_to_base_relationship(rel) for rel in data.rels],
+        source=Document(page_content=text),
+    )
+    # Store information into a graph
+    graph.add_graph_documents([graph_document])
+    return "Graph construction finished"
--- a/templates/neo4j-generation/neo4j_generation/utils.py
+++ b/templates/neo4j-generation/neo4j_generation/utils.py
@@ -0,0 +1,73 @@
+from langchain.graphs.graph_document import (
+    Node as BaseNode,
+    Relationship as BaseRelationship,
+)
+from typing import List, Optional
+from langchain.pydantic_v1 import Field, BaseModel
+
+
+# Key/value pair used for node and relationship properties in the extraction
+# schema; both fields are declared with Field(...), i.e. without a default.
+class Property(BaseModel):
+    """A single property consisting of key and value"""
+
+    # Property name; props_to_dict() passes it through format_property_key().
+    key: str = Field(..., description="key")
+    # Property value, typed as a string.
+    value: str = Field(..., description="value")
+
+
+# Extraction-side node: the base Node with `properties` declared as an
+# optional list of Property items (map_to_base_node() later converts that
+# list into a dict for the base Node).
+# NOTE(review): documented with `#` comments on purpose — pydantic may expose
+# a class docstring as the schema description sent to the model; confirm
+# before adding one.
+class Node(BaseNode):
+    properties: Optional[List[Property]] = Field(
+        None, description="List of node properties"
+    )
+
+
+# Extraction-side relationship: the base Relationship with `properties`
+# declared as an optional list of Property items
+# (map_to_base_relationship() later converts that list into a dict).
+# NOTE(review): documented with `#` comments on purpose — pydantic may expose
+# a class docstring as the schema description sent to the model; confirm
+# before adding one.
+class Relationship(BaseRelationship):
+    properties: Optional[List[Property]] = Field(
+        None, description="List of relationship properties"
+    )
+
+
+# Top-level extraction result: the nodes and relationships found in a text.
+# Both lists are required (Field(...)).
+class KnowledgeGraph(BaseModel):
+    """Generate a knowledge graph with entities and relationships."""
+
+    # Extracted entities.
+    nodes: List[Node] = Field(..., description="List of nodes in the knowledge graph")
+    # Extracted relationships between entities.
+    rels: List[Relationship] = Field(
+        ..., description="List of relationships in the knowledge graph"
+    )
+
+
+def format_property_key(s: str) -> str:
+    words = s.split()
+    if not words:
+        return s
+    first_word = words[0].lower()
+    capitalized_words = [word.capitalize() for word in words[1:]]
+    return "".join([first_word] + capitalized_words)
+
+
+def props_to_dict(props) -> dict:
+    """Convert properties to a dictionary."""
+    properties = {}
+    if not props:
+        return properties
+    for p in props:
+        properties[format_property_key(p.key)] = p.value
+    return properties
+
+
+def map_to_base_node(node: Node) -> BaseNode:
+    """Map the KnowledgeGraph Node to the base Node."""
+    properties = props_to_dict(node.properties) if node.properties else {}
+    # Add name property for better Cypher statement generation
+    properties["name"] = node.id.title()
+    return BaseNode(
+        id=node.id.title(), type=node.type.capitalize(), properties=properties
+    )
+
+
+def map_to_base_relationship(rel: Relationship) -> BaseRelationship:
+    """Map the KnowledgeGraph Relationship to the base Relationship."""
+    source = map_to_base_node(rel.source)
+    target = map_to_base_node(rel.target)
+    properties = props_to_dict(rel.properties) if rel.properties else {}
+    return BaseRelationship(
+        source=source, target=target, type=rel.type, properties=properties
+    )
--- a/templates/neo4j-generation/poetry.lock
+++ b/templates/neo4j-generation/poetry.lock
--- a/templates/neo4j-generation/pyproject.toml
+++ b/templates/neo4j-generation/pyproject.toml
@@ -0,0 +1,21 @@
+[tool.poetry]
+name = "neo4j_generation"
+version = "0.0.1"
+description = ""
+authors = ["Tomaz Bratanic <tomaz.bratanic@neo4j.com>"]
+readme = "README.md"
+
+[tool.poetry.dependencies]
+python = ">=3.8.1,<4.0"
+langchain = ">=0.0.320"
+openai = "^0.28.1"
+neo4j = "^5.12.0"
+
+[tool.langserve]
+export_module = "neo4j_generation.chain"
+export_attr = "chain"
+
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
--- a/templates/neo4j-generation/tests/__init__.py
+++ b/templates/neo4j-generation/tests/__init__.py