mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-16 15:04:13 +00:00
Templates (#12294)
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> Co-authored-by: Lance Martin <lance@langchain.dev> Co-authored-by: Jacob Lee <jacoblee93@gmail.com>
This commit is contained in:
25
templates/neo4j-generation/README.md
Normal file
25
templates/neo4j-generation/README.md
Normal file
@@ -0,0 +1,25 @@
|
||||
# Graph Generation Chain for Neo4j Knowledge Graph
|
||||
|
||||
Harness the power of natural language understanding and convert plain text into structured knowledge graphs with the Graph Generation Chain.
|
||||
This template uses an OpenAI LLM to extract the graph structure and stores the result in a Neo4j graph database.
|
||||
By leveraging OpenAI Functions capabilities, the Graph Generation Chain efficiently extracts graph structure from text.
|
||||
|
||||
## Set up Environment
|
||||
|
||||
You need to define the following environment variables:
|
||||
|
||||
```
|
||||
OPENAI_API_KEY=<YOUR_OPENAI_API_KEY>
|
||||
NEO4J_URI=<YOUR_NEO4J_URI>
|
||||
NEO4J_USERNAME=<YOUR_NEO4J_USERNAME>
|
||||
NEO4J_PASSWORD=<YOUR_NEO4J_PASSWORD>
|
||||
```
|
||||
|
||||
## Installation
|
||||
|
||||
To get started with the Graph Generation Chain:
|
||||
|
||||
```bash
|
||||
# from inside your LangServe instance
|
||||
poe add neo4j-generation
|
||||
```
|
14
templates/neo4j-generation/main.py
Normal file
14
templates/neo4j-generation/main.py
Normal file
@@ -0,0 +1,14 @@
|
||||
from neo4j_generation.chain import chain
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: run the extraction chain on one sentence, restricted
    # to the node labels and relationship types listed below.
    sample_text = "Harrison works at LangChain, which is located in San Francisco"
    node_labels = ["Person", "Organization", "Location"]
    relationship_types = ["WORKS_AT", "LOCATED_IN"]
    outcome = chain(
        sample_text,
        allowed_nodes=node_labels,
        allowed_relationships=relationship_types,
    )
    # `chain` returns a status string rather than the extracted graph.
    print(outcome)
|
3
templates/neo4j-generation/neo4j_generation/__init__.py
Normal file
3
templates/neo4j-generation/neo4j_generation/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from neo4j_generation.chain import chain
|
||||
|
||||
# Public API of the package: only the `chain` entry point is re-exported.
__all__ = ["chain"]
|
109
templates/neo4j-generation/neo4j_generation/chain.py
Normal file
109
templates/neo4j-generation/neo4j_generation/chain.py
Normal file
@@ -0,0 +1,109 @@
|
||||
from typing import Optional, List
|
||||
from langchain.chains.openai_functions import (
|
||||
create_structured_output_chain,
|
||||
)
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain.prompts import ChatPromptTemplate
|
||||
from langchain.graphs import Neo4jGraph
|
||||
from langchain.graphs.graph_document import GraphDocument
|
||||
from langchain.schema import Document
|
||||
|
||||
from neo4j_generation.utils import (
|
||||
KnowledgeGraph,
|
||||
map_to_base_node,
|
||||
map_to_base_relationship,
|
||||
)
|
||||
|
||||
# Module-level Neo4j handle, created at import time; `chain` writes the
# extracted graph documents to it.
# NOTE(review): no arguments are passed, so the connection settings presumably
# come from the NEO4J_* environment variables listed in the README — confirm.
graph = Neo4jGraph()
|
||||
|
||||
|
||||
# Chat model shared by every extraction chain built in this module;
# temperature=0 asks for the least random sampling.
llm = ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=0)
|
||||
|
||||
|
||||
def get_extraction_chain(
    allowed_nodes: Optional[List[str]] = None, allowed_rels: Optional[List[str]] = None
):
    """
    Build the structured-output chain that turns text into a ``KnowledgeGraph``.

    The system message spells out the extraction rules (node labelling,
    numerical data and dates, coreference resolution, strict compliance).
    Two human messages follow: one carrying the ``{input}`` placeholder and one
    reminding the model to answer in the expected format.

    Parameters:
    - allowed_nodes (Optional[List[str]]): Node labels to list in the prompt.
      When omitted or empty, no label list is added.
    - allowed_rels (Optional[List[str]]): Relationship types to list in the prompt.
      When omitted or empty, no type list is added.

    Returns:
    The chain created by ``create_structured_output_chain`` from
    ``KnowledgeGraph``, the module-level ``llm`` and the prompt assembled here.
    """
    # Optional bullet lines; each one stays an empty line when no list is given.
    node_label_rule = ""
    if allowed_nodes:
        node_label_rule = "- **Allowed Node Labels:**" + ", ".join(allowed_nodes)

    relationship_type_rule = ""
    if allowed_rels:
        relationship_type_rule = "- **Allowed Relationship Types**:" + ", ".join(
            allowed_rels
        )

    # The prompt body is kept flush-left so that no indentation leaks into the
    # text sent to the model.
    system_prompt = f"""# Knowledge Graph Instructions for GPT-4
## 1. Overview
You are a top-tier algorithm designed for extracting information in structured formats to build a knowledge graph.
- **Nodes** represent entities and concepts. They're akin to Wikipedia nodes.
- The aim is to achieve simplicity and clarity in the knowledge graph, making it accessible for a vast audience.
## 2. Labeling Nodes
- **Consistency**: Ensure you use basic or elementary types for node labels.
- For example, when you identify an entity representing a person, always label it as **"person"**. Avoid using more specific terms like "mathematician" or "scientist".
- **Node IDs**: Never utilize integers as node IDs. Node IDs should be names or human-readable identifiers found in the text.
{node_label_rule}
{relationship_type_rule}
## 3. Handling Numerical Data and Dates
- Numerical data, like age or other related information, should be incorporated as attributes or properties of the respective nodes.
- **No Separate Nodes for Dates/Numbers**: Do not create separate nodes for dates or numerical values. Always attach them as attributes or properties of nodes.
- **Property Format**: Properties must be in a key-value format.
- **Quotation Marks**: Never use escaped single or double quotes within property values.
- **Naming Convention**: Use camelCase for property keys, e.g., `birthDate`.
## 4. Coreference Resolution
- **Maintain Entity Consistency**: When extracting entities, it's vital to ensure consistency.
If an entity, such as "John Doe", is mentioned multiple times in the text but is referred to by different names or pronouns (e.g., "Joe", "he"),
always use the most complete identifier for that entity throughout the knowledge graph. In this example, use "John Doe" as the entity ID.
Remember, the knowledge graph should be coherent and easily understandable, so maintaining consistency in entity references is crucial.
## 5. Strict Compliance
Adhere to the rules strictly. Non-compliance will result in termination.
"""

    messages = [
        ("system", system_prompt),
        (
            "human",
            "Use the given format to extract information from the following input: {input}",
        ),
        ("human", "Tip: Make sure to answer in the correct format"),
    ]
    prompt = ChatPromptTemplate.from_messages(messages)
    return create_structured_output_chain(KnowledgeGraph, llm, prompt, verbose=False)
|
||||
|
||||
|
||||
def chain(
    text: str,
    allowed_nodes: Optional[List[str]] = None,
    allowed_relationships: Optional[List[str]] = None,
) -> str:
    """
    Extract a knowledge graph from ``text`` and add it to the module-level graph.

    Parameters:
    - text (str): The input text to extract nodes and relationships from. It is
      also stored as the source document of the resulting graph document.
    - allowed_nodes (Optional[List[str]]): Node labels passed on to the extraction prompt.
      If not provided, the prompt lists no node labels.
    - allowed_relationships (Optional[List[str]]): Relationship types passed on to the
      extraction prompt. If not provided, the prompt lists no relationship types.

    Returns:
    str: The fixed confirmation message "Graph construction finished".
    """
    # Run the OpenAI-functions extraction over the raw text.
    extractor = get_extraction_chain(allowed_nodes, allowed_relationships)
    extracted = extractor.run(text)

    # Convert the extracted nodes and relationships to the base graph types.
    base_nodes = [map_to_base_node(found) for found in extracted.nodes]
    base_relationships = [map_to_base_relationship(found) for found in extracted.rels]
    document = GraphDocument(
        nodes=base_nodes,
        relationships=base_relationships,
        source=Document(page_content=text),
    )

    # Persist the single graph document.
    graph.add_graph_documents([document])
    return "Graph construction finished"
|
73
templates/neo4j-generation/neo4j_generation/utils.py
Normal file
73
templates/neo4j-generation/neo4j_generation/utils.py
Normal file
@@ -0,0 +1,73 @@
|
||||
from langchain.graphs.graph_document import (
|
||||
Node as BaseNode,
|
||||
Relationship as BaseRelationship,
|
||||
)
|
||||
from typing import List, Optional
|
||||
from langchain.pydantic_v1 import Field, BaseModel
|
||||
|
||||
|
||||
class Property(BaseModel):
    """A single property consisting of key and value"""

    # Both fields are required (`...` default). The class docstring above and
    # the field descriptions are left untouched.
    # NOTE(review): they are presumably surfaced in the schema sent to the
    # model — confirm before rewording.
    key: str = Field(default=..., description="key")
    value: str = Field(default=..., description="value")
|
||||
|
||||
|
||||
class Node(BaseNode):
    # Base node whose `properties` field is declared as an optional list of
    # key/value `Property` items; `map_to_base_node` converts it back to a dict.
    # NOTE(review): no docstring is added here because pydantic may surface
    # class docstrings in the generated schema — confirm before adding one.
    properties: Optional[List[Property]] = Field(
        default=None, description="List of node properties"
    )
|
||||
|
||||
|
||||
class Relationship(BaseRelationship):
    # Base relationship whose `properties` field is declared as an optional
    # list of key/value `Property` items; `map_to_base_relationship` converts
    # it back to a dict.
    # NOTE(review): no docstring is added here because pydantic may surface
    # class docstrings in the generated schema — confirm before adding one.
    properties: Optional[List[Property]] = Field(
        default=None, description="List of relationship properties"
    )
|
||||
|
||||
|
||||
class KnowledgeGraph(BaseModel):
    """Generate a knowledge graph with entities and relationships."""

    # Structured-output target of the extraction chain; both lists are required.
    # `rels` is the attribute name that `chain` reads the relationships from.
    nodes: List[Node] = Field(
        default=..., description="List of nodes in the knowledge graph"
    )
    rels: List[Relationship] = Field(
        default=..., description="List of relationships in the knowledge graph"
    )
|
||||
|
||||
|
||||
def _format_key_word(word: str, is_first: bool) -> str:
    """Normalise one whitespace-delimited token of a property key."""
    if word.isalnum() and not word.isupper():
        # The token may already carry camelCase humps (e.g. "birthDate"), so
        # only the first letter is adjusted and the rest is left untouched.
        head = word[0].lower() if is_first else word[0].upper()
        return head + word[1:]
    # All-caps or punctuated tokens carry no hump information: flatten them.
    return word.lower() if is_first else word.capitalize()


def format_property_key(s: str) -> str:
    """Convert a property key to camelCase.

    The key is split on whitespace; the first word starts with a lower-case
    letter and every following word with an upper-case letter, and the words
    are joined without separators. Interior capitals of alphanumeric words
    are kept, so a key that is already camelCase (``birthDate``, the form the
    extraction prompt asks for) is not flattened to ``birthdate`` and ends up
    identical to the result for ``"birth date"``.

    Parameters:
    - s (str): The raw property key.

    Returns:
    str: The camelCase key, or ``s`` unchanged when it is empty or
    whitespace-only.
    """
    words = s.split()
    if not words:
        return s
    return "".join(
        _format_key_word(word, position == 0) for position, word in enumerate(words)
    )
|
||||
|
||||
|
||||
def props_to_dict(props) -> dict:
    """Turn a list of key/value properties into a plain dictionary.

    Each item's ``key`` is passed through ``format_property_key`` and mapped to
    the item's ``value``; when two items yield the same key, the later one
    wins. A missing or empty ``props`` gives an empty dictionary.
    """
    if not props:
        return {}
    return {format_property_key(item.key): item.value for item in props}
|
||||
|
||||
|
||||
def map_to_base_node(node: Node) -> BaseNode:
    """Convert an extracted ``Node`` into a ``BaseNode``.

    The property list becomes a dictionary, the id is title-cased and the type
    is capitalised. The title-cased id is also stored under the ``name``
    property.
    """
    # props_to_dict already returns an empty dict for missing/empty properties.
    attributes = props_to_dict(node.properties)
    display_name = node.id.title()
    # Add name property for better Cypher statement generation
    attributes["name"] = display_name
    return BaseNode(
        id=display_name, type=node.type.capitalize(), properties=attributes
    )
|
||||
|
||||
|
||||
def map_to_base_relationship(rel: Relationship) -> BaseRelationship:
    """Convert an extracted ``Relationship`` into a ``BaseRelationship``.

    Both endpoints go through ``map_to_base_node``, the property list becomes a
    dictionary, and the relationship type is passed through unchanged.
    """
    return BaseRelationship(
        source=map_to_base_node(rel.source),
        target=map_to_base_node(rel.target),
        type=rel.type,
        # props_to_dict already returns an empty dict for missing/empty properties.
        properties=props_to_dict(rel.properties),
    )
|
1296
templates/neo4j-generation/poetry.lock
generated
Normal file
1296
templates/neo4j-generation/poetry.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
21
templates/neo4j-generation/pyproject.toml
Normal file
21
templates/neo4j-generation/pyproject.toml
Normal file
@@ -0,0 +1,21 @@
|
||||
[tool.poetry]
|
||||
name = "neo4j_generation"
|
||||
version = "0.0.1"
|
||||
description = ""
|
||||
authors = ["Tomaz Bratanic <tomaz.bratanic@neo4j.com>"]
|
||||
readme = "README.md"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.8.1,<4.0"
|
||||
langchain = ">=0.0.320"
|
||||
openai = "^0.28.1"
|
||||
neo4j = "^5.12.0"
|
||||
|
||||
[tool.langserve]
|
||||
export_module = "neo4j_generation.chain"
|
||||
export_attr = "chain"
|
||||
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
0
templates/neo4j-generation/tests/__init__.py
Normal file
0
templates/neo4j-generation/tests/__init__.py
Normal file
Reference in New Issue
Block a user