langchain[minor]: Reduce the number of tokens required to describe a Cypher/Neo4j schema (#13851)

Instead of using JSON-like syntax to describe node and relationship
properties, we changed to a shorter and more concise schema description.

Old:

```
        Node properties are the following:
        [{'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Movie'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Actor'}]
        Relationship properties are the following:
        []
        The relationships are the following:
        ['(:Actor)-[:ACTED_IN]->(:Movie)']
```

New:

```
Node properties are the following:
Movie {name: STRING},Actor {name: STRING}
Relationship properties are the following:

The relationships are the following:
(:Actor)-[:ACTED_IN]->(:Movie)
```
This commit is contained in:
Tomaz Bratanic 2023-11-29 20:13:12 +01:00 committed by GitHub
parent 7ec4dbeb80
commit 3eb391561b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 132 additions and 94 deletions

View File

@ -48,16 +48,7 @@
"execution_count": 2, "execution_count": 2,
"id": "0928915d", "id": "0928915d",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/tomaz/neo4j/langchain/libs/langchain/langchain/graphs/neo4j_graph.py:52: ExperimentalWarning: The configuration may change in the future.\n",
" self._driver.verify_connectivity()\n"
]
}
],
"source": [ "source": [
"graph = Neo4jGraph(\n", "graph = Neo4jGraph(\n",
" url=\"bolt://localhost:7687\", username=\"neo4j\", password=\"pleaseletmein\"\n", " url=\"bolt://localhost:7687\", username=\"neo4j\", password=\"pleaseletmein\"\n",
@ -132,14 +123,12 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"\n",
"Node properties are the following:\n", "Node properties are the following:\n",
" [{'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Movie'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Actor'}]\n", "Movie {name: STRING},Actor {name: STRING}\n",
"Relationship properties are the following:\n", "Relationship properties are the following:\n",
" []\n", "\n",
"The relationships are the following:\n", "The relationships are the following:\n",
" ['(:Actor)-[:ACTED_IN]->(:Movie)']\n", "(:Actor)-[:ACTED_IN]->(:Movie)\n"
" \n"
] ]
} }
], ],
@ -557,11 +546,11 @@
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Node properties are the following:\n", "Node properties are the following:\n",
" {'Actor': [{'property': 'name', 'type': 'STRING'}]}\n", "Actor {name: STRING}\n",
"Relationships properties are the following: \n", "Relationship properties are the following:\n",
" {}\n", "\n",
"Relationships are: \n", "The relationships are the following:\n",
"[]\n" "\n"
] ]
} }
], ],
@ -656,7 +645,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.8" "version": "3.10.13"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@ -46,7 +46,7 @@ def construct_schema(
def filter_func(x: str) -> bool: def filter_func(x: str) -> bool:
return x in include_types if include_types else x not in exclude_types return x in include_types if include_types else x not in exclude_types
filtered_schema = { filtered_schema: Dict[str, Any] = {
"node_props": { "node_props": {
k: v k: v
for k, v in structured_schema.get("node_props", {}).items() for k, v in structured_schema.get("node_props", {}).items()
@ -64,16 +64,37 @@ def construct_schema(
], ],
} }
return ( # Format node properties
f"Node properties are the following: \n {filtered_schema['node_props']}\n" formatted_node_props = []
f"Relationships properties are the following: \n {filtered_schema['rel_props']}" for label, properties in filtered_schema["node_props"].items():
"\nRelationships are: \n" props_str = ", ".join(
+ str( [f"{prop['property']}: {prop['type']}" for prop in properties]
[ )
formatted_node_props.append(f"{label} {{{props_str}}}")
# Format relationship properties
formatted_rel_props = []
for rel_type, properties in filtered_schema["rel_props"].items():
props_str = ", ".join(
[f"{prop['property']}: {prop['type']}" for prop in properties]
)
formatted_rel_props.append(f"{rel_type} {{{props_str}}}")
# Format relationships
formatted_rels = [
f"(:{el['start']})-[:{el['type']}]->(:{el['end']})" f"(:{el['start']})-[:{el['type']}]->(:{el['end']})"
for el in filtered_schema["relationships"] for el in filtered_schema["relationships"]
] ]
)
return "\n".join(
[
"Node properties are the following:",
",".join(formatted_node_props),
"Relationship properties are the following:",
",".join(formatted_rel_props),
"The relationships are the following:",
",".join(formatted_rels),
]
) )

View File

@ -127,14 +127,38 @@ class Neo4jGraph(GraphStore):
"rel_props": {el["type"]: el["properties"] for el in rel_properties}, "rel_props": {el["type"]: el["properties"] for el in rel_properties},
"relationships": relationships, "relationships": relationships,
} }
self.schema = f"""
Node properties are the following: # Format node properties
{node_properties} formatted_node_props = []
Relationship properties are the following: for el in node_properties:
{rel_properties} props_str = ", ".join(
The relationships are the following: [f"{prop['property']}: {prop['type']}" for prop in el["properties"]]
{[f"(:{el['start']})-[:{el['type']}]->(:{el['end']})" for el in relationships]} )
""" formatted_node_props.append(f"{el['labels']} {{{props_str}}}")
# Format relationship properties
formatted_rel_props = []
for el in rel_properties:
props_str = ", ".join(
[f"{prop['property']}: {prop['type']}" for prop in el["properties"]]
)
formatted_rel_props.append(f"{el['type']} {{{props_str}}}")
# Format relationships
formatted_rels = [
f"(:{el['start']})-[:{el['type']}]->(:{el['end']})" for el in relationships
]
self.schema = "\n".join(
[
"Node properties are the following:",
",".join(formatted_node_props),
"Relationship properties are the following:",
",".join(formatted_rel_props),
"The relationships are the following:",
",".join(formatted_rels),
]
)
def add_graph_documents( def add_graph_documents(
self, graph_documents: List[GraphDocument], include_source: bool = False self, graph_documents: List[GraphDocument], include_source: bool = False

View File

@ -146,11 +146,23 @@ def test_cypher_intermediate_steps() -> None:
assert output["result"] == expected_output assert output["result"] == expected_output
query = output["intermediate_steps"][0]["query"] query = output["intermediate_steps"][0]["query"]
expected_query = ( # LLM can return variations of the same query
expected_queries = [
(
"\n\nMATCH (a:Actor)-[:ACTED_IN]->" "\n\nMATCH (a:Actor)-[:ACTED_IN]->"
"(m:Movie {title: 'Pulp Fiction'}) RETURN a.name" "(m:Movie {title: 'Pulp Fiction'}) RETURN a.name"
) ),
assert query == expected_query (
"\n\nMATCH (a:Actor)-[:ACTED_IN]->"
"(m:Movie {title: 'Pulp Fiction'}) RETURN a.name;"
),
(
"\n\nMATCH (a:Actor)-[:ACTED_IN]->"
"(m:Movie) WHERE m.title = 'Pulp Fiction' RETURN a.name"
),
]
assert query in expected_queries
context = output["intermediate_steps"][1]["context"] context = output["intermediate_steps"][1]["context"]
expected_context = [{"a.name": "Bruce Willis"}] expected_context = [{"a.name": "Bruce Willis"}]
@ -308,13 +320,11 @@ def test_exclude_types() -> None:
) )
expected_schema = ( expected_schema = (
"Node properties are the following:\n" "Node properties are the following:\n"
" {'Movie': [{'property': 'title', 'type': 'STRING'}], " "Movie {title: STRING},Actor {name: STRING}\n"
"'Actor': [{'property': 'name', 'type': 'STRING'}]}\n" "Relationship properties are the following:\n\n"
"Relationships properties are the following: \n" "The relationships are the following:\n"
" {}\nRelationships are: \n" "(:Actor)-[:ACTED_IN]->(:Movie)"
"['(:Actor)-[:ACTED_IN]->(:Movie)']"
) )
assert chain.graph_schema == expected_schema assert chain.graph_schema == expected_schema
@ -348,11 +358,10 @@ def test_include_types() -> None:
) )
expected_schema = ( expected_schema = (
"Node properties are the following:\n" "Node properties are the following:\n"
" {'Movie': [{'property': 'title', 'type': 'STRING'}], " "Movie {title: STRING},Actor {name: STRING}\n"
"'Actor': [{'property': 'name', 'type': 'STRING'}]}\n" "Relationship properties are the following:\n\n"
"Relationships properties are the following: \n" "The relationships are the following:\n"
" {}\nRelationships are: \n" "(:Actor)-[:ACTED_IN]->(:Movie)"
"['(:Actor)-[:ACTED_IN]->(:Movie)']"
) )
assert chain.graph_schema == expected_schema assert chain.graph_schema == expected_schema
@ -388,10 +397,8 @@ def test_include_types2() -> None:
) )
expected_schema = ( expected_schema = (
"Node properties are the following:\n" "Node properties are the following:\n"
" {'Movie': [{'property': 'title', 'type': 'STRING'}]}\n" "Movie {title: STRING}\n"
"Relationships properties are the following: \n" "Relationship properties are the following:\n\n"
" {}\nRelationships are: \n" "The relationships are the following:\n"
"[]"
) )
assert chain.graph_schema == expected_schema assert chain.graph_schema == expected_schema

View File

@ -152,16 +152,18 @@ def test_graph_cypher_qa_chain() -> None:
readonlymemory = ReadOnlySharedMemory(memory=memory) readonlymemory = ReadOnlySharedMemory(memory=memory)
prompt1 = ( prompt1 = (
"You are a nice chatbot having a conversation with a human.\n\n " "You are a nice chatbot having a conversation with a human.\n\n "
"Schema:\n Node properties are the following: \n {}\nRelationships " "Schema:\n Node properties are the following:\n\nRelationship "
"properties are the following: \n {}\nRelationships are: \n[]\n\n " "properties are the following:\n\nThe relationships are the "
"following:\n\n\n "
"Previous conversation:\n \n\n New human question: " "Previous conversation:\n \n\n New human question: "
"Test question\n Response:" "Test question\n Response:"
) )
prompt2 = ( prompt2 = (
"You are a nice chatbot having a conversation with a human.\n\n " "You are a nice chatbot having a conversation with a human.\n\n "
"Schema:\n Node properties are the following: \n {}\nRelationships " "Schema:\n Node properties are the following:\n\nRelationship "
"properties are the following: \n {}\nRelationships are: \n[]\n\n " "properties are the following:\n\nThe relationships are the "
"following:\n\n\n "
"Previous conversation:\n Human: Test question\nAI: foo\n\n " "Previous conversation:\n Human: Test question\nAI: foo\n\n "
"New human question: Test new question\n Response:" "New human question: Test new question\n Response:"
) )
@ -214,11 +216,10 @@ def test_exclude_types() -> None:
output = construct_schema(structured_schema, [], exclude_types) output = construct_schema(structured_schema, [], exclude_types)
expected_schema = ( expected_schema = (
"Node properties are the following:\n" "Node properties are the following:\n"
" {'Movie': [{'property': 'title', 'type': 'STRING'}], " "Movie {title: STRING},Actor {name: STRING}\n"
"'Actor': [{'property': 'name', 'type': 'STRING'}]}\n" "Relationship properties are the following:\n\n"
"Relationships properties are the following: \n" "The relationships are the following:\n"
" {}\nRelationships are: \n" "(:Actor)-[:ACTED_IN]->(:Movie)"
"['(:Actor)-[:ACTED_IN]->(:Movie)']"
) )
assert output == expected_schema assert output == expected_schema
@ -240,11 +241,10 @@ def test_include_types() -> None:
output = construct_schema(structured_schema, include_types, []) output = construct_schema(structured_schema, include_types, [])
expected_schema = ( expected_schema = (
"Node properties are the following:\n" "Node properties are the following:\n"
" {'Movie': [{'property': 'title', 'type': 'STRING'}], " "Movie {title: STRING},Actor {name: STRING}\n"
"'Actor': [{'property': 'name', 'type': 'STRING'}]}\n" "Relationship properties are the following:\n\n"
"Relationships properties are the following: \n" "The relationships are the following:\n"
" {}\nRelationships are: \n" "(:Actor)-[:ACTED_IN]->(:Movie)"
"['(:Actor)-[:ACTED_IN]->(:Movie)']"
) )
assert output == expected_schema assert output == expected_schema
@ -266,11 +266,9 @@ def test_include_types2() -> None:
output = construct_schema(structured_schema, include_types, []) output = construct_schema(structured_schema, include_types, [])
expected_schema = ( expected_schema = (
"Node properties are the following:\n" "Node properties are the following:\n"
" {'Movie': [{'property': 'title', 'type': 'STRING'}], " "Movie {title: STRING},Actor {name: STRING}\n"
"'Actor': [{'property': 'name', 'type': 'STRING'}]}\n" "Relationship properties are the following:\n\n"
"Relationships properties are the following: \n" "The relationships are the following:\n"
" {}\nRelationships are: \n"
"[]"
) )
assert output == expected_schema assert output == expected_schema
@ -292,11 +290,10 @@ def test_include_types3() -> None:
output = construct_schema(structured_schema, include_types, []) output = construct_schema(structured_schema, include_types, [])
expected_schema = ( expected_schema = (
"Node properties are the following:\n" "Node properties are the following:\n"
" {'Movie': [{'property': 'title', 'type': 'STRING'}], " "Movie {title: STRING},Actor {name: STRING}\n"
"'Actor': [{'property': 'name', 'type': 'STRING'}]}\n" "Relationship properties are the following:\n\n"
"Relationships properties are the following: \n" "The relationships are the following:\n"
" {}\nRelationships are: \n" "(:Actor)-[:ACTED_IN]->(:Movie)"
"['(:Actor)-[:ACTED_IN]->(:Movie)']"
) )
assert output == expected_schema assert output == expected_schema