langchain[minor]: Reduce the number of tokens required to describe a Cypher/Neo4j schema (#13851)

Instead of using JSON-like syntax to describe node and relationship
properties, we changed to a shorter and more concise schema description.

Old:

```
        Node properties are the following:
        [{'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Movie'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Actor'}]
        Relationship properties are the following:
        []
        The relationships are the following:
        ['(:Actor)-[:ACTED_IN]->(:Movie)']
```

New:

```
Node properties are the following:
Movie {name: STRING},Actor {name: STRING}
Relationship properties are the following:

The relationships are the following:
(:Actor)-[:ACTED_IN]->(:Movie)
```
This commit is contained in:
Tomaz Bratanic 2023-11-29 20:13:12 +01:00 committed by GitHub
parent 7ec4dbeb80
commit 3eb391561b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 132 additions and 94 deletions

View File

@ -48,16 +48,7 @@
"execution_count": 2,
"id": "0928915d",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/tomaz/neo4j/langchain/libs/langchain/langchain/graphs/neo4j_graph.py:52: ExperimentalWarning: The configuration may change in the future.\n",
" self._driver.verify_connectivity()\n"
]
}
],
"outputs": [],
"source": [
"graph = Neo4jGraph(\n",
" url=\"bolt://localhost:7687\", username=\"neo4j\", password=\"pleaseletmein\"\n",
@ -132,14 +123,12 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Node properties are the following:\n",
"Movie {name: STRING},Actor {name: STRING}\n",
"Relationship properties are the following:\n",
"\n",
" Node properties are the following:\n",
" [{'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Movie'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Actor'}]\n",
" Relationship properties are the following:\n",
" []\n",
" The relationships are the following:\n",
" ['(:Actor)-[:ACTED_IN]->(:Movie)']\n",
" \n"
"The relationships are the following:\n",
"(:Actor)-[:ACTED_IN]->(:Movie)\n"
]
}
],
@ -556,12 +545,12 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Node properties are the following: \n",
" {'Actor': [{'property': 'name', 'type': 'STRING'}]}\n",
"Relationships properties are the following: \n",
" {}\n",
"Relationships are: \n",
"[]\n"
"Node properties are the following:\n",
"Actor {name: STRING}\n",
"Relationship properties are the following:\n",
"\n",
"The relationships are the following:\n",
"\n"
]
}
],
@ -656,7 +645,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
"version": "3.10.13"
}
},
"nbformat": 4,

View File

@ -46,7 +46,7 @@ def construct_schema(
def filter_func(x: str) -> bool:
return x in include_types if include_types else x not in exclude_types
filtered_schema = {
filtered_schema: Dict[str, Any] = {
"node_props": {
k: v
for k, v in structured_schema.get("node_props", {}).items()
@ -64,16 +64,37 @@ def construct_schema(
],
}
return (
f"Node properties are the following: \n {filtered_schema['node_props']}\n"
f"Relationships properties are the following: \n {filtered_schema['rel_props']}"
"\nRelationships are: \n"
+ str(
[
f"(:{el['start']})-[:{el['type']}]->(:{el['end']})"
for el in filtered_schema["relationships"]
]
# Format node properties
formatted_node_props = []
for label, properties in filtered_schema["node_props"].items():
props_str = ", ".join(
[f"{prop['property']}: {prop['type']}" for prop in properties]
)
formatted_node_props.append(f"{label} {{{props_str}}}")
# Format relationship properties
formatted_rel_props = []
for rel_type, properties in filtered_schema["rel_props"].items():
props_str = ", ".join(
[f"{prop['property']}: {prop['type']}" for prop in properties]
)
formatted_rel_props.append(f"{rel_type} {{{props_str}}}")
# Format relationships
formatted_rels = [
f"(:{el['start']})-[:{el['type']}]->(:{el['end']})"
for el in filtered_schema["relationships"]
]
return "\n".join(
[
"Node properties are the following:",
",".join(formatted_node_props),
"Relationship properties are the following:",
",".join(formatted_rel_props),
"The relationships are the following:",
",".join(formatted_rels),
]
)

View File

@ -127,14 +127,38 @@ class Neo4jGraph(GraphStore):
"rel_props": {el["type"]: el["properties"] for el in rel_properties},
"relationships": relationships,
}
self.schema = f"""
Node properties are the following:
{node_properties}
Relationship properties are the following:
{rel_properties}
The relationships are the following:
{[f"(:{el['start']})-[:{el['type']}]->(:{el['end']})" for el in relationships]}
"""
# Format node properties
formatted_node_props = []
for el in node_properties:
props_str = ", ".join(
[f"{prop['property']}: {prop['type']}" for prop in el["properties"]]
)
formatted_node_props.append(f"{el['labels']} {{{props_str}}}")
# Format relationship properties
formatted_rel_props = []
for el in rel_properties:
props_str = ", ".join(
[f"{prop['property']}: {prop['type']}" for prop in el["properties"]]
)
formatted_rel_props.append(f"{el['type']} {{{props_str}}}")
# Format relationships
formatted_rels = [
f"(:{el['start']})-[:{el['type']}]->(:{el['end']})" for el in relationships
]
self.schema = "\n".join(
[
"Node properties are the following:",
",".join(formatted_node_props),
"Relationship properties are the following:",
",".join(formatted_rel_props),
"The relationships are the following:",
",".join(formatted_rels),
]
)
def add_graph_documents(
self, graph_documents: List[GraphDocument], include_source: bool = False

View File

@ -146,11 +146,23 @@ def test_cypher_intermediate_steps() -> None:
assert output["result"] == expected_output
query = output["intermediate_steps"][0]["query"]
expected_query = (
"\n\nMATCH (a:Actor)-[:ACTED_IN]->"
"(m:Movie {title: 'Pulp Fiction'}) RETURN a.name"
)
assert query == expected_query
# LLM can return variations of the same query
expected_queries = [
(
"\n\nMATCH (a:Actor)-[:ACTED_IN]->"
"(m:Movie {title: 'Pulp Fiction'}) RETURN a.name"
),
(
"\n\nMATCH (a:Actor)-[:ACTED_IN]->"
"(m:Movie {title: 'Pulp Fiction'}) RETURN a.name;"
),
(
"\n\nMATCH (a:Actor)-[:ACTED_IN]->"
"(m:Movie) WHERE m.title = 'Pulp Fiction' RETURN a.name"
),
]
assert query in expected_queries
context = output["intermediate_steps"][1]["context"]
expected_context = [{"a.name": "Bruce Willis"}]
@ -307,14 +319,12 @@ def test_exclude_types() -> None:
OpenAI(temperature=0), graph=graph, exclude_types=["Person", "DIRECTED"]
)
expected_schema = (
"Node properties are the following: \n"
" {'Movie': [{'property': 'title', 'type': 'STRING'}], "
"'Actor': [{'property': 'name', 'type': 'STRING'}]}\n"
"Relationships properties are the following: \n"
" {}\nRelationships are: \n"
"['(:Actor)-[:ACTED_IN]->(:Movie)']"
"Node properties are the following:\n"
"Movie {title: STRING},Actor {name: STRING}\n"
"Relationship properties are the following:\n\n"
"The relationships are the following:\n"
"(:Actor)-[:ACTED_IN]->(:Movie)"
)
assert chain.graph_schema == expected_schema
@ -347,12 +357,11 @@ def test_include_types() -> None:
OpenAI(temperature=0), graph=graph, include_types=["Movie", "Actor", "ACTED_IN"]
)
expected_schema = (
"Node properties are the following: \n"
" {'Movie': [{'property': 'title', 'type': 'STRING'}], "
"'Actor': [{'property': 'name', 'type': 'STRING'}]}\n"
"Relationships properties are the following: \n"
" {}\nRelationships are: \n"
"['(:Actor)-[:ACTED_IN]->(:Movie)']"
"Node properties are the following:\n"
"Movie {title: STRING},Actor {name: STRING}\n"
"Relationship properties are the following:\n\n"
"The relationships are the following:\n"
"(:Actor)-[:ACTED_IN]->(:Movie)"
)
assert chain.graph_schema == expected_schema
@ -387,11 +396,9 @@ def test_include_types2() -> None:
OpenAI(temperature=0), graph=graph, include_types=["Movie", "ACTED_IN"]
)
expected_schema = (
"Node properties are the following: \n"
" {'Movie': [{'property': 'title', 'type': 'STRING'}]}\n"
"Relationships properties are the following: \n"
" {}\nRelationships are: \n"
"[]"
"Node properties are the following:\n"
"Movie {title: STRING}\n"
"Relationship properties are the following:\n\n"
"The relationships are the following:\n"
)
assert chain.graph_schema == expected_schema

View File

@ -152,16 +152,18 @@ def test_graph_cypher_qa_chain() -> None:
readonlymemory = ReadOnlySharedMemory(memory=memory)
prompt1 = (
"You are a nice chatbot having a conversation with a human.\n\n "
"Schema:\n Node properties are the following: \n {}\nRelationships "
"properties are the following: \n {}\nRelationships are: \n[]\n\n "
"Schema:\n Node properties are the following:\n\nRelationship "
"properties are the following:\n\nThe relationships are the "
"following:\n\n\n "
"Previous conversation:\n \n\n New human question: "
"Test question\n Response:"
)
prompt2 = (
"You are a nice chatbot having a conversation with a human.\n\n "
"Schema:\n Node properties are the following: \n {}\nRelationships "
"properties are the following: \n {}\nRelationships are: \n[]\n\n "
"Schema:\n Node properties are the following:\n\nRelationship "
"properties are the following:\n\nThe relationships are the "
"following:\n\n\n "
"Previous conversation:\n Human: Test question\nAI: foo\n\n "
"New human question: Test new question\n Response:"
)
@ -213,12 +215,11 @@ def test_exclude_types() -> None:
exclude_types = ["Person", "DIRECTED"]
output = construct_schema(structured_schema, [], exclude_types)
expected_schema = (
"Node properties are the following: \n"
" {'Movie': [{'property': 'title', 'type': 'STRING'}], "
"'Actor': [{'property': 'name', 'type': 'STRING'}]}\n"
"Relationships properties are the following: \n"
" {}\nRelationships are: \n"
"['(:Actor)-[:ACTED_IN]->(:Movie)']"
"Node properties are the following:\n"
"Movie {title: STRING},Actor {name: STRING}\n"
"Relationship properties are the following:\n\n"
"The relationships are the following:\n"
"(:Actor)-[:ACTED_IN]->(:Movie)"
)
assert output == expected_schema
@ -239,12 +240,11 @@ def test_include_types() -> None:
include_types = ["Movie", "Actor", "ACTED_IN"]
output = construct_schema(structured_schema, include_types, [])
expected_schema = (
"Node properties are the following: \n"
" {'Movie': [{'property': 'title', 'type': 'STRING'}], "
"'Actor': [{'property': 'name', 'type': 'STRING'}]}\n"
"Relationships properties are the following: \n"
" {}\nRelationships are: \n"
"['(:Actor)-[:ACTED_IN]->(:Movie)']"
"Node properties are the following:\n"
"Movie {title: STRING},Actor {name: STRING}\n"
"Relationship properties are the following:\n\n"
"The relationships are the following:\n"
"(:Actor)-[:ACTED_IN]->(:Movie)"
)
assert output == expected_schema
@ -265,12 +265,10 @@ def test_include_types2() -> None:
include_types = ["Movie", "Actor"]
output = construct_schema(structured_schema, include_types, [])
expected_schema = (
"Node properties are the following: \n"
" {'Movie': [{'property': 'title', 'type': 'STRING'}], "
"'Actor': [{'property': 'name', 'type': 'STRING'}]}\n"
"Relationships properties are the following: \n"
" {}\nRelationships are: \n"
"[]"
"Node properties are the following:\n"
"Movie {title: STRING},Actor {name: STRING}\n"
"Relationship properties are the following:\n\n"
"The relationships are the following:\n"
)
assert output == expected_schema
@ -291,12 +289,11 @@ def test_include_types3() -> None:
include_types = ["Movie", "Actor", "ACTED_IN"]
output = construct_schema(structured_schema, include_types, [])
expected_schema = (
"Node properties are the following: \n"
" {'Movie': [{'property': 'title', 'type': 'STRING'}], "
"'Actor': [{'property': 'name', 'type': 'STRING'}]}\n"
"Relationships properties are the following: \n"
" {}\nRelationships are: \n"
"['(:Actor)-[:ACTED_IN]->(:Movie)']"
"Node properties are the following:\n"
"Movie {title: STRING},Actor {name: STRING}\n"
"Relationship properties are the following:\n\n"
"The relationships are the following:\n"
"(:Actor)-[:ACTED_IN]->(:Movie)"
)
assert output == expected_schema