langchain[minor]: Reduce the number of tokens required to describe a Cypher/Neo4j schema (#13851)

Instead of using JSON-like syntax to describe node and relationship properties, we changed to a shorter and more concise schema description.

Old:
```
Node properties are the following:
[{'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Movie'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Actor'}]
Relationship properties are the following:
[]
The relationships are the following:
['(:Actor)-[:ACTED_IN]->(:Movie)']
```

New:
```
Node properties are the following:
Movie {name: STRING},Actor {name: STRING}
Relationship properties are the following:

The relationships are the following:
(:Actor)-[:ACTED_IN]->(:Movie)
```
2025-06-29 09:58:44 +00:00 · 2023-11-29 20:13:12 +01:00 · 2023-11-29 20:13:12 +01:00 · 3eb391561b
commit 3eb391561b
parent 7ec4dbeb80
5 changed files with 132 additions and 94 deletions
--- a/docs/docs/use_cases/graph/graph_cypher_qa.ipynb
+++ b/docs/docs/use_cases/graph/graph_cypher_qa.ipynb
@ -48,16 +48,7 @@
   "execution_count": 2,
   "id": "0928915d",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/tomaz/neo4j/langchain/libs/langchain/langchain/graphs/neo4j_graph.py:52: ExperimentalWarning: The configuration may change in the future.\n",
-      "  self._driver.verify_connectivity()\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "graph = Neo4jGraph(\n",
    "    url=\"bolt://localhost:7687\", username=\"neo4j\", password=\"pleaseletmein\"\n",
@ -132,14 +123,12 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
+      "Node properties are the following:\n",
+      "Movie {name: STRING},Actor {name: STRING}\n",
+      "Relationship properties are the following:\n",
      "\n",
-      "        Node properties are the following:\n",
-      "        [{'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Movie'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Actor'}]\n",
-      "        Relationship properties are the following:\n",
-      "        []\n",
-      "        The relationships are the following:\n",
-      "        ['(:Actor)-[:ACTED_IN]->(:Movie)']\n",
-      "        \n"
+      "The relationships are the following:\n",
+      "(:Actor)-[:ACTED_IN]->(:Movie)\n"
     ]
    }
   ],
@ -556,12 +545,12 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Node properties are the following: \n",
-      " {'Actor': [{'property': 'name', 'type': 'STRING'}]}\n",
-      "Relationships properties are the following: \n",
-      " {}\n",
-      "Relationships are: \n",
-      "[]\n"
+      "Node properties are the following:\n",
+      "Actor {name: STRING}\n",
+      "Relationship properties are the following:\n",
+      "\n",
+      "The relationships are the following:\n",
+      "\n"
     ]
    }
   ],
@ -656,7 +645,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.8.8"
+   "version": "3.10.13"
  }
 },
 "nbformat": 4,
--- a/libs/langchain/langchain/chains/graph_qa/cypher.py
+++ b/libs/langchain/langchain/chains/graph_qa/cypher.py
@ -46,7 +46,7 @@ def construct_schema(
    def filter_func(x: str) -> bool:
        return x in include_types if include_types else x not in exclude_types

-    filtered_schema = {
+    filtered_schema: Dict[str, Any] = {
        "node_props": {
            k: v
            for k, v in structured_schema.get("node_props", {}).items()
@ -64,16 +64,37 @@ def construct_schema(
        ],
    }

-    return (
-        f"Node properties are the following: \n {filtered_schema['node_props']}\n"
-        f"Relationships properties are the following: \n {filtered_schema['rel_props']}"
-        "\nRelationships are: \n"
-        + str(
-            [
-                f"(:{el['start']})-[:{el['type']}]->(:{el['end']})"
-                for el in filtered_schema["relationships"]
-            ]
+    # Format node properties
+    formatted_node_props = []
+    for label, properties in filtered_schema["node_props"].items():
+        props_str = ", ".join(
+            [f"{prop['property']}: {prop['type']}" for prop in properties]
        )
+        formatted_node_props.append(f"{label} {{{props_str}}}")
+
+    # Format relationship properties
+    formatted_rel_props = []
+    for rel_type, properties in filtered_schema["rel_props"].items():
+        props_str = ", ".join(
+            [f"{prop['property']}: {prop['type']}" for prop in properties]
+        )
+        formatted_rel_props.append(f"{rel_type} {{{props_str}}}")
+
+    # Format relationships
+    formatted_rels = [
+        f"(:{el['start']})-[:{el['type']}]->(:{el['end']})"
+        for el in filtered_schema["relationships"]
+    ]
+
+    return "\n".join(
+        [
+            "Node properties are the following:",
+            ",".join(formatted_node_props),
+            "Relationship properties are the following:",
+            ",".join(formatted_rel_props),
+            "The relationships are the following:",
+            ",".join(formatted_rels),
+        ]
    )


--- a/libs/langchain/langchain/graphs/neo4j_graph.py
+++ b/libs/langchain/langchain/graphs/neo4j_graph.py
@ -127,14 +127,38 @@ class Neo4jGraph(GraphStore):
            "rel_props": {el["type"]: el["properties"] for el in rel_properties},
            "relationships": relationships,
        }
-        self.schema = f"""
-        Node properties are the following:
-        {node_properties}
-        Relationship properties are the following:
-        {rel_properties}
-        The relationships are the following:
-        {[f"(:{el['start']})-[:{el['type']}]->(:{el['end']})" for el in relationships]}
-        """
+
+        # Format node properties
+        formatted_node_props = []
+        for el in node_properties:
+            props_str = ", ".join(
+                [f"{prop['property']}: {prop['type']}" for prop in el["properties"]]
+            )
+            formatted_node_props.append(f"{el['labels']} {{{props_str}}}")
+
+        # Format relationship properties
+        formatted_rel_props = []
+        for el in rel_properties:
+            props_str = ", ".join(
+                [f"{prop['property']}: {prop['type']}" for prop in el["properties"]]
+            )
+            formatted_rel_props.append(f"{el['type']} {{{props_str}}}")
+
+        # Format relationships
+        formatted_rels = [
+            f"(:{el['start']})-[:{el['type']}]->(:{el['end']})" for el in relationships
+        ]
+
+        self.schema = "\n".join(
+            [
+                "Node properties are the following:",
+                ",".join(formatted_node_props),
+                "Relationship properties are the following:",
+                ",".join(formatted_rel_props),
+                "The relationships are the following:",
+                ",".join(formatted_rels),
+            ]
+        )

    def add_graph_documents(
        self, graph_documents: List[GraphDocument], include_source: bool = False
--- a/libs/langchain/tests/integration_tests/chains/test_graph_database.py
+++ b/libs/langchain/tests/integration_tests/chains/test_graph_database.py
@ -146,11 +146,23 @@ def test_cypher_intermediate_steps() -> None:
    assert output["result"] == expected_output

    query = output["intermediate_steps"][0]["query"]
-    expected_query = (
-        "\n\nMATCH (a:Actor)-[:ACTED_IN]->"
-        "(m:Movie {title: 'Pulp Fiction'}) RETURN a.name"
-    )
-    assert query == expected_query
+    # LLM can return variations of the same query
+    expected_queries = [
+        (
+            "\n\nMATCH (a:Actor)-[:ACTED_IN]->"
+            "(m:Movie {title: 'Pulp Fiction'}) RETURN a.name"
+        ),
+        (
+            "\n\nMATCH (a:Actor)-[:ACTED_IN]->"
+            "(m:Movie {title: 'Pulp Fiction'}) RETURN a.name;"
+        ),
+        (
+            "\n\nMATCH (a:Actor)-[:ACTED_IN]->"
+            "(m:Movie) WHERE m.title = 'Pulp Fiction' RETURN a.name"
+        ),
+    ]
+
+    assert query in expected_queries

    context = output["intermediate_steps"][1]["context"]
    expected_context = [{"a.name": "Bruce Willis"}]
@ -307,14 +319,12 @@ def test_exclude_types() -> None:
        OpenAI(temperature=0), graph=graph, exclude_types=["Person", "DIRECTED"]
    )
    expected_schema = (
-        "Node properties are the following: \n"
-        " {'Movie': [{'property': 'title', 'type': 'STRING'}], "
-        "'Actor': [{'property': 'name', 'type': 'STRING'}]}\n"
-        "Relationships properties are the following: \n"
-        " {}\nRelationships are: \n"
-        "['(:Actor)-[:ACTED_IN]->(:Movie)']"
+        "Node properties are the following:\n"
+        "Movie {title: STRING},Actor {name: STRING}\n"
+        "Relationship properties are the following:\n\n"
+        "The relationships are the following:\n"
+        "(:Actor)-[:ACTED_IN]->(:Movie)"
    )
-
    assert chain.graph_schema == expected_schema


@ -347,12 +357,11 @@ def test_include_types() -> None:
        OpenAI(temperature=0), graph=graph, include_types=["Movie", "Actor", "ACTED_IN"]
    )
    expected_schema = (
-        "Node properties are the following: \n"
-        " {'Movie': [{'property': 'title', 'type': 'STRING'}], "
-        "'Actor': [{'property': 'name', 'type': 'STRING'}]}\n"
-        "Relationships properties are the following: \n"
-        " {}\nRelationships are: \n"
-        "['(:Actor)-[:ACTED_IN]->(:Movie)']"
+        "Node properties are the following:\n"
+        "Movie {title: STRING},Actor {name: STRING}\n"
+        "Relationship properties are the following:\n\n"
+        "The relationships are the following:\n"
+        "(:Actor)-[:ACTED_IN]->(:Movie)"
    )

    assert chain.graph_schema == expected_schema
@ -387,11 +396,9 @@ def test_include_types2() -> None:
        OpenAI(temperature=0), graph=graph, include_types=["Movie", "ACTED_IN"]
    )
    expected_schema = (
-        "Node properties are the following: \n"
-        " {'Movie': [{'property': 'title', 'type': 'STRING'}]}\n"
-        "Relationships properties are the following: \n"
-        " {}\nRelationships are: \n"
-        "[]"
+        "Node properties are the following:\n"
+        "Movie {title: STRING}\n"
+        "Relationship properties are the following:\n\n"
+        "The relationships are the following:\n"
    )
-
    assert chain.graph_schema == expected_schema
--- a/libs/langchain/tests/unit_tests/chains/test_graph_qa.py
+++ b/libs/langchain/tests/unit_tests/chains/test_graph_qa.py
@ -152,16 +152,18 @@ def test_graph_cypher_qa_chain() -> None:
    readonlymemory = ReadOnlySharedMemory(memory=memory)
    prompt1 = (
        "You are a nice chatbot having a conversation with a human.\n\n    "
-        "Schema:\n    Node properties are the following: \n {}\nRelationships "
-        "properties are the following: \n {}\nRelationships are: \n[]\n\n    "
+        "Schema:\n    Node properties are the following:\n\nRelationship "
+        "properties are the following:\n\nThe relationships are the "
+        "following:\n\n\n    "
        "Previous conversation:\n    \n\n    New human question: "
        "Test question\n    Response:"
    )

    prompt2 = (
        "You are a nice chatbot having a conversation with a human.\n\n    "
-        "Schema:\n    Node properties are the following: \n {}\nRelationships "
-        "properties are the following: \n {}\nRelationships are: \n[]\n\n    "
+        "Schema:\n    Node properties are the following:\n\nRelationship "
+        "properties are the following:\n\nThe relationships are the "
+        "following:\n\n\n    "
        "Previous conversation:\n    Human: Test question\nAI: foo\n\n    "
        "New human question: Test new question\n    Response:"
    )
@ -213,12 +215,11 @@ def test_exclude_types() -> None:
    exclude_types = ["Person", "DIRECTED"]
    output = construct_schema(structured_schema, [], exclude_types)
    expected_schema = (
-        "Node properties are the following: \n"
-        " {'Movie': [{'property': 'title', 'type': 'STRING'}], "
-        "'Actor': [{'property': 'name', 'type': 'STRING'}]}\n"
-        "Relationships properties are the following: \n"
-        " {}\nRelationships are: \n"
-        "['(:Actor)-[:ACTED_IN]->(:Movie)']"
+        "Node properties are the following:\n"
+        "Movie {title: STRING},Actor {name: STRING}\n"
+        "Relationship properties are the following:\n\n"
+        "The relationships are the following:\n"
+        "(:Actor)-[:ACTED_IN]->(:Movie)"
    )
    assert output == expected_schema

@ -239,12 +240,11 @@ def test_include_types() -> None:
    include_types = ["Movie", "Actor", "ACTED_IN"]
    output = construct_schema(structured_schema, include_types, [])
    expected_schema = (
-        "Node properties are the following: \n"
-        " {'Movie': [{'property': 'title', 'type': 'STRING'}], "
-        "'Actor': [{'property': 'name', 'type': 'STRING'}]}\n"
-        "Relationships properties are the following: \n"
-        " {}\nRelationships are: \n"
-        "['(:Actor)-[:ACTED_IN]->(:Movie)']"
+        "Node properties are the following:\n"
+        "Movie {title: STRING},Actor {name: STRING}\n"
+        "Relationship properties are the following:\n\n"
+        "The relationships are the following:\n"
+        "(:Actor)-[:ACTED_IN]->(:Movie)"
    )
    assert output == expected_schema

@ -265,12 +265,10 @@ def test_include_types2() -> None:
    include_types = ["Movie", "Actor"]
    output = construct_schema(structured_schema, include_types, [])
    expected_schema = (
-        "Node properties are the following: \n"
-        " {'Movie': [{'property': 'title', 'type': 'STRING'}], "
-        "'Actor': [{'property': 'name', 'type': 'STRING'}]}\n"
-        "Relationships properties are the following: \n"
-        " {}\nRelationships are: \n"
-        "[]"
+        "Node properties are the following:\n"
+        "Movie {title: STRING},Actor {name: STRING}\n"
+        "Relationship properties are the following:\n\n"
+        "The relationships are the following:\n"
    )
    assert output == expected_schema

@ -291,12 +289,11 @@ def test_include_types3() -> None:
    include_types = ["Movie", "Actor", "ACTED_IN"]
    output = construct_schema(structured_schema, include_types, [])
    expected_schema = (
-        "Node properties are the following: \n"
-        " {'Movie': [{'property': 'title', 'type': 'STRING'}], "
-        "'Actor': [{'property': 'name', 'type': 'STRING'}]}\n"
-        "Relationships properties are the following: \n"
-        " {}\nRelationships are: \n"
-        "['(:Actor)-[:ACTED_IN]->(:Movie)']"
+        "Node properties are the following:\n"
+        "Movie {title: STRING},Actor {name: STRING}\n"
+        "Relationship properties are the following:\n\n"
+        "The relationships are the following:\n"
+        "(:Actor)-[:ACTED_IN]->(:Movie)"
    )
    assert output == expected_schema