From 3eb391561b016214205bb3ea504c5fe7d3079a4e Mon Sep 17 00:00:00 2001
From: Tomaz Bratanic <bratanic.tomaz@gmail.com>
Date: Wed, 29 Nov 2023 20:13:12 +0100
Subject: [PATCH] langchain[minor]: Reduce the number of tokens required to
 describe a Cypher/Neo4j schema (#13851)

Instead of using JSON-like syntax to describe node and relationship
properties, we changed to a shorter, more concise schema description.

Old:

```
        Node properties are the following:
        [{'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Movie'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Actor'}]
        Relationship properties are the following:
        []
        The relationships are the following:
        ['(:Actor)-[:ACTED_IN]->(:Movie)']
```

New:

```
Node properties are the following:
Movie {name: STRING},Actor {name: STRING}
Relationship properties are the following:

The relationships are the following:
(:Actor)-[:ACTED_IN]->(:Movie)
```
---
 .../use_cases/graph/graph_cypher_qa.ipynb     | 37 +++++--------
 .../langchain/chains/graph_qa/cypher.py       | 41 ++++++++++----
 .../langchain/langchain/graphs/neo4j_graph.py | 40 +++++++++++---
 .../chains/test_graph_database.py             | 55 +++++++++++--------
 .../tests/unit_tests/chains/test_graph_qa.py  | 53 +++++++++---------
 5 files changed, 132 insertions(+), 94 deletions(-)

diff --git a/docs/docs/use_cases/graph/graph_cypher_qa.ipynb b/docs/docs/use_cases/graph/graph_cypher_qa.ipynb
index daa06069a2a..56e30288442 100644
--- a/docs/docs/use_cases/graph/graph_cypher_qa.ipynb
+++ b/docs/docs/use_cases/graph/graph_cypher_qa.ipynb
@@ -48,16 +48,7 @@
    "execution_count": 2,
    "id": "0928915d",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/tomaz/neo4j/langchain/libs/langchain/langchain/graphs/neo4j_graph.py:52: ExperimentalWarning: The configuration may change in the future.\n",
-      "  self._driver.verify_connectivity()\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "graph = Neo4jGraph(\n",
     "    url=\"bolt://localhost:7687\", username=\"neo4j\", password=\"pleaseletmein\"\n",
@@ -132,14 +123,12 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "Node properties are the following:\n",
+      "Movie {name: STRING},Actor {name: STRING}\n",
+      "Relationship properties are the following:\n",
       "\n",
-      "        Node properties are the following:\n",
-      "        [{'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Movie'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Actor'}]\n",
-      "        Relationship properties are the following:\n",
-      "        []\n",
-      "        The relationships are the following:\n",
-      "        ['(:Actor)-[:ACTED_IN]->(:Movie)']\n",
-      "        \n"
+      "The relationships are the following:\n",
+      "(:Actor)-[:ACTED_IN]->(:Movie)\n"
      ]
     }
    ],
@@ -556,12 +545,12 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Node properties are the following: \n",
-      " {'Actor': [{'property': 'name', 'type': 'STRING'}]}\n",
-      "Relationships properties are the following: \n",
-      " {}\n",
-      "Relationships are: \n",
-      "[]\n"
+      "Node properties are the following:\n",
+      "Actor {name: STRING}\n",
+      "Relationship properties are the following:\n",
+      "\n",
+      "The relationships are the following:\n",
+      "\n"
      ]
     }
    ],
@@ -656,7 +645,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.8"
+   "version": "3.10.13"
   }
  },
  "nbformat": 4,
diff --git a/libs/langchain/langchain/chains/graph_qa/cypher.py b/libs/langchain/langchain/chains/graph_qa/cypher.py
index 91ce4e0454b..1e91d22b07b 100644
--- a/libs/langchain/langchain/chains/graph_qa/cypher.py
+++ b/libs/langchain/langchain/chains/graph_qa/cypher.py
@@ -46,7 +46,7 @@ def construct_schema(
     def filter_func(x: str) -> bool:
         return x in include_types if include_types else x not in exclude_types
 
-    filtered_schema = {
+    filtered_schema: Dict[str, Any] = {
         "node_props": {
             k: v
             for k, v in structured_schema.get("node_props", {}).items()
@@ -64,16 +64,37 @@ def construct_schema(
         ],
     }
 
-    return (
-        f"Node properties are the following: \n {filtered_schema['node_props']}\n"
-        f"Relationships properties are the following: \n {filtered_schema['rel_props']}"
-        "\nRelationships are: \n"
-        + str(
-            [
-                f"(:{el['start']})-[:{el['type']}]->(:{el['end']})"
-                for el in filtered_schema["relationships"]
-            ]
+    # Format node properties
+    formatted_node_props = []
+    for label, properties in filtered_schema["node_props"].items():
+        props_str = ", ".join(
+            [f"{prop['property']}: {prop['type']}" for prop in properties]
         )
+        formatted_node_props.append(f"{label} {{{props_str}}}")
+
+    # Format relationship properties
+    formatted_rel_props = []
+    for rel_type, properties in filtered_schema["rel_props"].items():
+        props_str = ", ".join(
+            [f"{prop['property']}: {prop['type']}" for prop in properties]
+        )
+        formatted_rel_props.append(f"{rel_type} {{{props_str}}}")
+
+    # Format relationships
+    formatted_rels = [
+        f"(:{el['start']})-[:{el['type']}]->(:{el['end']})"
+        for el in filtered_schema["relationships"]
+    ]
+
+    return "\n".join(
+        [
+            "Node properties are the following:",
+            ",".join(formatted_node_props),
+            "Relationship properties are the following:",
+            ",".join(formatted_rel_props),
+            "The relationships are the following:",
+            ",".join(formatted_rels),
+        ]
     )
 
 
diff --git a/libs/langchain/langchain/graphs/neo4j_graph.py b/libs/langchain/langchain/graphs/neo4j_graph.py
index dfbf38bb983..ca78b7a323a 100644
--- a/libs/langchain/langchain/graphs/neo4j_graph.py
+++ b/libs/langchain/langchain/graphs/neo4j_graph.py
@@ -127,14 +127,38 @@ class Neo4jGraph(GraphStore):
             "rel_props": {el["type"]: el["properties"] for el in rel_properties},
             "relationships": relationships,
         }
-        self.schema = f"""
-        Node properties are the following:
-        {node_properties}
-        Relationship properties are the following:
-        {rel_properties}
-        The relationships are the following:
-        {[f"(:{el['start']})-[:{el['type']}]->(:{el['end']})" for el in relationships]}
-        """
+
+        # Format node properties
+        formatted_node_props = []
+        for el in node_properties:
+            props_str = ", ".join(
+                [f"{prop['property']}: {prop['type']}" for prop in el["properties"]]
+            )
+            formatted_node_props.append(f"{el['labels']} {{{props_str}}}")
+
+        # Format relationship properties
+        formatted_rel_props = []
+        for el in rel_properties:
+            props_str = ", ".join(
+                [f"{prop['property']}: {prop['type']}" for prop in el["properties"]]
+            )
+            formatted_rel_props.append(f"{el['type']} {{{props_str}}}")
+
+        # Format relationships
+        formatted_rels = [
+            f"(:{el['start']})-[:{el['type']}]->(:{el['end']})" for el in relationships
+        ]
+
+        self.schema = "\n".join(
+            [
+                "Node properties are the following:",
+                ",".join(formatted_node_props),
+                "Relationship properties are the following:",
+                ",".join(formatted_rel_props),
+                "The relationships are the following:",
+                ",".join(formatted_rels),
+            ]
+        )
 
     def add_graph_documents(
         self, graph_documents: List[GraphDocument], include_source: bool = False
diff --git a/libs/langchain/tests/integration_tests/chains/test_graph_database.py b/libs/langchain/tests/integration_tests/chains/test_graph_database.py
index eb40972461c..f46273a43f9 100644
--- a/libs/langchain/tests/integration_tests/chains/test_graph_database.py
+++ b/libs/langchain/tests/integration_tests/chains/test_graph_database.py
@@ -146,11 +146,23 @@ def test_cypher_intermediate_steps() -> None:
     assert output["result"] == expected_output
 
     query = output["intermediate_steps"][0]["query"]
-    expected_query = (
-        "\n\nMATCH (a:Actor)-[:ACTED_IN]->"
-        "(m:Movie {title: 'Pulp Fiction'}) RETURN a.name"
-    )
-    assert query == expected_query
+    # LLM can return variations of the same query
+    expected_queries = [
+        (
+            "\n\nMATCH (a:Actor)-[:ACTED_IN]->"
+            "(m:Movie {title: 'Pulp Fiction'}) RETURN a.name"
+        ),
+        (
+            "\n\nMATCH (a:Actor)-[:ACTED_IN]->"
+            "(m:Movie {title: 'Pulp Fiction'}) RETURN a.name;"
+        ),
+        (
+            "\n\nMATCH (a:Actor)-[:ACTED_IN]->"
+            "(m:Movie) WHERE m.title = 'Pulp Fiction' RETURN a.name"
+        ),
+    ]
+
+    assert query in expected_queries
 
     context = output["intermediate_steps"][1]["context"]
     expected_context = [{"a.name": "Bruce Willis"}]
@@ -307,14 +319,12 @@ def test_exclude_types() -> None:
         OpenAI(temperature=0), graph=graph, exclude_types=["Person", "DIRECTED"]
     )
     expected_schema = (
-        "Node properties are the following: \n"
-        " {'Movie': [{'property': 'title', 'type': 'STRING'}], "
-        "'Actor': [{'property': 'name', 'type': 'STRING'}]}\n"
-        "Relationships properties are the following: \n"
-        " {}\nRelationships are: \n"
-        "['(:Actor)-[:ACTED_IN]->(:Movie)']"
+        "Node properties are the following:\n"
+        "Movie {title: STRING},Actor {name: STRING}\n"
+        "Relationship properties are the following:\n\n"
+        "The relationships are the following:\n"
+        "(:Actor)-[:ACTED_IN]->(:Movie)"
     )
-
     assert chain.graph_schema == expected_schema
 
 
@@ -347,12 +357,11 @@ def test_include_types() -> None:
         OpenAI(temperature=0), graph=graph, include_types=["Movie", "Actor", "ACTED_IN"]
     )
     expected_schema = (
-        "Node properties are the following: \n"
-        " {'Movie': [{'property': 'title', 'type': 'STRING'}], "
-        "'Actor': [{'property': 'name', 'type': 'STRING'}]}\n"
-        "Relationships properties are the following: \n"
-        " {}\nRelationships are: \n"
-        "['(:Actor)-[:ACTED_IN]->(:Movie)']"
+        "Node properties are the following:\n"
+        "Movie {title: STRING},Actor {name: STRING}\n"
+        "Relationship properties are the following:\n\n"
+        "The relationships are the following:\n"
+        "(:Actor)-[:ACTED_IN]->(:Movie)"
     )
 
     assert chain.graph_schema == expected_schema
@@ -387,11 +396,9 @@ def test_include_types2() -> None:
         OpenAI(temperature=0), graph=graph, include_types=["Movie", "ACTED_IN"]
     )
     expected_schema = (
-        "Node properties are the following: \n"
-        " {'Movie': [{'property': 'title', 'type': 'STRING'}]}\n"
-        "Relationships properties are the following: \n"
-        " {}\nRelationships are: \n"
-        "[]"
+        "Node properties are the following:\n"
+        "Movie {title: STRING}\n"
+        "Relationship properties are the following:\n\n"
+        "The relationships are the following:\n"
     )
-
     assert chain.graph_schema == expected_schema
diff --git a/libs/langchain/tests/unit_tests/chains/test_graph_qa.py b/libs/langchain/tests/unit_tests/chains/test_graph_qa.py
index 8c034ffed0f..d7655409a48 100644
--- a/libs/langchain/tests/unit_tests/chains/test_graph_qa.py
+++ b/libs/langchain/tests/unit_tests/chains/test_graph_qa.py
@@ -152,16 +152,18 @@ def test_graph_cypher_qa_chain() -> None:
     readonlymemory = ReadOnlySharedMemory(memory=memory)
     prompt1 = (
         "You are a nice chatbot having a conversation with a human.\n\n    "
-        "Schema:\n    Node properties are the following: \n {}\nRelationships "
-        "properties are the following: \n {}\nRelationships are: \n[]\n\n    "
+        "Schema:\n    Node properties are the following:\n\nRelationship "
+        "properties are the following:\n\nThe relationships are the "
+        "following:\n\n\n    "
         "Previous conversation:\n    \n\n    New human question: "
         "Test question\n    Response:"
     )
 
     prompt2 = (
         "You are a nice chatbot having a conversation with a human.\n\n    "
-        "Schema:\n    Node properties are the following: \n {}\nRelationships "
-        "properties are the following: \n {}\nRelationships are: \n[]\n\n    "
+        "Schema:\n    Node properties are the following:\n\nRelationship "
+        "properties are the following:\n\nThe relationships are the "
+        "following:\n\n\n    "
         "Previous conversation:\n    Human: Test question\nAI: foo\n\n    "
         "New human question: Test new question\n    Response:"
     )
@@ -213,12 +215,11 @@ def test_exclude_types() -> None:
     exclude_types = ["Person", "DIRECTED"]
     output = construct_schema(structured_schema, [], exclude_types)
     expected_schema = (
-        "Node properties are the following: \n"
-        " {'Movie': [{'property': 'title', 'type': 'STRING'}], "
-        "'Actor': [{'property': 'name', 'type': 'STRING'}]}\n"
-        "Relationships properties are the following: \n"
-        " {}\nRelationships are: \n"
-        "['(:Actor)-[:ACTED_IN]->(:Movie)']"
+        "Node properties are the following:\n"
+        "Movie {title: STRING},Actor {name: STRING}\n"
+        "Relationship properties are the following:\n\n"
+        "The relationships are the following:\n"
+        "(:Actor)-[:ACTED_IN]->(:Movie)"
     )
     assert output == expected_schema
 
@@ -239,12 +240,11 @@ def test_include_types() -> None:
     include_types = ["Movie", "Actor", "ACTED_IN"]
     output = construct_schema(structured_schema, include_types, [])
     expected_schema = (
-        "Node properties are the following: \n"
-        " {'Movie': [{'property': 'title', 'type': 'STRING'}], "
-        "'Actor': [{'property': 'name', 'type': 'STRING'}]}\n"
-        "Relationships properties are the following: \n"
-        " {}\nRelationships are: \n"
-        "['(:Actor)-[:ACTED_IN]->(:Movie)']"
+        "Node properties are the following:\n"
+        "Movie {title: STRING},Actor {name: STRING}\n"
+        "Relationship properties are the following:\n\n"
+        "The relationships are the following:\n"
+        "(:Actor)-[:ACTED_IN]->(:Movie)"
     )
     assert output == expected_schema
 
@@ -265,12 +265,10 @@ def test_include_types2() -> None:
     include_types = ["Movie", "Actor"]
     output = construct_schema(structured_schema, include_types, [])
     expected_schema = (
-        "Node properties are the following: \n"
-        " {'Movie': [{'property': 'title', 'type': 'STRING'}], "
-        "'Actor': [{'property': 'name', 'type': 'STRING'}]}\n"
-        "Relationships properties are the following: \n"
-        " {}\nRelationships are: \n"
-        "[]"
+        "Node properties are the following:\n"
+        "Movie {title: STRING},Actor {name: STRING}\n"
+        "Relationship properties are the following:\n\n"
+        "The relationships are the following:\n"
     )
     assert output == expected_schema
 
@@ -291,12 +289,11 @@ def test_include_types3() -> None:
     include_types = ["Movie", "Actor", "ACTED_IN"]
     output = construct_schema(structured_schema, include_types, [])
     expected_schema = (
-        "Node properties are the following: \n"
-        " {'Movie': [{'property': 'title', 'type': 'STRING'}], "
-        "'Actor': [{'property': 'name', 'type': 'STRING'}]}\n"
-        "Relationships properties are the following: \n"
-        " {}\nRelationships are: \n"
-        "['(:Actor)-[:ACTED_IN]->(:Movie)']"
+        "Node properties are the following:\n"
+        "Movie {title: STRING},Actor {name: STRING}\n"
+        "Relationship properties are the following:\n\n"
+        "The relationships are the following:\n"
+        "(:Actor)-[:ACTED_IN]->(:Movie)"
     )
     assert output == expected_schema