From 3eb391561b016214205bb3ea504c5fe7d3079a4e Mon Sep 17 00:00:00 2001 From: Tomaz Bratanic Date: Wed, 29 Nov 2023 20:13:12 +0100 Subject: [PATCH] langchain[minor]: Reduce the number of tokens required to describe a Cypher/Neo4j schema (#13851) Instead of using JSON-like syntax to describe node and relationship properties we changed to a shorter and more concise schema description Old: ``` Node properties are the following: [{'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Movie'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Actor'}] Relationship properties are the following: [] The relationships are the following: ['(:Actor)-[:ACTED_IN]->(:Movie)'] ``` New: ``` Node properties are the following: Movie {name: STRING},Actor {name: STRING} Relationship properties are the following: The relationships are the following: (:Actor)-[:ACTED_IN]->(:Movie) ``` --- .../use_cases/graph/graph_cypher_qa.ipynb | 37 +++++-------- .../langchain/chains/graph_qa/cypher.py | 41 ++++++++++---- .../langchain/langchain/graphs/neo4j_graph.py | 40 +++++++++++--- .../chains/test_graph_database.py | 55 +++++++++++-------- .../tests/unit_tests/chains/test_graph_qa.py | 53 +++++++++--------- 5 files changed, 132 insertions(+), 94 deletions(-) diff --git a/docs/docs/use_cases/graph/graph_cypher_qa.ipynb b/docs/docs/use_cases/graph/graph_cypher_qa.ipynb index daa06069a2a..56e30288442 100644 --- a/docs/docs/use_cases/graph/graph_cypher_qa.ipynb +++ b/docs/docs/use_cases/graph/graph_cypher_qa.ipynb @@ -48,16 +48,7 @@ "execution_count": 2, "id": "0928915d", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/tomaz/neo4j/langchain/libs/langchain/langchain/graphs/neo4j_graph.py:52: ExperimentalWarning: The configuration may change in the future.\n", - " self._driver.verify_connectivity()\n" - ] - } - ], + "outputs": [], "source": [ "graph = Neo4jGraph(\n", " url=\"bolt://localhost:7687\", username=\"neo4j\", password=\"pleaseletmein\"\n", @@ -132,14 +123,12 @@ "name": "stdout", "output_type": "stream", "text": [ + "Node properties are the following:\n", + "Movie {name: STRING},Actor {name: STRING}\n", + "Relationship properties are the following:\n", "\n", - " Node properties are the following:\n", - " [{'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Movie'}, {'properties': [{'property': 'name', 'type': 'STRING'}], 'labels': 'Actor'}]\n", - " Relationship properties are the following:\n", - " []\n", - " The relationships are the following:\n", - " ['(:Actor)-[:ACTED_IN]->(:Movie)']\n", - " \n" + "The relationships are the following:\n", + "(:Actor)-[:ACTED_IN]->(:Movie)\n" ] } ], @@ -556,12 +545,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "Node properties are the following: \n", - " {'Actor': [{'property': 'name', 'type': 'STRING'}]}\n", - "Relationships properties are the following: \n", - " {}\n", - "Relationships are: \n", - "[]\n" + "Node properties are the following:\n", + "Actor {name: STRING}\n", + "Relationship properties are the following:\n", + "\n", + "The relationships are the following:\n", + "\n" ] } ], @@ -656,7 +645,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.8" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/libs/langchain/langchain/chains/graph_qa/cypher.py b/libs/langchain/langchain/chains/graph_qa/cypher.py index 91ce4e0454b..1e91d22b07b 100644 --- a/libs/langchain/langchain/chains/graph_qa/cypher.py +++ b/libs/langchain/langchain/chains/graph_qa/cypher.py @@ -46,7 +46,7 @@ def construct_schema( def filter_func(x: str) -> bool: return x in include_types if include_types else x not in exclude_types - filtered_schema = { + filtered_schema: Dict[str, Any] = { "node_props": { k: v for k, v in structured_schema.get("node_props", {}).items() @@ -64,16 +64,37 @@ def construct_schema( ], } - return ( - f"Node properties are the following: \n {filtered_schema['node_props']}\n" - f"Relationships properties are the following: \n {filtered_schema['rel_props']}" - "\nRelationships are: \n" - + str( - [ - f"(:{el['start']})-[:{el['type']}]->(:{el['end']})" - for el in filtered_schema["relationships"] - ] + # Format node properties + formatted_node_props = [] + for label, properties in filtered_schema["node_props"].items(): + props_str = ", ".join( + [f"{prop['property']}: {prop['type']}" for prop in properties] ) + formatted_node_props.append(f"{label} {{{props_str}}}") + + # Format relationship properties + formatted_rel_props = [] + for rel_type, properties in filtered_schema["rel_props"].items(): + props_str = ", ".join( + [f"{prop['property']}: {prop['type']}" for prop in properties] + ) + formatted_rel_props.append(f"{rel_type} {{{props_str}}}") + + # Format relationships + formatted_rels = [ + f"(:{el['start']})-[:{el['type']}]->(:{el['end']})" + for el in filtered_schema["relationships"] + ] + + return "\n".join( + [ + "Node properties are the following:", + ",".join(formatted_node_props), + "Relationship properties are the following:", + ",".join(formatted_rel_props), + "The relationships are the following:", + ",".join(formatted_rels), + ] ) diff --git a/libs/langchain/langchain/graphs/neo4j_graph.py b/libs/langchain/langchain/graphs/neo4j_graph.py index dfbf38bb983..ca78b7a323a 100644 --- a/libs/langchain/langchain/graphs/neo4j_graph.py +++ b/libs/langchain/langchain/graphs/neo4j_graph.py @@ -127,14 +127,38 @@ class Neo4jGraph(GraphStore): "rel_props": {el["type"]: el["properties"] for el in rel_properties}, "relationships": relationships, } - self.schema = f""" - Node properties are the following: - {node_properties} - Relationship properties are the following: - {rel_properties} - The relationships are the following: - {[f"(:{el['start']})-[:{el['type']}]->(:{el['end']})" for el in relationships]} - """ + + # Format node properties + formatted_node_props = [] + for el in node_properties: + props_str = ", ".join( + [f"{prop['property']}: {prop['type']}" for prop in el["properties"]] + ) + formatted_node_props.append(f"{el['labels']} {{{props_str}}}") + + # Format relationship properties + formatted_rel_props = [] + for el in rel_properties: + props_str = ", ".join( + [f"{prop['property']}: {prop['type']}" for prop in el["properties"]] + ) + formatted_rel_props.append(f"{el['type']} {{{props_str}}}") + + # Format relationships + formatted_rels = [ + f"(:{el['start']})-[:{el['type']}]->(:{el['end']})" for el in relationships + ] + + self.schema = "\n".join( + [ + "Node properties are the following:", + ",".join(formatted_node_props), + "Relationship properties are the following:", + ",".join(formatted_rel_props), + "The relationships are the following:", + ",".join(formatted_rels), + ] + ) def add_graph_documents( self, graph_documents: List[GraphDocument], include_source: bool = False diff --git a/libs/langchain/tests/integration_tests/chains/test_graph_database.py b/libs/langchain/tests/integration_tests/chains/test_graph_database.py index eb40972461c..f46273a43f9 100644 --- a/libs/langchain/tests/integration_tests/chains/test_graph_database.py +++ b/libs/langchain/tests/integration_tests/chains/test_graph_database.py @@ -146,11 +146,23 @@ def test_cypher_intermediate_steps() -> None: assert output["result"] == expected_output query = output["intermediate_steps"][0]["query"] - expected_query = ( - "\n\nMATCH (a:Actor)-[:ACTED_IN]->" - "(m:Movie {title: 'Pulp Fiction'}) RETURN a.name" - ) - assert query == expected_query + # LLM can return variations of the same query + expected_queries = [ + ( + "\n\nMATCH (a:Actor)-[:ACTED_IN]->" + "(m:Movie {title: 'Pulp Fiction'}) RETURN a.name" + ), + ( + "\n\nMATCH (a:Actor)-[:ACTED_IN]->" + "(m:Movie {title: 'Pulp Fiction'}) RETURN a.name;" + ), + ( + "\n\nMATCH (a:Actor)-[:ACTED_IN]->" + "(m:Movie) WHERE m.title = 'Pulp Fiction' RETURN a.name" + ), + ] + + assert query in expected_queries context = output["intermediate_steps"][1]["context"] expected_context = [{"a.name": "Bruce Willis"}] @@ -307,14 +319,12 @@ def test_exclude_types() -> None: OpenAI(temperature=0), graph=graph, exclude_types=["Person", "DIRECTED"] ) expected_schema = ( - "Node properties are the following: \n" - " {'Movie': [{'property': 'title', 'type': 'STRING'}], " - "'Actor': [{'property': 'name', 'type': 'STRING'}]}\n" - "Relationships properties are the following: \n" - " {}\nRelationships are: \n" - "['(:Actor)-[:ACTED_IN]->(:Movie)']" + "Node properties are the following:\n" + "Movie {title: STRING},Actor {name: STRING}\n" + "Relationship properties are the following:\n\n" + "The relationships are the following:\n" + "(:Actor)-[:ACTED_IN]->(:Movie)" ) - assert chain.graph_schema == expected_schema @@ -347,12 +357,11 @@ def test_include_types() -> None: OpenAI(temperature=0), graph=graph, include_types=["Movie", "Actor", "ACTED_IN"] ) expected_schema = ( - "Node properties are the following: \n" - " {'Movie': [{'property': 'title', 'type': 'STRING'}], " - "'Actor': [{'property': 'name', 'type': 'STRING'}]}\n" - "Relationships properties are the following: \n" - " {}\nRelationships are: \n" - "['(:Actor)-[:ACTED_IN]->(:Movie)']" + "Node properties are the following:\n" + "Movie {title: STRING},Actor {name: STRING}\n" + "Relationship properties are the following:\n\n" + "The relationships are the following:\n" + "(:Actor)-[:ACTED_IN]->(:Movie)" ) assert chain.graph_schema == expected_schema @@ -387,11 +396,9 @@ def test_include_types2() -> None: OpenAI(temperature=0), graph=graph, include_types=["Movie", "ACTED_IN"] ) expected_schema = ( - "Node properties are the following: \n" - " {'Movie': [{'property': 'title', 'type': 'STRING'}]}\n" - "Relationships properties are the following: \n" - " {}\nRelationships are: \n" - "[]" + "Node properties are the following:\n" + "Movie {title: STRING}\n" + "Relationship properties are the following:\n\n" + "The relationships are the following:\n" ) - assert chain.graph_schema == expected_schema diff --git a/libs/langchain/tests/unit_tests/chains/test_graph_qa.py b/libs/langchain/tests/unit_tests/chains/test_graph_qa.py index 8c034ffed0f..d7655409a48 100644 --- a/libs/langchain/tests/unit_tests/chains/test_graph_qa.py +++ b/libs/langchain/tests/unit_tests/chains/test_graph_qa.py @@ -152,16 +152,18 @@ def test_graph_cypher_qa_chain() -> None: readonlymemory = ReadOnlySharedMemory(memory=memory) prompt1 = ( "You are a nice chatbot having a conversation with a human.\n\n " - "Schema:\n Node properties are the following: \n {}\nRelationships " - "properties are the following: \n {}\nRelationships are: \n[]\n\n " + "Schema:\n Node properties are the following:\n\nRelationship " + "properties are the following:\n\nThe relationships are the " + "following:\n\n\n " "Previous conversation:\n \n\n New human question: " "Test question\n Response:" ) prompt2 = ( "You are a nice chatbot having a conversation with a human.\n\n " - "Schema:\n Node properties are the following: \n {}\nRelationships " - "properties are the following: \n {}\nRelationships are: \n[]\n\n " + "Schema:\n Node properties are the following:\n\nRelationship " + "properties are the following:\n\nThe relationships are the " + "following:\n\n\n " "Previous conversation:\n Human: Test question\nAI: foo\n\n " "New human question: Test new question\n Response:" ) @@ -213,12 +215,11 @@ def test_exclude_types() -> None: exclude_types = ["Person", "DIRECTED"] output = construct_schema(structured_schema, [], exclude_types) expected_schema = ( - "Node properties are the following: \n" - " {'Movie': [{'property': 'title', 'type': 'STRING'}], " - "'Actor': [{'property': 'name', 'type': 'STRING'}]}\n" - "Relationships properties are the following: \n" - " {}\nRelationships are: \n" - "['(:Actor)-[:ACTED_IN]->(:Movie)']" + "Node properties are the following:\n" + "Movie {title: STRING},Actor {name: STRING}\n" + "Relationship properties are the following:\n\n" + "The relationships are the following:\n" + "(:Actor)-[:ACTED_IN]->(:Movie)" ) assert output == expected_schema @@ -239,12 +240,11 @@ def test_include_types() -> None: include_types = ["Movie", "Actor", "ACTED_IN"] output = construct_schema(structured_schema, include_types, []) expected_schema = ( - "Node properties are the following: \n" - " {'Movie': [{'property': 'title', 'type': 'STRING'}], " - "'Actor': [{'property': 'name', 'type': 'STRING'}]}\n" - "Relationships properties are the following: \n" - " {}\nRelationships are: \n" - "['(:Actor)-[:ACTED_IN]->(:Movie)']" + "Node properties are the following:\n" + "Movie {title: STRING},Actor {name: STRING}\n" + "Relationship properties are the following:\n\n" + "The relationships are the following:\n" + "(:Actor)-[:ACTED_IN]->(:Movie)" ) assert output == expected_schema @@ -265,12 +265,10 @@ def test_include_types2() -> None: include_types = ["Movie", "Actor"] output = construct_schema(structured_schema, include_types, []) expected_schema = ( - "Node properties are the following: \n" - " {'Movie': [{'property': 'title', 'type': 'STRING'}], " - "'Actor': [{'property': 'name', 'type': 'STRING'}]}\n" - "Relationships properties are the following: \n" - " {}\nRelationships are: \n" - "[]" + "Node properties are the following:\n" + "Movie {title: STRING},Actor {name: STRING}\n" + "Relationship properties are the following:\n\n" + "The relationships are the following:\n" ) assert output == expected_schema @@ -291,12 +289,11 @@ def test_include_types3() -> None: include_types = ["Movie", "Actor", "ACTED_IN"] output = construct_schema(structured_schema, include_types, []) expected_schema = ( - "Node properties are the following: \n" - " {'Movie': [{'property': 'title', 'type': 'STRING'}], " - "'Actor': [{'property': 'name', 'type': 'STRING'}]}\n" - "Relationships properties are the following: \n" - " {}\nRelationships are: \n" - "['(:Actor)-[:ACTED_IN]->(:Movie)']" + "Node properties are the following:\n" + "Movie {title: STRING},Actor {name: STRING}\n" + "Relationship properties are the following:\n\n" + "The relationships are the following:\n" + "(:Actor)-[:ACTED_IN]->(:Movie)" ) assert output == expected_schema