From d8a1f1114d839b2a902ff9bd4ae668751257414a Mon Sep 17 00:00:00 2001 From: Tomaz Bratanic Date: Wed, 22 May 2024 17:21:56 +0200 Subject: [PATCH] community[patch]: Handle exceptions where node props aren't consistent in neo4j schema (#22027) --- .../langchain_community/graphs/neo4j_graph.py | 36 +++++++++++-------- .../integration_tests/graphs/test_neo4j.py | 25 +++++++++++++ 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/libs/community/langchain_community/graphs/neo4j_graph.py b/libs/community/langchain_community/graphs/neo4j_graph.py index 697b2221cb2..a8b7bf6b0bd 100644 --- a/libs/community/langchain_community/graphs/neo4j_graph.py +++ b/libs/community/langchain_community/graphs/neo4j_graph.py @@ -151,7 +151,7 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str: formatted_node_props.append(f"- **{node_type}**") for prop in properties: example = "" - if prop["type"] == "STRING": + if prop["type"] == "STRING" and prop.get("values"): if prop.get("distinct_count", 11) > DISTINCT_VALUE_LIMIT: example = ( f'Example: "{clean_string_values(prop["values"][0])}"' @@ -400,7 +400,7 @@ class Neo4jGraph(GraphStore): """ Refreshes the Neo4j graph schema information. """ - from neo4j.exceptions import ClientError + from neo4j.exceptions import ClientError, CypherTypeError node_properties = [ el["output"] @@ -461,10 +461,14 @@ class Neo4jGraph(GraphStore): enhanced_cypher = self._enhanced_schema_cypher( node["name"], node_props, node["count"] < EXHAUSTIVE_SEARCH_LIMIT ) - enhanced_info = self.query(enhanced_cypher)[0]["output"] - for prop in node_props: - if prop["property"] in enhanced_info: - prop.update(enhanced_info[prop["property"]]) + # Due to schema-flexible nature of neo4j errors can happen + try: + enhanced_info = self.query(enhanced_cypher)[0]["output"] + for prop in node_props: + if prop["property"] in enhanced_info: + prop.update(enhanced_info[prop["property"]]) + except CypherTypeError: + continue # Update rel info for rel in schema_counts[0]["relationships"]: # Skip bloom labels @@ -479,10 +483,14 @@ class Neo4jGraph(GraphStore): rel["count"] < EXHAUSTIVE_SEARCH_LIMIT, is_relationship=True, ) - enhanced_info = self.query(enhanced_cypher)[0]["output"] - for prop in rel_props: - if prop["property"] in enhanced_info: - prop.update(enhanced_info[prop["property"]]) + try: + enhanced_info = self.query(enhanced_cypher)[0]["output"] + for prop in rel_props: + if prop["property"] in enhanced_info: + prop.update(enhanced_info[prop["property"]]) + # Due to schema-flexible nature of neo4j errors can happen + except CypherTypeError: + continue schema = _format_schema(self.structured_schema, self._enhanced_schema) @@ -587,8 +595,8 @@ class Neo4jGraph(GraphStore): if prop_type == "STRING": with_clauses.append( ( - f"collect(distinct substring(n.`{prop_name}`, 0, 50)) " - f"AS `{prop_name}_values`" + f"collect(distinct substring(toString(n.`{prop_name}`)" + f", 0, 50)) AS `{prop_name}_values`" ) ) return_clauses.append( @@ -664,8 +672,8 @@ class Neo4jGraph(GraphStore): else: with_clauses.append( ( - f"collect(distinct substring(n.`{prop_name}`, 0, 50)) " - f"AS `{prop_name}_values`" + f"collect(distinct substring(toString(n.`{prop_name}`)" + f", 0, 50)) AS `{prop_name}_values`" ) ) return_clauses.append(f"values: `{prop_name}_values`") diff --git a/libs/community/tests/integration_tests/graphs/test_neo4j.py b/libs/community/tests/integration_tests/graphs/test_neo4j.py index 8fe3349ee3b..2765938905e 100644 --- a/libs/community/tests/integration_tests/graphs/test_neo4j.py +++ b/libs/community/tests/integration_tests/graphs/test_neo4j.py @@ -333,3 +333,28 @@ def test_enhanced_schema() -> None: # remove metadata portion of schema del graph.structured_schema["metadata"] assert graph.structured_schema == expected_output + + +def test_enhanced_schema_exception() -> None: + """Test no error with weird schema.""" + url = os.environ.get("NEO4J_URI") + username = os.environ.get("NEO4J_USERNAME") + password = os.environ.get("NEO4J_PASSWORD") + assert url is not None + assert username is not None + assert password is not None + + graph = Neo4jGraph( + url=url, username=username, password=password, enhanced_schema=True + ) + graph.query("MATCH (n) DETACH DELETE n") + graph.query("CREATE (:Node {foo:'bar'})," "(:Node {foo: 1}), (:Node {foo: [1,2]})") + graph.refresh_schema() + expected_output = { + "node_props": {"Node": [{"property": "foo", "type": "STRING"}]}, + "rel_props": {}, + "relationships": [], + } + # remove metadata portion of schema + del graph.structured_schema["metadata"] + assert graph.structured_schema == expected_output