community[patch]: Handle exceptions where node props aren't consistent in neo4j schema (#22027)

This commit is contained in:
Tomaz Bratanic 2024-05-22 17:21:56 +02:00 committed by GitHub
parent b0ef5e778a
commit d8a1f1114d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 47 additions and 14 deletions

View File

@ -151,7 +151,7 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str:
formatted_node_props.append(f"- **{node_type}**") formatted_node_props.append(f"- **{node_type}**")
for prop in properties: for prop in properties:
example = "" example = ""
if prop["type"] == "STRING": if prop["type"] == "STRING" and prop.get("values"):
if prop.get("distinct_count", 11) > DISTINCT_VALUE_LIMIT: if prop.get("distinct_count", 11) > DISTINCT_VALUE_LIMIT:
example = ( example = (
f'Example: "{clean_string_values(prop["values"][0])}"' f'Example: "{clean_string_values(prop["values"][0])}"'
@ -400,7 +400,7 @@ class Neo4jGraph(GraphStore):
""" """
Refreshes the Neo4j graph schema information. Refreshes the Neo4j graph schema information.
""" """
from neo4j.exceptions import ClientError from neo4j.exceptions import ClientError, CypherTypeError
node_properties = [ node_properties = [
el["output"] el["output"]
@ -461,10 +461,14 @@ class Neo4jGraph(GraphStore):
enhanced_cypher = self._enhanced_schema_cypher( enhanced_cypher = self._enhanced_schema_cypher(
node["name"], node_props, node["count"] < EXHAUSTIVE_SEARCH_LIMIT node["name"], node_props, node["count"] < EXHAUSTIVE_SEARCH_LIMIT
) )
# Due to the schema-flexible nature of Neo4j, errors can happen
try:
enhanced_info = self.query(enhanced_cypher)[0]["output"] enhanced_info = self.query(enhanced_cypher)[0]["output"]
for prop in node_props: for prop in node_props:
if prop["property"] in enhanced_info: if prop["property"] in enhanced_info:
prop.update(enhanced_info[prop["property"]]) prop.update(enhanced_info[prop["property"]])
except CypherTypeError:
continue
# Update rel info # Update rel info
for rel in schema_counts[0]["relationships"]: for rel in schema_counts[0]["relationships"]:
# Skip bloom labels # Skip bloom labels
@ -479,10 +483,14 @@ class Neo4jGraph(GraphStore):
rel["count"] < EXHAUSTIVE_SEARCH_LIMIT, rel["count"] < EXHAUSTIVE_SEARCH_LIMIT,
is_relationship=True, is_relationship=True,
) )
try:
enhanced_info = self.query(enhanced_cypher)[0]["output"] enhanced_info = self.query(enhanced_cypher)[0]["output"]
for prop in rel_props: for prop in rel_props:
if prop["property"] in enhanced_info: if prop["property"] in enhanced_info:
prop.update(enhanced_info[prop["property"]]) prop.update(enhanced_info[prop["property"]])
# Due to the schema-flexible nature of Neo4j, errors can happen
except CypherTypeError:
continue
schema = _format_schema(self.structured_schema, self._enhanced_schema) schema = _format_schema(self.structured_schema, self._enhanced_schema)
@ -587,8 +595,8 @@ class Neo4jGraph(GraphStore):
if prop_type == "STRING": if prop_type == "STRING":
with_clauses.append( with_clauses.append(
( (
f"collect(distinct substring(n.`{prop_name}`, 0, 50)) " f"collect(distinct substring(toString(n.`{prop_name}`)"
f"AS `{prop_name}_values`" f", 0, 50)) AS `{prop_name}_values`"
) )
) )
return_clauses.append( return_clauses.append(
@ -664,8 +672,8 @@ class Neo4jGraph(GraphStore):
else: else:
with_clauses.append( with_clauses.append(
( (
f"collect(distinct substring(n.`{prop_name}`, 0, 50)) " f"collect(distinct substring(toString(n.`{prop_name}`)"
f"AS `{prop_name}_values`" f", 0, 50)) AS `{prop_name}_values`"
) )
) )
return_clauses.append(f"values: `{prop_name}_values`") return_clauses.append(f"values: `{prop_name}_values`")

View File

@ -333,3 +333,28 @@ def test_enhanced_schema() -> None:
# remove metadata portion of schema # remove metadata portion of schema
del graph.structured_schema["metadata"] del graph.structured_schema["metadata"]
assert graph.structured_schema == expected_output assert graph.structured_schema == expected_output
def test_enhanced_schema_exception() -> None:
    """Check that refreshing the enhanced schema tolerates mixed-type properties.

    Three ``Node`` nodes are created whose ``foo`` property holds a string,
    an integer and a list respectively. ``refresh_schema`` is then expected
    to finish without raising, and the structured schema (minus its
    ``metadata`` entry) must equal the expected dictionary.
    """
    neo4j_uri = os.environ.get("NEO4J_URI")
    neo4j_user = os.environ.get("NEO4J_USERNAME")
    neo4j_password = os.environ.get("NEO4J_PASSWORD")
    # Connection settings come from the environment; all three are required.
    assert neo4j_uri is not None
    assert neo4j_user is not None
    assert neo4j_password is not None

    graph = Neo4jGraph(
        url=neo4j_uri,
        username=neo4j_user,
        password=neo4j_password,
        enhanced_schema=True,
    )

    # Start from an empty database, then insert nodes whose `foo` values
    # have three different types.
    graph.query("MATCH (n) DETACH DELETE n")
    graph.query("CREATE (:Node {foo:'bar'}),(:Node {foo: 1}), (:Node {foo: [1,2]})")
    graph.refresh_schema()

    # The metadata portion of the schema is excluded from the comparison.
    del graph.structured_schema["metadata"]
    assert graph.structured_schema == {
        "node_props": {"Node": [{"property": "foo", "type": "STRING"}]},
        "rel_props": {},
        "relationships": [],
    }