Some more fixes to Neo4j enhanced schema (#21139)

This commit is contained in:
Tomaz Bratanic 2024-05-01 22:12:43 +02:00 committed by GitHub
parent 0694538c39
commit 9e53fa7d2e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -173,7 +173,13 @@ def _enhanced_schema_cypher(
f" distinct_count: size(`{prop_name}_values`)" f" distinct_count: size(`{prop_name}_values`)"
) )
) )
elif prop_type in ["INTEGER", "FLOAT", "DATE", "DATE_TIME"]: elif prop_type in [
"INTEGER",
"FLOAT",
"DATE",
"DATE_TIME",
"LOCAL_DATE_TIME",
]:
with_clauses.append(f"min(n.`{prop_name}`) AS `{prop_name}_min`") with_clauses.append(f"min(n.`{prop_name}`) AS `{prop_name}_min`")
with_clauses.append(f"max(n.`{prop_name}`) AS `{prop_name}_max`") with_clauses.append(f"max(n.`{prop_name}`) AS `{prop_name}_max`")
with_clauses.append( with_clauses.append(
@ -214,7 +220,13 @@ def _enhanced_schema_cypher(
) )
) )
return_clauses.append(f"values: `{prop_name}_values`") return_clauses.append(f"values: `{prop_name}_values`")
elif prop_type in ["INTEGER", "FLOAT", "DATE", "DATE_TIME"]: elif prop_type in [
"INTEGER",
"FLOAT",
"DATE",
"DATE_TIME",
"LOCAL_DATE_TIME",
]:
with_clauses.append( with_clauses.append(
f"collect(distinct toString(n.`{prop_name}`)) " f"collect(distinct toString(n.`{prop_name}`)) "
f"AS `{prop_name}_values`" f"AS `{prop_name}_values`"
@ -238,7 +250,7 @@ def _enhanced_schema_cypher(
with_clause = "WITH " + ",\n ".join(with_clauses) with_clause = "WITH " + ",\n ".join(with_clauses)
return_clause = ( return_clause = (
"RETURN {" "RETURN {"
+ ", ".join(f"{k}: {v}" for k, v in output_dict.items()) + ", ".join(f"`{k}`: {v}" for k, v in output_dict.items())
+ "} AS output" + "} AS output"
) )
@ -273,7 +285,13 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str:
else "" else ""
) )
elif prop["type"] in ["INTEGER", "FLOAT", "DATE", "DATE_TIME"]: elif prop["type"] in [
"INTEGER",
"FLOAT",
"DATE",
"DATE_TIME",
"LOCAL_DATE_TIME",
]:
if prop.get("min") is not None: if prop.get("min") is not None:
example = f'Min: {prop["min"]}, Max: {prop["max"]}' example = f'Min: {prop["min"]}, Max: {prop["max"]}'
else: else:
@ -312,7 +330,13 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str:
if prop["values"] if prop["values"]
else "" else ""
) )
elif prop["type"] in ["INTEGER", "FLOAT", "DATE", "DATE_TIME"]: elif prop["type"] in [
"INTEGER",
"FLOAT",
"DATE",
"DATE_TIME",
"LOCAL_DATE_TIME",
]:
if prop.get("min"): # If we have min/max if prop.get("min"): # If we have min/max
example = f'Min: {prop["min"]}, Max: {prop["max"]}' example = f'Min: {prop["min"]}, Max: {prop["max"]}'
else: # return a single value else: # return a single value
@ -543,7 +567,9 @@ class Neo4jGraph(GraphStore):
# Skip bloom labels # Skip bloom labels
if node["name"] in EXCLUDED_LABELS: if node["name"] in EXCLUDED_LABELS:
continue continue
node_props = self.structured_schema["node_props"][node["name"]] node_props = self.structured_schema["node_props"].get(node["name"])
if not node_props: # The node has no properties
continue
enhanced_cypher = _enhanced_schema_cypher( enhanced_cypher = _enhanced_schema_cypher(
node["name"], node_props, node["count"] < EXHAUSTIVE_SEARCH_LIMIT node["name"], node_props, node["count"] < EXHAUSTIVE_SEARCH_LIMIT
) )
@ -557,7 +583,7 @@ class Neo4jGraph(GraphStore):
if rel["name"] in EXCLUDED_RELS: if rel["name"] in EXCLUDED_RELS:
continue continue
rel_props = self.structured_schema["rel_props"].get(rel["name"]) rel_props = self.structured_schema["rel_props"].get(rel["name"])
if not rel_props: if not rel_props: # The rel has no properties
continue continue
enhanced_cypher = _enhanced_schema_cypher( enhanced_cypher = _enhanced_schema_cypher(
rel["name"], rel["name"],