community[patch]: Fix neo4j enhanced schema bugs (#21072)

This commit is contained in:
Tomaz Bratanic 2024-05-01 02:16:26 +02:00 committed by GitHub
parent 8d2909ee25
commit c9e96bb5e2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -13,7 +13,6 @@ EXHAUSTIVE_SEARCH_LIMIT = 10000
LIST_LIMIT = 128
# Threshold for returning all available prop values in graph schema
DISTINCT_VALUE_LIMIT = 10
NL = "\n"
node_properties_query = """
CALL apoc.meta.data()
@ -52,6 +51,10 @@ include_docs_query = (
)
def clean_string_values(text: str) -> str:
return text.replace("\n", " ").replace("\r", " ")
def value_sanitize(d: Any) -> Any:
"""Sanitize the input dictionary or list.
@ -170,7 +173,7 @@ def _enhanced_schema_cypher(
f" distinct_count: size(`{prop_name}_values`)"
)
)
elif prop_type in ["INTEGER", "FLOAT", "DATE"]:
elif prop_type in ["INTEGER", "FLOAT", "DATE", "DATE_TIME"]:
with_clauses.append(f"min(n.`{prop_name}`) AS `{prop_name}_min`")
with_clauses.append(f"max(n.`{prop_name}`) AS `{prop_name}_max`")
with_clauses.append(
@ -194,7 +197,8 @@ def _enhanced_schema_cypher(
f"min_size: `{prop_name}_size_min`, "
f"max_size: `{prop_name}_size_max`"
)
elif prop_type in ["BOOLEAN", "POINT", "DURATION"]:
continue
output_dict[prop_name] = "{" + return_clauses.pop() + "}"
else:
# Just sample 5 random nodes
@ -210,7 +214,7 @@ def _enhanced_schema_cypher(
)
)
return_clauses.append(f"values: `{prop_name}_values`")
elif prop_type in ["INTEGER", "FLOAT", "DATE"]:
elif prop_type in ["INTEGER", "FLOAT", "DATE", "DATE_TIME"]:
with_clauses.append(
f"collect(distinct toString(n.`{prop_name}`)) "
f"AS `{prop_name}_values`"
@ -226,6 +230,8 @@ def _enhanced_schema_cypher(
return_clauses.append(
f"min_size: `{prop_name}_size_min`,max_size: `{prop_name}_size_max`"
)
elif prop_type in ["BOOLEAN", "POINT", "DURATION"]:
continue
output_dict[prop_name] = "{" + return_clauses.pop() + "}"
@ -253,7 +259,7 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str:
if prop["type"] == "STRING":
if prop.get("distinct_count", 11) > DISTINCT_VALUE_LIMIT:
example = (
f'Example: "{prop["values"][0].replace(NL, " ")}"'
f'Example: "{clean_string_values(prop["values"][0])}"'
if prop["values"]
else ""
)
@ -261,13 +267,13 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str:
example = (
(
"Available options: "
f'{[el.replace(NL, " ") for el in prop["values"]]}'
f'{[clean_string_values(el) for el in prop["values"]]}'
)
if prop["values"]
else ""
)
elif prop["type"] in ["INTEGER", "FLOAT", "DATE"]:
elif prop["type"] in ["INTEGER", "FLOAT", "DATE", "DATE_TIME"]:
if prop.get("min") is not None:
example = f'Min: {prop["min"]}, Max: {prop["max"]}'
else:
@ -282,7 +288,7 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str:
f'Min Size: {prop["min_size"]}, Max Size: {prop["max_size"]}'
)
formatted_node_props.append(
f" - `{prop['property']}: {prop['type']}` {example}"
f" - `{prop['property']}`: {prop['type']}` {example}"
)
# Enhanced formatting for relationships
@ -293,7 +299,7 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str:
if prop["type"] == "STRING":
if prop.get("distinct_count", 11) > DISTINCT_VALUE_LIMIT:
example = (
f'Example: "{prop["values"][0].replace(NL, " ")}"'
f'Example: "{clean_string_values(prop["values"][0])}"'
if prop["values"]
else ""
)
@ -301,12 +307,12 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str:
example = (
(
"Available options: "
f'{[el.replace(NL, " ") for el in prop["values"]]}'
f'{[clean_string_values(el) for el in prop["values"]]}'
)
if prop["values"]
else ""
)
elif prop["type"] in ["INTEGER", "FLOAT", "DATE"]:
elif prop["type"] in ["INTEGER", "FLOAT", "DATE", "DATE_TIME"]:
if prop.get("min"): # If we have min/max
example = f'Min: {prop["min"]}, Max: {prop["max"]}'
else: # return a single value