mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-04 14:13:29 +00:00
community[patch]: Fix neo4j enhanced schema bugs (#21072)
This commit is contained in:
parent
8d2909ee25
commit
c9e96bb5e2
@ -13,7 +13,6 @@ EXHAUSTIVE_SEARCH_LIMIT = 10000
|
|||||||
LIST_LIMIT = 128
|
LIST_LIMIT = 128
|
||||||
# Threshold for returning all available prop values in graph schema
|
# Threshold for returning all available prop values in graph schema
|
||||||
DISTINCT_VALUE_LIMIT = 10
|
DISTINCT_VALUE_LIMIT = 10
|
||||||
NL = "\n"
|
|
||||||
|
|
||||||
node_properties_query = """
|
node_properties_query = """
|
||||||
CALL apoc.meta.data()
|
CALL apoc.meta.data()
|
||||||
@ -52,6 +51,10 @@ include_docs_query = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def clean_string_values(text: str) -> str:
    """Replace line-break characters in ``text`` with spaces.

    Every newline (``\\n``) and carriage return (``\\r``) is replaced by a
    single space, so the returned string contains neither character.

    Args:
        text: The string to clean.

    Returns:
        A copy of ``text`` in which each ``\\n`` and each ``\\r`` has become
        one space. A ``\\r\\n`` pair therefore yields two spaces.
    """
    return text.replace("\n", " ").replace("\r", " ")
|
||||||
|
|
||||||
|
|
||||||
def value_sanitize(d: Any) -> Any:
|
def value_sanitize(d: Any) -> Any:
|
||||||
"""Sanitize the input dictionary or list.
|
"""Sanitize the input dictionary or list.
|
||||||
|
|
||||||
@ -170,7 +173,7 @@ def _enhanced_schema_cypher(
|
|||||||
f" distinct_count: size(`{prop_name}_values`)"
|
f" distinct_count: size(`{prop_name}_values`)"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
elif prop_type in ["INTEGER", "FLOAT", "DATE"]:
|
elif prop_type in ["INTEGER", "FLOAT", "DATE", "DATE_TIME"]:
|
||||||
with_clauses.append(f"min(n.`{prop_name}`) AS `{prop_name}_min`")
|
with_clauses.append(f"min(n.`{prop_name}`) AS `{prop_name}_min`")
|
||||||
with_clauses.append(f"max(n.`{prop_name}`) AS `{prop_name}_max`")
|
with_clauses.append(f"max(n.`{prop_name}`) AS `{prop_name}_max`")
|
||||||
with_clauses.append(
|
with_clauses.append(
|
||||||
@ -194,7 +197,8 @@ def _enhanced_schema_cypher(
|
|||||||
f"min_size: `{prop_name}_size_min`, "
|
f"min_size: `{prop_name}_size_min`, "
|
||||||
f"max_size: `{prop_name}_size_max`"
|
f"max_size: `{prop_name}_size_max`"
|
||||||
)
|
)
|
||||||
|
elif prop_type in ["BOOLEAN", "POINT", "DURATION"]:
|
||||||
|
continue
|
||||||
output_dict[prop_name] = "{" + return_clauses.pop() + "}"
|
output_dict[prop_name] = "{" + return_clauses.pop() + "}"
|
||||||
else:
|
else:
|
||||||
# Just sample 5 random nodes
|
# Just sample 5 random nodes
|
||||||
@ -210,7 +214,7 @@ def _enhanced_schema_cypher(
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
return_clauses.append(f"values: `{prop_name}_values`")
|
return_clauses.append(f"values: `{prop_name}_values`")
|
||||||
elif prop_type in ["INTEGER", "FLOAT", "DATE"]:
|
elif prop_type in ["INTEGER", "FLOAT", "DATE", "DATE_TIME"]:
|
||||||
with_clauses.append(
|
with_clauses.append(
|
||||||
f"collect(distinct toString(n.`{prop_name}`)) "
|
f"collect(distinct toString(n.`{prop_name}`)) "
|
||||||
f"AS `{prop_name}_values`"
|
f"AS `{prop_name}_values`"
|
||||||
@ -226,6 +230,8 @@ def _enhanced_schema_cypher(
|
|||||||
return_clauses.append(
|
return_clauses.append(
|
||||||
f"min_size: `{prop_name}_size_min`,max_size: `{prop_name}_size_max`"
|
f"min_size: `{prop_name}_size_min`,max_size: `{prop_name}_size_max`"
|
||||||
)
|
)
|
||||||
|
elif prop_type in ["BOOLEAN", "POINT", "DURATION"]:
|
||||||
|
continue
|
||||||
|
|
||||||
output_dict[prop_name] = "{" + return_clauses.pop() + "}"
|
output_dict[prop_name] = "{" + return_clauses.pop() + "}"
|
||||||
|
|
||||||
@ -253,7 +259,7 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str:
|
|||||||
if prop["type"] == "STRING":
|
if prop["type"] == "STRING":
|
||||||
if prop.get("distinct_count", 11) > DISTINCT_VALUE_LIMIT:
|
if prop.get("distinct_count", 11) > DISTINCT_VALUE_LIMIT:
|
||||||
example = (
|
example = (
|
||||||
f'Example: "{prop["values"][0].replace(NL, " ")}"'
|
f'Example: "{clean_string_values(prop["values"][0])}"'
|
||||||
if prop["values"]
|
if prop["values"]
|
||||||
else ""
|
else ""
|
||||||
)
|
)
|
||||||
@ -261,13 +267,13 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str:
|
|||||||
example = (
|
example = (
|
||||||
(
|
(
|
||||||
"Available options: "
|
"Available options: "
|
||||||
f'{[el.replace(NL, " ") for el in prop["values"]]}'
|
f'{[clean_string_values(el) for el in prop["values"]]}'
|
||||||
)
|
)
|
||||||
if prop["values"]
|
if prop["values"]
|
||||||
else ""
|
else ""
|
||||||
)
|
)
|
||||||
|
|
||||||
elif prop["type"] in ["INTEGER", "FLOAT", "DATE"]:
|
elif prop["type"] in ["INTEGER", "FLOAT", "DATE", "DATE_TIME"]:
|
||||||
if prop.get("min") is not None:
|
if prop.get("min") is not None:
|
||||||
example = f'Min: {prop["min"]}, Max: {prop["max"]}'
|
example = f'Min: {prop["min"]}, Max: {prop["max"]}'
|
||||||
else:
|
else:
|
||||||
@ -282,7 +288,7 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str:
|
|||||||
f'Min Size: {prop["min_size"]}, Max Size: {prop["max_size"]}'
|
f'Min Size: {prop["min_size"]}, Max Size: {prop["max_size"]}'
|
||||||
)
|
)
|
||||||
formatted_node_props.append(
|
formatted_node_props.append(
|
||||||
f" - `{prop['property']}: {prop['type']}` {example}"
|
f" - `{prop['property']}`: {prop['type']}` {example}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Enhanced formatting for relationships
|
# Enhanced formatting for relationships
|
||||||
@ -293,7 +299,7 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str:
|
|||||||
if prop["type"] == "STRING":
|
if prop["type"] == "STRING":
|
||||||
if prop.get("distinct_count", 11) > DISTINCT_VALUE_LIMIT:
|
if prop.get("distinct_count", 11) > DISTINCT_VALUE_LIMIT:
|
||||||
example = (
|
example = (
|
||||||
f'Example: "{prop["values"][0].replace(NL, " ")}"'
|
f'Example: "{clean_string_values(prop["values"][0])}"'
|
||||||
if prop["values"]
|
if prop["values"]
|
||||||
else ""
|
else ""
|
||||||
)
|
)
|
||||||
@ -301,12 +307,12 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str:
|
|||||||
example = (
|
example = (
|
||||||
(
|
(
|
||||||
"Available options: "
|
"Available options: "
|
||||||
f'{[el.replace(NL, " ") for el in prop["values"]]}'
|
f'{[clean_string_values(el) for el in prop["values"]]}'
|
||||||
)
|
)
|
||||||
if prop["values"]
|
if prop["values"]
|
||||||
else ""
|
else ""
|
||||||
)
|
)
|
||||||
elif prop["type"] in ["INTEGER", "FLOAT", "DATE"]:
|
elif prop["type"] in ["INTEGER", "FLOAT", "DATE", "DATE_TIME"]:
|
||||||
if prop.get("min"): # If we have min/max
|
if prop.get("min"): # If we have min/max
|
||||||
example = f'Min: {prop["min"]}, Max: {prop["max"]}'
|
example = f'Min: {prop["min"]}, Max: {prop["max"]}'
|
||||||
else: # return a single value
|
else: # return a single value
|
||||||
|
Loading…
Reference in New Issue
Block a user