diff --git a/libs/community/langchain_community/graphs/neo4j_graph.py b/libs/community/langchain_community/graphs/neo4j_graph.py index c1740f16e46..99ee2e38b4d 100644 --- a/libs/community/langchain_community/graphs/neo4j_graph.py +++ b/libs/community/langchain_community/graphs/neo4j_graph.py @@ -13,7 +13,6 @@ EXHAUSTIVE_SEARCH_LIMIT = 10000 LIST_LIMIT = 128 # Threshold for returning all available prop values in graph schema DISTINCT_VALUE_LIMIT = 10 -NL = "\n" node_properties_query = """ CALL apoc.meta.data() @@ -52,6 +51,10 @@ include_docs_query = ( ) +def clean_string_values(text: str) -> str: + return text.replace("\n", " ").replace("\r", " ") + + def value_sanitize(d: Any) -> Any: """Sanitize the input dictionary or list. @@ -170,7 +173,7 @@ def _enhanced_schema_cypher( f" distinct_count: size(`{prop_name}_values`)" ) ) - elif prop_type in ["INTEGER", "FLOAT", "DATE"]: + elif prop_type in ["INTEGER", "FLOAT", "DATE", "DATE_TIME"]: with_clauses.append(f"min(n.`{prop_name}`) AS `{prop_name}_min`") with_clauses.append(f"max(n.`{prop_name}`) AS `{prop_name}_max`") with_clauses.append( @@ -194,7 +197,8 @@ def _enhanced_schema_cypher( f"min_size: `{prop_name}_size_min`, " f"max_size: `{prop_name}_size_max`" ) - + elif prop_type in ["BOOLEAN", "POINT", "DURATION"]: + continue output_dict[prop_name] = "{" + return_clauses.pop() + "}" else: # Just sample 5 random nodes @@ -210,7 +214,7 @@ def _enhanced_schema_cypher( ) ) return_clauses.append(f"values: `{prop_name}_values`") - elif prop_type in ["INTEGER", "FLOAT", "DATE"]: + elif prop_type in ["INTEGER", "FLOAT", "DATE", "DATE_TIME"]: with_clauses.append( f"collect(distinct toString(n.`{prop_name}`)) " f"AS `{prop_name}_values`" @@ -226,6 +230,8 @@ def _enhanced_schema_cypher( return_clauses.append( f"min_size: `{prop_name}_size_min`,max_size: `{prop_name}_size_max`" ) + elif prop_type in ["BOOLEAN", "POINT", "DURATION"]: + continue output_dict[prop_name] = "{" + return_clauses.pop() + "}" @@ -253,7 +259,7 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str: if prop["type"] == "STRING": if prop.get("distinct_count", 11) > DISTINCT_VALUE_LIMIT: example = ( - f'Example: "{prop["values"][0].replace(NL, " ")}"' + f'Example: "{clean_string_values(prop["values"][0])}"' if prop["values"] else "" ) @@ -261,13 +267,13 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str: example = ( ( "Available options: " - f'{[el.replace(NL, " ") for el in prop["values"]]}' + f'{[clean_string_values(el) for el in prop["values"]]}' ) if prop["values"] else "" ) - elif prop["type"] in ["INTEGER", "FLOAT", "DATE"]: + elif prop["type"] in ["INTEGER", "FLOAT", "DATE", "DATE_TIME"]: if prop.get("min") is not None: example = f'Min: {prop["min"]}, Max: {prop["max"]}' else: @@ -282,7 +288,7 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str: f'Min Size: {prop["min_size"]}, Max Size: {prop["max_size"]}' ) formatted_node_props.append( - f" - `{prop['property']}: {prop['type']}` {example}" + f" - `{prop['property']}`: {prop['type']}` {example}" ) # Enhanced formatting for relationships @@ -293,7 +299,7 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str: if prop["type"] == "STRING": if prop.get("distinct_count", 11) > DISTINCT_VALUE_LIMIT: example = ( - f'Example: "{prop["values"][0].replace(NL, " ")}"' + f'Example: "{clean_string_values(prop["values"][0])}"' if prop["values"] else "" ) @@ -301,12 +307,12 @@ def _format_schema(schema: Dict, is_enhanced: bool) -> str: example = ( ( "Available options: " - f'{[el.replace(NL, " ") for el in prop["values"]]}' + f'{[clean_string_values(el) for el in prop["values"]]}' ) if prop["values"] else "" ) - elif prop["type"] in ["INTEGER", "FLOAT", "DATE"]: + elif prop["type"] in ["INTEGER", "FLOAT", "DATE", "DATE_TIME"]: if prop.get("min"): # If we have min/max example = f'Min: {prop["min"]}, Max: {prop["max"]}' else: # return a single value