mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-17 15:35:14 +00:00
community[patch]: Fix neo4j sanitizing values (#18750)
Fixing sanitization for when deeply nested lists appear
This commit is contained in:
@@ -46,36 +46,45 @@ include_docs_query = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def value_sanitize(d: Dict[str, Any]) -> Dict[str, Any]:
|
def value_sanitize(d: Any) -> Any:
|
||||||
"""Sanitize the input dictionary.
|
"""Sanitize the input dictionary or list.
|
||||||
|
|
||||||
Sanitizes the input dictionary by removing embedding-like values,
|
Sanitizes the input by removing embedding-like values,
|
||||||
lists with more than 128 elements, that are mostly irrelevant for
|
lists with more than 128 elements, that are mostly irrelevant for
|
||||||
generating answers in a LLM context. These properties, if left in
|
generating answers in a LLM context. These properties, if left in
|
||||||
results, can occupy significant context space and detract from
|
results, can occupy significant context space and detract from
|
||||||
the LLM's performance by introducing unnecessary noise and cost.
|
the LLM's performance by introducing unnecessary noise and cost.
|
||||||
"""
|
"""
|
||||||
LIST_LIMIT = 128
|
LIST_LIMIT = 128
|
||||||
# Create a new dictionary to avoid changing size during iteration
|
if isinstance(d, dict):
|
||||||
new_dict = {}
|
new_dict = {}
|
||||||
for key, value in d.items():
|
for key, value in d.items():
|
||||||
if isinstance(value, dict):
|
if isinstance(value, dict):
|
||||||
# Recurse to handle nested dictionaries
|
sanitized_value = value_sanitize(value)
|
||||||
new_dict[key] = value_sanitize(value)
|
if (
|
||||||
|
sanitized_value is not None
|
||||||
|
): # Check if the sanitized value is not None
|
||||||
|
new_dict[key] = sanitized_value
|
||||||
elif isinstance(value, list):
|
elif isinstance(value, list):
|
||||||
# check if it has less than LIST_LIMIT values
|
|
||||||
if len(value) < LIST_LIMIT:
|
if len(value) < LIST_LIMIT:
|
||||||
# if value is a list, check if it contains dictionaries to clean
|
sanitized_value = value_sanitize(value)
|
||||||
cleaned_list = []
|
if (
|
||||||
for item in value:
|
sanitized_value is not None
|
||||||
if isinstance(item, dict):
|
): # Check if the sanitized value is not None
|
||||||
cleaned_list.append(value_sanitize(item))
|
new_dict[key] = sanitized_value
|
||||||
else:
|
# Do not include the key if the list is oversized
|
||||||
cleaned_list.append(item)
|
|
||||||
new_dict[key] = cleaned_list # type: ignore[assignment]
|
|
||||||
else:
|
else:
|
||||||
new_dict[key] = value
|
new_dict[key] = value
|
||||||
return new_dict
|
return new_dict
|
||||||
|
elif isinstance(d, list):
|
||||||
|
if len(d) < LIST_LIMIT:
|
||||||
|
return [
|
||||||
|
value_sanitize(item) for item in d if value_sanitize(item) is not None
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
return d
|
||||||
|
|
||||||
|
|
||||||
def _get_node_import_query(baseEntityLabel: bool, include_source: bool) -> str:
|
def _get_node_import_query(baseEntityLabel: bool, include_source: bool) -> str:
|
||||||
|
@@ -30,3 +30,12 @@ def test_value_sanitize_with_dict_in_list(): # type: ignore[no-untyped-def]
|
|||||||
input_dict = {"key1": "value1", "oversized_list": [1, 2, {"key": oversized_list}]}
|
input_dict = {"key1": "value1", "oversized_list": [1, 2, {"key": oversized_list}]}
|
||||||
expected_output = {"key1": "value1", "oversized_list": [1, 2, {}]}
|
expected_output = {"key1": "value1", "oversized_list": [1, 2, {}]}
|
||||||
assert value_sanitize(input_dict) == expected_output
|
assert value_sanitize(input_dict) == expected_output
|
||||||
|
|
||||||
|
|
||||||
|
def test_value_sanitize_with_dict_in_nested_list(): # type: ignore[no-untyped-def]
|
||||||
|
input_dict = {
|
||||||
|
"key1": "value1",
|
||||||
|
"deeply_nested_lists": [[[[{"final_nested_key": list(range(200))}]]]],
|
||||||
|
}
|
||||||
|
expected_output = {"key1": "value1", "deeply_nested_lists": [[[[{}]]]]}
|
||||||
|
assert value_sanitize(input_dict) == expected_output
|
||||||
|
Reference in New Issue
Block a user