From fd4f536c77d8fa521433e876f6274f4b50d00003 Mon Sep 17 00:00:00 2001 From: six17 <41807970+six17@users.noreply.github.com> Date: Sat, 16 Mar 2024 07:46:49 +0800 Subject: [PATCH] text-splitters[patch]: fix json split of RecursiveJsonSplitter (#19119) - **Description:** This modification addresses the issue of mutable default parameters in functions. In the original code, the `chunks` parameter is defaulted to a list containing an empty dictionary, which is mutable. Since default parameters in Python are evaluated only once at function definition time, modifications to the parameter would persist across future calls. By changing the default to `None` and checking/initializing within the function, a new list is created for each call, thus avoiding potential issues. --------- Co-authored-by: sixiang Co-authored-by: Bagatur --- libs/text-splitters/langchain_text_splitters/json.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libs/text-splitters/langchain_text_splitters/json.py b/libs/text-splitters/langchain_text_splitters/json.py index 8e5f128161c..69b9c732524 100644 --- a/libs/text-splitters/langchain_text_splitters/json.py +++ b/libs/text-splitters/langchain_text_splitters/json.py @@ -48,12 +48,14 @@ class RecursiveJsonSplitter: def _json_split( self, data: Dict[str, Any], - current_path: List[str] = [], - chunks: List[Dict] = [{}], + current_path: Optional[List[str]] = None, + chunks: Optional[List[Dict]] = None, ) -> List[Dict]: """ Split json into maximum size dictionaries while preserving structure. """ + current_path = current_path or [] + chunks = chunks or [{}] if isinstance(data, dict): for key, value in data.items(): new_path = current_path + [key]