From 8951c01fe8b0c6cec28e3ef82d150a17fcf9f4d7 Mon Sep 17 00:00:00 2001 From: Maxime Grenu <69890511+cluster2600@users.noreply.github.com> Date: Wed, 18 Feb 2026 23:51:42 +0100 Subject: [PATCH] fix(text-splitters): prevent JSFrameworkTextSplitter from mutating self._separators on each split_text() call (#35316) --- .../langchain_text_splitters/jsx.py | 8 ++++-- .../tests/unit_tests/test_text_splitters.py | 28 +++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/libs/text-splitters/langchain_text_splitters/jsx.py b/libs/text-splitters/langchain_text_splitters/jsx.py index e335ae034bd..0e8f4256da9 100644 --- a/libs/text-splitters/langchain_text_splitters/jsx.py +++ b/libs/text-splitters/langchain_text_splitters/jsx.py @@ -92,11 +92,15 @@ class JSFrameworkTextSplitter(RecursiveCharacterTextSplitter): "\ndefault ", " default ", ] + # Build the effective separator list for this call only. + # Do NOT assign back to self._separators: doing so would permanently + # append js_separators + component_separators on every invocation, + # causing the list to grow unboundedly when split_text() is called + # multiple times on the same instance. separators = ( self._separators + js_separators + component_separators + ["<>", "\n\n", "&&\n", "||\n"] ) - self._separators = separators - return super().split_text(text) + return self._split_text(text, separators) diff --git a/libs/text-splitters/tests/unit_tests/test_text_splitters.py b/libs/text-splitters/tests/unit_tests/test_text_splitters.py index a520962beaf..a074c25b7f8 100644 --- a/libs/text-splitters/tests/unit_tests/test_text_splitters.py +++ b/libs/text-splitters/tests/unit_tests/test_text_splitters.py @@ -624,6 +624,34 @@ def test_svelte_text_splitter() -> None: assert [s.strip() for s in splits] == [s.strip() for s in expected_splits] +def test_jsx_splitter_separator_not_mutated_across_calls() -> None: + """Regression test: repeated split_text() calls must not mutate separators. + + Calling split_text() multiple times on the same JSFrameworkTextSplitter + instance must not grow the internal separator list between calls. + + Before the fix, self._separators was overwritten with the full expanded list + on every invocation, so a second call would start with the already-expanded + list and append even more separators. + """ + splitter = JSFrameworkTextSplitter(chunk_size=30, chunk_overlap=0) + + # Record separator count after constructing (should be 0 - no custom separators) + initial_sep_count = len(splitter._separators) + + # Call split_text twice; the results should be identical for identical input + splits_first = splitter.split_text(FAKE_JSX_TEXT) + splits_second = splitter.split_text(FAKE_JSX_TEXT) + + assert splits_first == splits_second, ( + "split_text() must return identical results on repeated calls with the " + "same input" + ) + assert len(splitter._separators) == initial_sep_count, ( + "split_text() must not mutate self._separators between calls" + ) + + CHUNK_SIZE = 16