diff --git a/libs/text-splitters/langchain_text_splitters/jsx.py b/libs/text-splitters/langchain_text_splitters/jsx.py
index e335ae034bd..0e8f4256da9 100644
--- a/libs/text-splitters/langchain_text_splitters/jsx.py
+++ b/libs/text-splitters/langchain_text_splitters/jsx.py
@@ -92,11 +92,15 @@ class JSFrameworkTextSplitter(RecursiveCharacterTextSplitter):
             "\ndefault ",
             " default ",
         ]
+        # Build the effective separator list for this call only.
+        # Do NOT assign back to self._separators: doing so would permanently
+        # append js_separators, component_separators and the trailing literal
+        # separators on every invocation, so the list would grow without bound
+        # when split_text() is called multiple times on the same instance.
         separators = (
             self._separators
             + js_separators
             + component_separators
             + ["<>", "\n\n", "&&\n", "||\n"]
         )
-        self._separators = separators
-        return super().split_text(text)
+        return self._split_text(text, separators)
diff --git a/libs/text-splitters/tests/unit_tests/test_text_splitters.py b/libs/text-splitters/tests/unit_tests/test_text_splitters.py
index a520962beaf..a074c25b7f8 100644
--- a/libs/text-splitters/tests/unit_tests/test_text_splitters.py
+++ b/libs/text-splitters/tests/unit_tests/test_text_splitters.py
@@ -624,6 +624,34 @@ def test_svelte_text_splitter() -> None:
     assert [s.strip() for s in splits] == [s.strip() for s in expected_splits]
 
 
+def test_jsx_splitter_separator_not_mutated_across_calls() -> None:
+    """Regression test: repeated split_text() calls must not mutate separators.
+
+    Calling split_text() multiple times on the same JSFrameworkTextSplitter
+    instance must not grow the internal separator list between calls.
+
+    Before the fix, self._separators was overwritten with the full expanded list
+    on every invocation, so a second call would start with the already-expanded
+    list and append even more separators.
+    """
+    splitter = JSFrameworkTextSplitter(chunk_size=30, chunk_overlap=0)
+
+    # Record separator count after constructing (should be 0 - no custom separators)
+    initial_sep_count = len(splitter._separators)
+
+    # Call split_text twice; the results should be identical for identical input
+    splits_first = splitter.split_text(FAKE_JSX_TEXT)
+    splits_second = splitter.split_text(FAKE_JSX_TEXT)
+
+    assert splits_first == splits_second, (
+        "split_text() must return identical results on repeated calls with the "
+        "same input"
+    )
+    assert len(splitter._separators) == initial_sep_count, (
+        "split_text() must not mutate self._separators between calls"
+    )
+
+
 CHUNK_SIZE = 16