Validate TextSplitter sizes: reject chunk_size <= 0 and chunk_overlap < 0.
(Index hashes below predate the review edits made to the added lines.)

diff --git a/libs/text-splitters/langchain_text_splitters/base.py b/libs/text-splitters/langchain_text_splitters/base.py
index f9e4a92222a..8861f4c6585 100644
--- a/libs/text-splitters/langchain_text_splitters/base.py
+++ b/libs/text-splitters/langchain_text_splitters/base.py
@@ -47,6 +47,16 @@ class TextSplitter(BaseDocumentTransformer, ABC):
             strip_whitespace: If `True`, strips whitespace from the start and end of
                 every document
+
+        Raises:
+            ValueError: If `chunk_size` is not positive, if `chunk_overlap` is
+                negative, or if `chunk_overlap` is larger than `chunk_size`.
         """
+        if chunk_size <= 0:
+            msg = f"chunk_size must be > 0, got {chunk_size}"
+            raise ValueError(msg)
+        if chunk_overlap < 0:
+            msg = f"chunk_overlap must be >= 0, got {chunk_overlap}"
+            raise ValueError(msg)
         if chunk_overlap > chunk_size:
             msg = (
                 f"Got a larger chunk overlap ({chunk_overlap}) than chunk size "
diff --git a/libs/text-splitters/tests/unit_tests/test_text_splitters.py b/libs/text-splitters/tests/unit_tests/test_text_splitters.py
index 6f03a2d59f0..1d3304d1b84 100644
--- a/libs/text-splitters/tests/unit_tests/test_text_splitters.py
+++ b/libs/text-splitters/tests/unit_tests/test_text_splitters.py
@@ -212,6 +212,14 @@ def test_character_text_splitting_args() -> None:
     """Test invalid arguments."""
     with pytest.raises(ValueError):
         CharacterTextSplitter(chunk_size=2, chunk_overlap=4)
+    # Match on the message so each case proves its own check fired: a bare
+    # `pytest.raises(ValueError)` is also satisfied by the older
+    # `chunk_overlap > chunk_size` check (any overlap >= 0 exceeds a size of -1).
+    for invalid_size in (0, -1):
+        with pytest.raises(ValueError, match="chunk_size must be > 0"):
+            CharacterTextSplitter(chunk_size=invalid_size, chunk_overlap=0)
+    with pytest.raises(ValueError, match="chunk_overlap must be >= 0"):
+        CharacterTextSplitter(chunk_size=2, chunk_overlap=-1)
 
 
 def test_merge_splits() -> None: