Harrison/fix splitting (#563)

Fix an issue where text splitting could create empty documents.
This commit is contained in:
Harrison Chase
2023-01-08 19:19:32 -08:00
committed by GitHub
parent 1192cc0767
commit 1511606799
2 changed files with 23 additions and 2 deletions

View File

@@ -17,6 +17,15 @@ def test_character_text_splitter() -> None:
assert output == expected_output
def test_character_text_splitter_empty_doc() -> None:
    """Check that character splitting does not emit empty documents.

    Both words in the input are longer than ``chunk_size``; the result is
    expected to contain exactly those two words and no empty strings.
    """
    splitter = CharacterTextSplitter(separator=" ", chunk_size=2, chunk_overlap=0)
    chunks = splitter.split_text("foo bar")
    assert chunks == ["foo", "bar"]
def test_character_text_splitter_long() -> None:
"""Test splitting by character count on long words."""
text = "foo bar baz a a"