Harrison/fix text splitter (#1511)

Co-authored-by: ajaysolanky <ajsolanky@gmail.com>
Co-authored-by: Ajay Solanky <ajaysolanky@saw-l14668307kd.myfiosgateway.com>
This commit is contained in:
Harrison Chase
2023-03-07 15:42:28 -08:00
committed by GitHub
parent e3354404ad
commit 064741db58
2 changed files with 25 additions and 7 deletions

View File

@@ -26,6 +26,15 @@ def test_character_text_splitter_empty_doc() -> None:
assert output == expected_output
def test_character_text_splitter_separtor_empty_doc() -> None:
    """Test splitting a text whose words cannot share a single chunk.

    "f b" is three characters long, so with ``chunk_size=2`` and no overlap
    the two words are expected back as separate chunks, each without the
    separator.
    """
    splitter = CharacterTextSplitter(
        separator=" ",
        chunk_size=2,
        chunk_overlap=0,
    )
    chunks = splitter.split_text("f b")
    assert chunks == ["f", "b"]
def test_character_text_splitter_long() -> None:
"""Test splitting by character count on long words."""
text = "foo bar baz a a"
@@ -99,7 +108,7 @@ Bye!\n\n-H."""
"Harrison.",
"How? Are?",
"You?",
"Okay then f",
"Okay then",
"f f f f.",
"This is a",
"a weird",
@@ -107,8 +116,8 @@ Bye!\n\n-H."""
"write, but",
"gotta test",
"the",
"splitting",
"gggg",
"splittingg",
"ggg",
"some how.",
"Bye!\n\n-H.",
]