mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-25 04:49:17 +00:00
Harrison/map reduce (#36)
This commit is contained in:
28
tests/unit_tests/test_text_splitter.py
Normal file
28
tests/unit_tests/test_text_splitter.py
Normal file
@@ -0,0 +1,28 @@
|
||||
"""Test text splitting functionality."""
|
||||
import pytest
|
||||
|
||||
from langchain.text_splitter import CharacterTextSplitter
|
||||
|
||||
|
||||
def test_character_text_splitter() -> None:
    """Split a short space-separated string and compare the chunks.

    Uses a space separator with chunk_size=5 and chunk_overlap=3; the
    splitter is expected to return three two-word chunks in which each
    chunk repeats the final word of the previous one.
    """
    chunker = CharacterTextSplitter(separator=" ", chunk_size=5, chunk_overlap=3)
    chunks = chunker.split_text("foo bar baz 123")
    assert chunks == ["foo bar", "bar baz", "baz 123"]
|
||||
|
||||
|
||||
def test_character_text_splitter_longer_words() -> None:
    """Split when every word is longer than the requested chunk size.

    With chunk_size=1 and chunk_overlap=1 on a space separator, the
    expected result is one chunk per word, with no word broken apart.
    """
    chunker = CharacterTextSplitter(separator=" ", chunk_size=1, chunk_overlap=1)
    chunks = chunker.split_text("foo bar baz 123")
    assert chunks == ["foo", "bar", "baz", "123"]
|
||||
|
||||
|
||||
def test_character_text_splitting_args() -> None:
    """Constructing a splitter with invalid arguments must raise ValueError.

    The arguments used here are chunk_size=2 with chunk_overlap=4.
    """
    size, overlap = 2, 4
    with pytest.raises(ValueError):
        CharacterTextSplitter(chunk_size=size, chunk_overlap=overlap)
|
Reference in New Issue
Block a user