text-splitters[patch]: fix typing for keep_separator (#25706)

This commit is contained in:
ccurme 2024-08-23 13:22:02 -04:00 committed by GitHub
parent 8170bd636f
commit bc557a5663
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 25 additions and 1 deletions

View File

@@ -65,7 +65,7 @@ class RecursiveCharacterTextSplitter(TextSplitter):
def __init__(
self,
separators: Optional[List[str]] = None,
keep_separator: bool = True,
keep_separator: Union[bool, Literal["start", "end"]] = True,
is_separator_regex: bool = False,
**kwargs: Any,
) -> None:

View File

@@ -180,6 +180,30 @@ def test_character_text_splitter_discard_separator_regex(
assert output == expected_output
def test_recursive_character_text_splitter_keep_separators() -> None:
    """Exercise the string values of ``keep_separator`` on the recursive splitter.

    The same sentence is split on "," and "." twice: once with
    ``keep_separator="start"`` and once with ``keep_separator="end"``, and the
    resulting chunks are compared against the expected lists below.
    """
    separators = [",", "."]
    text = "Apple,banana,orange and tomato."

    # keep_separator="start": each separator is expected at the beginning of
    # the chunk that follows it.
    chunks_with_leading_separator = RecursiveCharacterTextSplitter(
        separators=separators,
        keep_separator="start",
        chunk_size=10,
        chunk_overlap=0,
    ).split_text(text)
    assert chunks_with_leading_separator == [
        "Apple",
        ",banana",
        ",orange and tomato",
        ".",
    ]

    # keep_separator="end": each separator is expected at the end of the
    # chunk that precedes it.
    chunks_with_trailing_separator = RecursiveCharacterTextSplitter(
        separators=separators,
        keep_separator="end",
        chunk_size=10,
        chunk_overlap=0,
    ).split_text(text)
    assert chunks_with_trailing_separator == [
        "Apple,",
        "banana,",
        "orange and tomato.",
    ]
def test_character_text_splitting_args() -> None:
"""Test invalid arguments."""
with pytest.raises(ValueError):