mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-28 01:19:31 +00:00
text-splitters[patch]: fix typing for keep_separator (#25706)
This commit is contained in:
parent
8170bd636f
commit
bc557a5663
@@ -65,7 +65,7 @@ class RecursiveCharacterTextSplitter(TextSplitter):
|
||||
def __init__(
|
||||
self,
|
||||
separators: Optional[List[str]] = None,
|
||||
keep_separator: bool = True,
|
||||
keep_separator: Union[bool, Literal["start", "end"]] = True,
|
||||
is_separator_regex: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
|
@@ -180,6 +180,30 @@ def test_character_text_splitter_discard_separator_regex(
|
||||
assert output == expected_output
|
||||
|
||||
|
||||
def test_recursive_character_text_splitter_keep_separators() -> None:
    """Check separator placement for ``keep_separator="start"`` and ``"end"``.

    The same text is split twice with identical settings except for
    ``keep_separator``. With ``"start"`` each separator is expected at the
    front of the chunk that follows it; with ``"end"`` it is expected at the
    back of the chunk that precedes it.
    """
    separators = [",", "."]
    text = "Apple,banana,orange and tomato."

    # keep_separator="start": separators lead the chunk that follows them.
    # The mode is spelled as a literal at the call site so it matches the
    # Literal["start", "end"] annotation on the constructor.
    leading_chunks = RecursiveCharacterTextSplitter(
        chunk_size=10,
        chunk_overlap=0,
        separators=separators,
        keep_separator="start",
    ).split_text(text)
    assert leading_chunks == ["Apple", ",banana", ",orange and tomato", "."]

    # keep_separator="end": separators trail the chunk that precedes them.
    trailing_chunks = RecursiveCharacterTextSplitter(
        chunk_size=10,
        chunk_overlap=0,
        separators=separators,
        keep_separator="end",
    ).split_text(text)
    assert trailing_chunks == ["Apple,", "banana,", "orange and tomato."]
|
||||
|
||||
|
||||
def test_character_text_splitting_args() -> None:
|
||||
"""Test invalid arguments."""
|
||||
with pytest.raises(ValueError):
|
||||
|
Loading…
Reference in New Issue
Block a user