mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-28 09:28:48 +00:00
text-splitters[patch]: fix typing for keep_separator (#25706)
This commit is contained in:
parent
8170bd636f
commit
bc557a5663
@ -65,7 +65,7 @@ class RecursiveCharacterTextSplitter(TextSplitter):
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
separators: Optional[List[str]] = None,
|
separators: Optional[List[str]] = None,
|
||||||
keep_separator: bool = True,
|
keep_separator: Union[bool, Literal["start", "end"]] = True,
|
||||||
is_separator_regex: bool = False,
|
is_separator_regex: bool = False,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
@ -180,6 +180,30 @@ def test_character_text_splitter_discard_separator_regex(
|
|||||||
assert output == expected_output
|
assert output == expected_output
|
||||||
|
|
||||||
|
|
||||||
|
def test_recursive_character_text_splitter_keep_separators() -> None:
    """Exercise the ``"start"`` and ``"end"`` values of ``keep_separator``.

    The same sentence is split twice on ``","`` and ``"."``; the expected
    chunks differ only in which side of the split the separator stays on.
    """
    delimiters = [",", "."]
    text = "Apple,banana,orange and tomato."

    # "start": each kept separator leads the chunk that follows it.
    start_chunks = RecursiveCharacterTextSplitter(
        chunk_size=10,
        chunk_overlap=0,
        separators=delimiters,
        keep_separator="start",
    ).split_text(text)
    assert start_chunks == ["Apple", ",banana", ",orange and tomato", "."]

    # "end": each kept separator trails the chunk that precedes it.
    end_chunks = RecursiveCharacterTextSplitter(
        chunk_size=10,
        chunk_overlap=0,
        separators=delimiters,
        keep_separator="end",
    ).split_text(text)
    assert end_chunks == ["Apple,", "banana,", "orange and tomato."]
|
||||||
|
|
||||||
|
|
||||||
def test_character_text_splitting_args() -> None:
|
def test_character_text_splitting_args() -> None:
|
||||||
"""Test invalid arguments."""
|
"""Test invalid arguments."""
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
|
Loading…
Reference in New Issue
Block a user