text-splitters: Add ruff rules FBT (#31935)

See [flake8-boolean-trap (FBT)](https://docs.astral.sh/ruff/rules/#flake8-boolean-trap-fbt)
2025-07-13 16:36:06 +00:00 · 2025-07-10 00:36:58 +02:00 · 2025-07-10 00:36:58 +02:00 · 060fc0e3c9
commit 060fc0e3c9
parent 4d9c0b0883
8 changed files with 37 additions and 27 deletions
--- a/libs/text-splitters/langchain_text_splitters/base.py
+++ b/libs/text-splitters/langchain_text_splitters/base.py
@@ -31,9 +31,9 @@ class TextSplitter(BaseDocumentTransformer, ABC):
        chunk_size: int = 4000,
        chunk_overlap: int = 200,
        length_function: Callable[[str], int] = len,
-        keep_separator: Union[bool, Literal["start", "end"]] = False,
-        add_start_index: bool = False,
-        strip_whitespace: bool = True,
+        keep_separator: Union[bool, Literal["start", "end"]] = False,  # noqa: FBT001,FBT002
+        add_start_index: bool = False,  # noqa: FBT001,FBT002
+        strip_whitespace: bool = True,  # noqa: FBT001,FBT002
    ) -> None:
        """Create a new TextSplitter.

--- a/libs/text-splitters/langchain_text_splitters/character.py
+++ b/libs/text-splitters/langchain_text_splitters/character.py
@@ -10,7 +10,10 @@ class CharacterTextSplitter(TextSplitter):
    """Splitting text that looks at characters."""

    def __init__(
-        self, separator: str = "\n\n", is_separator_regex: bool = False, **kwargs: Any
+        self,
+        separator: str = "\n\n",
+        is_separator_regex: bool = False,  # noqa: FBT001,FBT002
+        **kwargs: Any,
    ) -> None:
        """Create a new TextSplitter."""
        super().__init__(**kwargs)
@@ -25,7 +28,9 @@ class CharacterTextSplitter(TextSplitter):
        )

        # 2. Initial split (keep separator if requested)
-        splits = _split_text_with_regex(text, sep_pattern, self._keep_separator)
+        splits = _split_text_with_regex(
+            text, sep_pattern, keep_separator=self._keep_separator
+        )

        # 3. Detect zero-width lookaround so we never re-insert it
        lookaround_prefixes = ("(?=", "(?<!", "(?<=", "(?!")
@@ -45,7 +50,7 @@ class CharacterTextSplitter(TextSplitter):


 def _split_text_with_regex(
-    text: str, separator: str, keep_separator: Union[bool, Literal["start", "end"]]
+    text: str, separator: str, *, keep_separator: Union[bool, Literal["start", "end"]]
 ) -> list[str]:
    # Now that we have the separator, split the text
    if separator:
@@ -81,8 +86,8 @@ class RecursiveCharacterTextSplitter(TextSplitter):
    def __init__(
        self,
        separators: Optional[list[str]] = None,
-        keep_separator: Union[bool, Literal["start", "end"]] = True,
-        is_separator_regex: bool = False,
+        keep_separator: Union[bool, Literal["start", "end"]] = True,  # noqa: FBT001,FBT002
+        is_separator_regex: bool = False,  # noqa: FBT001,FBT002
        **kwargs: Any,
    ) -> None:
        """Create a new TextSplitter."""
@@ -107,7 +112,9 @@ class RecursiveCharacterTextSplitter(TextSplitter):
                break

        _separator = separator if self._is_separator_regex else re.escape(separator)
-        splits = _split_text_with_regex(text, _separator, self._keep_separator)
+        splits = _split_text_with_regex(
+            text, _separator, keep_separator=self._keep_separator
+        )

        # Now go merging things, recursively splitting longer texts.
        _good_splits = []
--- a/libs/text-splitters/langchain_text_splitters/html.py
+++ b/libs/text-splitters/langchain_text_splitters/html.py
@@ -112,7 +112,7 @@ class HTMLHeaderTextSplitter:
    def __init__(
        self,
        headers_to_split_on: list[tuple[str, str]],
-        return_each_element: bool = False,
+        return_each_element: bool = False,  # noqa: FBT001,FBT002
    ) -> None:
        """Initialize with headers to split on.

@@ -744,7 +744,7 @@ class HTMLSemanticPreservingSplitter(BaseDocumentTransformer):
            soup (Any): Parsed HTML content using BeautifulSoup.
        """
        if self._allowlist_tags:
-            for tag in soup.find_all(True):
+            for tag in soup.find_all(name=True):
                if tag.name not in self._allowlist_tags:
                    tag.decompose()

--- a/libs/text-splitters/langchain_text_splitters/json.py
+++ b/libs/text-splitters/langchain_text_splitters/json.py
@@ -107,7 +107,7 @@ class RecursiveJsonSplitter:
    def split_json(
        self,
        json_data: dict[str, Any],
-        convert_lists: bool = False,
+        convert_lists: bool = False,  # noqa: FBT001,FBT002
    ) -> list[dict[str, Any]]:
        """Splits JSON into a list of JSON chunks."""
        if convert_lists:
@@ -123,8 +123,8 @@ class RecursiveJsonSplitter:
    def split_text(
        self,
        json_data: dict[str, Any],
-        convert_lists: bool = False,
-        ensure_ascii: bool = True,
+        convert_lists: bool = False,  # noqa: FBT001,FBT002
+        ensure_ascii: bool = True,  # noqa: FBT001,FBT002
    ) -> list[str]:
        """Splits JSON into a list of JSON formatted strings."""
        chunks = self.split_json(json_data=json_data, convert_lists=convert_lists)
@@ -135,8 +135,8 @@ class RecursiveJsonSplitter:
    def create_documents(
        self,
        texts: list[dict[str, Any]],
-        convert_lists: bool = False,
-        ensure_ascii: bool = True,
+        convert_lists: bool = False,  # noqa: FBT001,FBT002
+        ensure_ascii: bool = True,  # noqa: FBT001,FBT002
        metadatas: Optional[list[dict[Any, Any]]] = None,
    ) -> list[Document]:
        """Create documents from a list of json objects (Dict)."""
--- a/libs/text-splitters/langchain_text_splitters/markdown.py
+++ b/libs/text-splitters/langchain_text_splitters/markdown.py
@@ -24,8 +24,8 @@ class MarkdownHeaderTextSplitter:
    def __init__(
        self,
        headers_to_split_on: list[tuple[str, str]],
-        return_each_line: bool = False,
-        strip_headers: bool = True,
+        return_each_line: bool = False,  # noqa: FBT001,FBT002
+        strip_headers: bool = True,  # noqa: FBT001,FBT002
    ):
        """Create a new MarkdownHeaderTextSplitter.

@@ -279,8 +279,8 @@ class ExperimentalMarkdownSyntaxTextSplitter:
    def __init__(
        self,
        headers_to_split_on: Union[list[tuple[str, str]], None] = None,
-        return_each_line: bool = False,
-        strip_headers: bool = True,
+        return_each_line: bool = False,  # noqa: FBT001,FBT002
+        strip_headers: bool = True,  # noqa: FBT001,FBT002
    ):
        """Initialize the text splitter with header splitting and formatting options.

--- a/libs/text-splitters/pyproject.toml
+++ b/libs/text-splitters/pyproject.toml
@@ -61,7 +61,7 @@ ignore_missing_imports = "True"
 target-version = "py39"

 [tool.ruff.lint]
-select = ["E", "F", "I", "UP", "PGH003", "T201", "D"]
+select = ["E", "F", "FBT", "I", "UP", "PGH003", "T201", "D"]
 ignore = ["D100",]
 pyupgrade.keep-runtime-typing = true

--- a/libs/text-splitters/tests/unit_tests/test_text_splitters.py
+++ b/libs/text-splitters/tests/unit_tests/test_text_splitters.py
@@ -101,7 +101,7 @@ def test_character_text_splitter_longer_words() -> None:
    ("separator", "is_separator_regex"), [(re.escape("."), True), (".", False)]
 )
 def test_character_text_splitter_keep_separator_regex(
-    separator: str, is_separator_regex: bool
+    *, separator: str, is_separator_regex: bool
 ) -> None:
    """Test splitting by characters while keeping the separator
    that is a regex special character.
@@ -123,7 +123,7 @@ def test_character_text_splitter_keep_separator_regex(
    ("separator", "is_separator_regex"), [(re.escape("."), True), (".", False)]
 )
 def test_character_text_splitter_keep_separator_regex_start(
-    separator: str, is_separator_regex: bool
+    *, separator: str, is_separator_regex: bool
 ) -> None:
    """Test splitting by characters while keeping the separator
    that is a regex special character and placing it at the start of each chunk.
@@ -145,7 +145,7 @@ def test_character_text_splitter_keep_separator_regex_start(
    ("separator", "is_separator_regex"), [(re.escape("."), True), (".", False)]
 )
 def test_character_text_splitter_keep_separator_regex_end(
-    separator: str, is_separator_regex: bool
+    *, separator: str, is_separator_regex: bool
 ) -> None:
    """Test splitting by characters while keeping the separator
    that is a regex special character and placing it at the end of each chunk.
@@ -167,7 +167,7 @@ def test_character_text_splitter_keep_separator_regex_end(
    ("separator", "is_separator_regex"), [(re.escape("."), True), (".", False)]
 )
 def test_character_text_splitter_discard_separator_regex(
-    separator: str, is_separator_regex: bool
+    *, separator: str, is_separator_regex: bool
 ) -> None:
    """Test splitting by characters discarding the separator
    that is a regex special character."""
@@ -338,7 +338,9 @@ def test_iterative_text_splitter_discard_separator() -> None:
    ]


-def __test_iterative_text_splitter(chunk_size: int, keep_separator: bool) -> list[str]:
+def __test_iterative_text_splitter(
+    *, chunk_size: int, keep_separator: bool
+) -> list[str]:
    chunk_size += 1 if keep_separator else 0

    splitter = RecursiveCharacterTextSplitter(
@@ -3574,6 +3576,7 @@ def test_character_text_splitter_discard_regex_separator_on_merge() -> None:
 )
 def test_character_text_splitter_chunk_size_effect(
    separator: str,
+    *,
    is_regex: bool,
    text: str,
    chunk_size: int,
--- a/libs/text-splitters/uv.lock
+++ b/libs/text-splitters/uv.lock
@@ -1119,7 +1119,7 @@ dev = [
    { name = "jupyter", specifier = ">=1.0.0,<2.0.0" },
    { name = "setuptools", specifier = ">=67.6.1,<68.0.0" },
 ]
-lint = [{ name = "ruff", specifier = ">=0.11.2,<0.12.0" }]
+lint = [{ name = "ruff", specifier = ">=0.12.2,<0.13" }]
 test = [
    { name = "blockbuster", specifier = "~=1.5.18" },
    { name = "freezegun", specifier = ">=1.2.2,<2.0.0" },