chore(text-splitters): select ALL rules with exclusions (#32325)

Co-authored-by: Mason Daugherty <mason@langchain.dev>
2025-09-15 22:44:36 +00:00 · 2025-09-08 16:46:09 +02:00
parent 20401df25d
commit 0c3e8ccd0e
19 changed files with 444 additions and 366 deletions
--- a/libs/text-splitters/tests/integration_tests/test_nlp_text_splitters.py
+++ b/libs/text-splitters/tests/integration_tests/test_nlp_text_splitters.py
@@ -1,6 +1,6 @@
 """Test text splitting functionality using NLTK and Spacy based sentence splitters."""

-from typing import Any
+import re

 import nltk
 import pytest
@@ -15,11 +15,8 @@ def setup_module() -> None:


@pytest.fixture
-def spacy() -> Any:
-    try:
-        import spacy
-    except ImportError:
-        pytest.skip("Spacy not installed.")
+def spacy() -> None:
+    spacy = pytest.importorskip("spacy")

    # Check if en_core_web_sm model is available
    try:
@@ -32,18 +29,27 @@ def spacy() -> Any:
            "en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl"
        )

-    return spacy
-

 def test_nltk_text_splitting_args() -> None:
    """Test invalid arguments."""
-    with pytest.raises(ValueError):
+    with pytest.raises(
+        ValueError,
+        match=re.escape(
+            "Got a larger chunk overlap (4) than chunk size (2), should be smaller."
+        ),
+    ):
        NLTKTextSplitter(chunk_size=2, chunk_overlap=4)


-def test_spacy_text_splitting_args(spacy: Any) -> None:
+@pytest.mark.usefixtures("spacy")
+def test_spacy_text_splitting_args() -> None:
    """Test invalid arguments."""
-    with pytest.raises(ValueError):
+    with pytest.raises(
+        ValueError,
+        match=re.escape(
+            "Got a larger chunk overlap (4) than chunk size (2), should be smaller."
+        ),
+    ):
        SpacyTextSplitter(chunk_size=2, chunk_overlap=4)


@@ -57,8 +63,9 @@ def test_nltk_text_splitter() -> None:
    assert output == expected_output


+@pytest.mark.usefixtures("spacy")
@pytest.mark.parametrize("pipeline", ["sentencizer", "en_core_web_sm"])
-def test_spacy_text_splitter(pipeline: str, spacy: Any) -> None:
+def test_spacy_text_splitter(pipeline: str) -> None:
    """Test splitting by sentence using Spacy."""
    text = "This is sentence one. And this is sentence two."
    separator = "|||"
@@ -68,8 +75,9 @@ def test_spacy_text_splitter(pipeline: str, spacy: Any) -> None:
    assert output == expected_output


+@pytest.mark.usefixtures("spacy")
@pytest.mark.parametrize("pipeline", ["sentencizer", "en_core_web_sm"])
-def test_spacy_text_splitter_strip_whitespace(pipeline: str, spacy: Any) -> None:
+def test_spacy_text_splitter_strip_whitespace(pipeline: str) -> None:
    """Test splitting by sentence using Spacy."""
    text = "This is sentence one. And this is sentence two."
    separator = "|||"
@@ -83,7 +91,9 @@ def test_spacy_text_splitter_strip_whitespace(pipeline: str, spacy: Any) -> None

 def test_nltk_text_splitter_args() -> None:
    """Test invalid arguments for NLTKTextSplitter."""
-    with pytest.raises(ValueError):
+    with pytest.raises(
+        ValueError, match="When use_span_tokenize is True, separator should be ''"
+    ):
        NLTKTextSplitter(
            chunk_size=80,
            chunk_overlap=0,
--- a/libs/text-splitters/tests/integration_tests/test_text_splitter.py
+++ b/libs/text-splitters/tests/integration_tests/test_text_splitter.py
@@ -1,8 +1,7 @@
 """Test text splitters that require an integration."""

-from typing import Any
-
 import pytest
+from transformers import GPT2TokenizerFast

 from langchain_text_splitters import (
    TokenTextSplitter,
@@ -13,25 +12,17 @@ from langchain_text_splitters.sentence_transformers import (
 )


-@pytest.fixture
-def sentence_transformers() -> Any:
-    try:
-        import sentence_transformers
-    except ImportError:
-        pytest.skip("SentenceTransformers not installed.")
-    return sentence_transformers
-
-
 def test_huggingface_type_check() -> None:
    """Test that type checks are done properly on input."""
-    with pytest.raises(ValueError):
-        CharacterTextSplitter.from_huggingface_tokenizer("foo")
+    with pytest.raises(
+        ValueError,
+        match="Tokenizer received was not an instance of PreTrainedTokenizerBase",
+    ):
+        CharacterTextSplitter.from_huggingface_tokenizer("foo")  # type: ignore[arg-type]


 def test_huggingface_tokenizer() -> None:
    """Test text splitter that uses a HuggingFace tokenizer."""
-    from transformers import GPT2TokenizerFast
-
    tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
    text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(
        tokenizer, separator=" ", chunk_size=1, chunk_overlap=0
@@ -63,7 +54,8 @@ def test_token_text_splitter_from_tiktoken() -> None:
    assert expected_tokenizer == actual_tokenizer


-def test_sentence_transformers_count_tokens(sentence_transformers: Any) -> None:
+@pytest.mark.requires("sentence_transformers")
+def test_sentence_transformers_count_tokens() -> None:
    splitter = SentenceTransformersTokenTextSplitter(
        model_name="sentence-transformers/paraphrase-albert-small-v2"
    )
@@ -78,7 +70,8 @@ def test_sentence_transformers_count_tokens(sentence_transformers: Any) -> None:
    assert expected_token_count == token_count


-def test_sentence_transformers_split_text(sentence_transformers: Any) -> None:
+@pytest.mark.requires("sentence_transformers")
+def test_sentence_transformers_split_text() -> None:
    splitter = SentenceTransformersTokenTextSplitter(
        model_name="sentence-transformers/paraphrase-albert-small-v2"
    )
@@ -88,7 +81,8 @@ def test_sentence_transformers_split_text(sentence_transformers: Any) -> None:
    assert expected_text_chunks == text_chunks


-def test_sentence_transformers_multiple_tokens(sentence_transformers: Any) -> None:
+@pytest.mark.requires("sentence_transformers")
+def test_sentence_transformers_multiple_tokens() -> None:
    splitter = SentenceTransformersTokenTextSplitter(chunk_overlap=0)
    text = "Lorem "

--- a/libs/text-splitters/tests/unit_tests/conftest.py
+++ b/libs/text-splitters/tests/unit_tests/conftest.py
@@ -4,10 +4,9 @@ from collections.abc import Sequence
 from importlib import util

 import pytest
-from pytest import Config, Function, Parser


-def pytest_addoption(parser: Parser) -> None:
+def pytest_addoption(parser: pytest.Parser) -> None:
    """Add custom command line options to pytest."""
    parser.addoption(
        "--only-extended",
@@ -21,7 +20,9 @@ def pytest_addoption(parser: Parser) -> None:
    )


-def pytest_collection_modifyitems(config: Config, items: Sequence[Function]) -> None:
+def pytest_collection_modifyitems(
+    config: pytest.Config, items: Sequence[pytest.Function]
+) -> None:
    """Add implementations for handling custom markers.

    At the moment, this adds support for a custom `requires` marker.
@@ -64,7 +65,7 @@ def pytest_collection_modifyitems(config: Config, items: Sequence[Function]) ->
                if pkg not in required_pkgs_info:
                    try:
                        installed = util.find_spec(pkg) is not None
-                    except Exception:
+                    except (ImportError, ValueError):
                        installed = False
                    required_pkgs_info[pkg] = installed

--- a/libs/text-splitters/tests/unit_tests/test_text_splitters.py
+++ b/libs/text-splitters/tests/unit_tests/test_text_splitters.py
@@ -1,11 +1,14 @@
 """Test text splitting functionality."""

+from __future__ import annotations
+
 import random
 import re
 import string
 from typing import Any, Callable

 import pytest
+from bs4 import Tag
 from langchain_core.documents import Document

 from langchain_text_splitters import (
@@ -103,7 +106,9 @@ def test_character_text_splitter_longer_words() -> None:
 def test_character_text_splitter_keep_separator_regex(
    *, separator: str, is_separator_regex: bool
 ) -> None:
-    """Test splitting by characters while keeping the separator
+    """Test CharacterTextSplitter keep separator regex.
+
+    Test splitting by characters while keeping the separator
    that is a regex special character.
    """
    text = "foo.bar.baz.123"
@@ -125,7 +130,9 @@ def test_character_text_splitter_keep_separator_regex(
 def test_character_text_splitter_keep_separator_regex_start(
    *, separator: str, is_separator_regex: bool
 ) -> None:
-    """Test splitting by characters while keeping the separator
+    """Test CharacterTextSplitter keep separator regex and put at start.
+
+    Test splitting by characters while keeping the separator
    that is a regex special character and placing it at the start of each chunk.
    """
    text = "foo.bar.baz.123"
@@ -147,7 +154,9 @@ def test_character_text_splitter_keep_separator_regex_start(
 def test_character_text_splitter_keep_separator_regex_end(
    *, separator: str, is_separator_regex: bool
 ) -> None:
-    """Test splitting by characters while keeping the separator
+    """Test CharacterTextSplitter keep separator regex and put at end.
+
+    Test splitting by characters while keeping the separator
    that is a regex special character and placing it at the end of each chunk.
    """
    text = "foo.bar.baz.123"
@@ -169,8 +178,11 @@ def test_character_text_splitter_keep_separator_regex_end(
 def test_character_text_splitter_discard_separator_regex(
    *, separator: str, is_separator_regex: bool
 ) -> None:
-    """Test splitting by characters discarding the separator
-    that is a regex special character."""
+    """Test CharacterTextSplitter discard separator regex.
+
+    Test splitting by characters discarding the separator
+    that is a regex special character.
+    """
    text = "foo.bar.baz.123"
    splitter = CharacterTextSplitter(
        separator=separator,
@@ -210,12 +222,17 @@ def test_recursive_character_text_splitter_keep_separators() -> None:

 def test_character_text_splitting_args() -> None:
    """Test invalid arguments."""
-    with pytest.raises(ValueError):
+    with pytest.raises(
+        ValueError,
+        match=re.escape(
+            "Got a larger chunk overlap (4) than chunk size (2), should be smaller."
+        ),
+    ):
        CharacterTextSplitter(chunk_size=2, chunk_overlap=4)
    for invalid_size in (0, -1):
-        with pytest.raises(ValueError):
+        with pytest.raises(ValueError, match="chunk_size must be > 0, got"):
            CharacterTextSplitter(chunk_size=invalid_size)
-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError, match="chunk_overlap must be >= 0, got -1"):
        CharacterTextSplitter(chunk_size=2, chunk_overlap=-1)


@@ -1164,7 +1181,6 @@ def test_html_code_splitter() -> None:

 def test_md_header_text_splitter_1() -> None:
    """Test markdown splitter by header: Case 1."""
-
    markdown_document = (
        "# Foo\n\n"
        "    ## Bar\n\n"
@@ -1235,7 +1251,6 @@ def test_md_header_text_splitter_2() -> None:

 def test_md_header_text_splitter_3() -> None:
    """Test markdown splitter by header: Case 3."""
-
    markdown_document = (
        "# Foo\n\n"
        "    ## Bar\n\n"
@@ -1290,7 +1305,6 @@ def test_md_header_text_splitter_3() -> None:

 def test_md_header_text_splitter_preserve_headers_1() -> None:
    """Test markdown splitter by header: Preserve Headers."""
-
    markdown_document = (
        "# Foo\n\n"
        "    ## Bat\n\n"
@@ -1324,7 +1338,6 @@ def test_md_header_text_splitter_preserve_headers_1() -> None:

 def test_md_header_text_splitter_preserve_headers_2() -> None:
    """Test markdown splitter by header: Preserve Headers."""
-
    markdown_document = (
        "# Foo\n\n"
        "    ## Bar\n\n"
@@ -1372,7 +1385,6 @@ def test_md_header_text_splitter_preserve_headers_2() -> None:
@pytest.mark.parametrize("fence", [("```"), ("~~~")])
 def test_md_header_text_splitter_fenced_code_block(fence: str) -> None:
    """Test markdown splitter by header: Fenced code block."""
-
    markdown_document = (
        f"# This is a Header\n\n{fence}\nfoo()\n# Not a header\nbar()\n{fence}"
    )
@@ -1402,7 +1414,6 @@ def test_md_header_text_splitter_fenced_code_block_interleaved(
    fence: str, other_fence: str
 ) -> None:
    """Test markdown splitter by header: Interleaved fenced code block."""
-
    markdown_document = (
        "# This is a Header\n\n"
        f"{fence}\n"
@@ -1438,7 +1449,6 @@ def test_md_header_text_splitter_fenced_code_block_interleaved(
@pytest.mark.parametrize("characters", ["\ufeff"])
 def test_md_header_text_splitter_with_invisible_characters(characters: str) -> None:
    """Test markdown splitter by header: Fenced code block."""
-
    markdown_document = f"{characters}# Foo\n\nfoo()\n{characters}## Bar\n\nbar()"

    headers_to_split_on = [
@@ -1609,7 +1619,6 @@ EXPERIMENTAL_MARKDOWN_DOCUMENT = (

 def test_experimental_markdown_syntax_text_splitter() -> None:
    """Test experimental markdown syntax splitter."""
-
    markdown_splitter = ExperimentalMarkdownSyntaxTextSplitter()
    output = markdown_splitter.split_text(EXPERIMENTAL_MARKDOWN_DOCUMENT)

@@ -1663,7 +1672,6 @@ def test_experimental_markdown_syntax_text_splitter() -> None:

 def test_experimental_markdown_syntax_text_splitter_header_configuration() -> None:
    """Test experimental markdown syntax splitter."""
-
    headers_to_split_on = [("#", "Encabezamiento 1")]

    markdown_splitter = ExperimentalMarkdownSyntaxTextSplitter(
@@ -1709,7 +1717,6 @@ def test_experimental_markdown_syntax_text_splitter_header_configuration() -> No

 def test_experimental_markdown_syntax_text_splitter_with_headers() -> None:
    """Test experimental markdown syntax splitter."""
-
    markdown_splitter = ExperimentalMarkdownSyntaxTextSplitter(strip_headers=False)
    output = markdown_splitter.split_text(EXPERIMENTAL_MARKDOWN_DOCUMENT)

@@ -1768,7 +1775,6 @@ def test_experimental_markdown_syntax_text_splitter_with_headers() -> None:

 def test_experimental_markdown_syntax_text_splitter_split_lines() -> None:
    """Test experimental markdown syntax splitter."""
-
    markdown_splitter = ExperimentalMarkdownSyntaxTextSplitter(return_each_line=True)
    output = markdown_splitter.split_text(EXPERIMENTAL_MARKDOWN_DOCUMENT)

@@ -1876,8 +1882,11 @@ EXPERIMENTAL_MARKDOWN_DOCUMENTS = [


 def test_experimental_markdown_syntax_text_splitter_on_multi_files() -> None:
-    """Test experimental markdown syntax splitter split
-    on default called consecutively on two files."""
+    """Test ExperimentalMarkdownSyntaxTextSplitter on multiple files.
+
+    Test experimental markdown syntax splitter split on default called consecutively
+    on two files.
+    """
    markdown_splitter = ExperimentalMarkdownSyntaxTextSplitter()
    output = []
    for experimental_markdown_document in EXPERIMENTAL_MARKDOWN_DOCUMENTS:
@@ -1958,8 +1967,11 @@ def test_experimental_markdown_syntax_text_splitter_on_multi_files() -> None:
 def test_experimental_markdown_syntax_text_splitter_split_lines_on_multi_files() -> (
    None
 ):
-    """Test experimental markdown syntax splitter split
-    on each line called consecutively on two files."""
+    """Test ExperimentalMarkdownSyntaxTextSplitter split lines on multiple files.
+
+    Test experimental markdown syntax splitter split on each line called consecutively
+    on two files.
+    """
    markdown_splitter = ExperimentalMarkdownSyntaxTextSplitter(return_each_line=True)
    output = []
    for experimental_markdown_document in EXPERIMENTAL_MARKDOWN_DOCUMENTS:
@@ -2083,9 +2095,10 @@ def test_experimental_markdown_syntax_text_splitter_split_lines_on_multi_files()
 def test_experimental_markdown_syntax_text_splitter_with_header_on_multi_files() -> (
    None
 ):
-    """Test experimental markdown splitter
-    by header called consecutively on two files"""
+    """Test ExperimentalMarkdownSyntaxTextSplitter with header on multiple files.

+    Test experimental markdown splitter by header called consecutively on two files.
+    """
    markdown_splitter = ExperimentalMarkdownSyntaxTextSplitter(strip_headers=False)
    output = []
    for experimental_markdown_document in EXPERIMENTAL_MARKDOWN_DOCUMENTS:
@@ -2171,9 +2184,11 @@ def test_experimental_markdown_syntax_text_splitter_with_header_on_multi_files()
 def test_experimental_markdown_syntax_text_splitter_header_config_on_multi_files() -> (
    None
 ):
-    """Test experimental markdown splitter
-    by header configuration called consecutively on two files"""
+    """Test ExperimentalMarkdownSyntaxTextSplitter header config on multiple files.

+    Test experimental markdown splitter by header configuration called consecutively
+    on two files.
+    """
    headers_to_split_on = [("#", "Encabezamiento 1")]
    markdown_splitter = ExperimentalMarkdownSyntaxTextSplitter(
        headers_to_split_on=headers_to_split_on
@@ -2354,8 +2369,8 @@ def test_haskell_code_splitter() -> None:
 def html_header_splitter_splitter_factory() -> Callable[
    [list[tuple[str, str]]], HTMLHeaderTextSplitter
 ]:
-    """
-    Fixture to create an HTMLHeaderTextSplitter instance with given headers.
+    """Fixture to create an HTMLHeaderTextSplitter instance with given headers.
+
    This factory allows dynamic creation of splitters with different headers.
    """

@@ -2553,14 +2568,15 @@ def html_header_splitter_splitter_factory() -> Callable[
 )
@pytest.mark.requires("bs4")
 def test_html_header_text_splitter(
-    html_header_splitter_splitter_factory: Any,
+    html_header_splitter_splitter_factory: Callable[
+        [list[tuple[str, str]]], HTMLHeaderTextSplitter
+    ],
    headers_to_split_on: list[tuple[str, str]],
    html_input: str,
    expected_documents: list[Document],
    test_case: str,
 ) -> None:
-    """
-    Test the HTML header text splitter.
+    """Test the HTML header text splitter.

    Args:
        html_header_splitter_splitter_factory (Any): Factory function to create
@@ -2574,10 +2590,7 @@ def test_html_header_text_splitter(
        AssertionError: If the number of documents or their content/metadata
            does not match the expected values.
    """
-
-    splitter = html_header_splitter_splitter_factory(
-        headers_to_split_on=headers_to_split_on
-    )
+    splitter = html_header_splitter_splitter_factory(headers_to_split_on)
    docs = splitter.split_text(html_input)

    assert len(docs) == len(expected_documents), (
@@ -2709,14 +2722,15 @@ def test_html_header_text_splitter(
 )
@pytest.mark.requires("bs4")
 def test_additional_html_header_text_splitter(
-    html_header_splitter_splitter_factory: Any,
+    html_header_splitter_splitter_factory: Callable[
+        [list[tuple[str, str]]], HTMLHeaderTextSplitter
+    ],
    headers_to_split_on: list[tuple[str, str]],
    html_content: str,
    expected_output: list[Document],
    test_case: str,
 ) -> None:
-    """
-    Test the HTML header text splitter.
+    """Test the HTML header text splitter.

    Args:
        html_header_splitter_splitter_factory (Any): Factory function to create
@@ -2730,9 +2744,7 @@ def test_additional_html_header_text_splitter(
        AssertionError: If the number of documents or their content/metadata
            does not match the expected output.
    """
-    splitter = html_header_splitter_splitter_factory(
-        headers_to_split_on=headers_to_split_on
-    )
+    splitter = html_header_splitter_splitter_factory(headers_to_split_on)
    docs = splitter.split_text(html_content)

    assert len(docs) == len(expected_output), (
@@ -2780,14 +2792,16 @@ def test_additional_html_header_text_splitter(
 )
@pytest.mark.requires("bs4")
 def test_html_no_headers_with_multiple_splitters(
-    html_header_splitter_splitter_factory: Any,
+    html_header_splitter_splitter_factory: Callable[
+        [list[tuple[str, str]]], HTMLHeaderTextSplitter
+    ],
    headers_to_split_on: list[tuple[str, str]],
    html_content: str,
    expected_output: list[Document],
    test_case: str,
 ) -> None:
-    """
-    Test HTML content splitting without headers using multiple splitters.
+    """Test HTML content splitting without headers using multiple splitters.
+
    Args:
        html_header_splitter_splitter_factory (Any): Factory to create the
            HTML header splitter.
@@ -2796,13 +2810,12 @@ def test_html_no_headers_with_multiple_splitters(
        expected_output (List[Document]): Expected list of Document objects
            after splitting.
        test_case (str): Description of the test case.
+
    Raises:
        AssertionError: If the number of documents or their content/metadata
            does not match the expected output.
    """
-    splitter = html_header_splitter_splitter_factory(
-        headers_to_split_on=headers_to_split_on
-    )
+    splitter = html_header_splitter_splitter_factory(headers_to_split_on)
    docs = splitter.split_text(html_content)

    assert len(docs) == len(expected_output), (
@@ -3046,7 +3059,7 @@ def test_happy_path_splitting_with_duplicate_header_tag() -> None:


 def test_split_json() -> None:
-    """Test json text splitter"""
+    """Test json text splitter."""
    max_chunk = 800
    splitter = RecursiveJsonSplitter(max_chunk_size=max_chunk)

@@ -3068,7 +3081,7 @@ def test_split_json() -> None:


 def test_split_json_with_lists() -> None:
-    """Test json text splitter with list conversion"""
+    """Test json text splitter with list conversion."""
    max_chunk = 800
    splitter = RecursiveJsonSplitter(max_chunk_size=max_chunk)

@@ -3240,7 +3253,7 @@ def test_visualbasic6_code_splitter() -> None:
    ]


-def custom_iframe_extractor(iframe_tag: Any) -> str:
+def custom_iframe_extractor(iframe_tag: Tag) -> str:
    iframe_src = iframe_tag.get("src", "")
    return f"[iframe:{iframe_src}]({iframe_src})"

@@ -3324,8 +3337,11 @@ def test_html_splitter_with_nested_elements() -> None:

@pytest.mark.requires("bs4")
 def test_html_splitter_with_preserved_elements() -> None:
-    """Test HTML splitting with preserved elements like <table>, <ul> with low chunk
-    size."""
+    """Test HTML splitter with preserved elements.
+
+    Test HTML splitting with preserved elements like <table>, <ul> with low chunk
+    size.
+    """
    html_content = """
    <h1>Section 1</h1>
    <table>
@@ -3563,8 +3579,11 @@ def test_html_splitter_with_no_headers() -> None:

@pytest.mark.requires("bs4")
 def test_html_splitter_with_media_preservation() -> None:
-    """Test HTML splitting with media elements preserved and converted to Markdown-like
-    links."""
+    """Test HTML splitter with media preservation.
+
+    Test HTML splitting with media elements preserved and converted to Markdown-like
+    links.
+    """
    html_content = """
    <h1>Section 1</h1>
    <p>This is an image:</p>
@@ -3600,7 +3619,7 @@ def test_html_splitter_with_media_preservation() -> None:

@pytest.mark.requires("bs4")
 def test_html_splitter_keep_separator_true() -> None:
-    """Test HTML splitting with keep_separator=True"""
+    """Test HTML splitting with keep_separator=True."""
    html_content = """
    <h1>Section 1</h1>
    <p>This is some text. This is some other text.</p>
@@ -3629,7 +3648,7 @@ def test_html_splitter_keep_separator_true() -> None:

@pytest.mark.requires("bs4")
 def test_html_splitter_keep_separator_false() -> None:
-    """Test HTML splitting with keep_separator=False"""
+    """Test HTML splitting with keep_separator=False."""
    html_content = """
    <h1>Section 1</h1>
    <p>This is some text. This is some other text.</p>
@@ -3658,7 +3677,7 @@ def test_html_splitter_keep_separator_false() -> None:

@pytest.mark.requires("bs4")
 def test_html_splitter_keep_separator_start() -> None:
-    """Test HTML splitting with keep_separator="start" """
+    """Test HTML splitting with keep_separator="start"."""
    html_content = """
    <h1>Section 1</h1>
    <p>This is some text. This is some other text.</p>
@@ -3687,7 +3706,7 @@ def test_html_splitter_keep_separator_start() -> None:

@pytest.mark.requires("bs4")
 def test_html_splitter_keep_separator_end() -> None:
-    """Test HTML splitting with keep_separator="end" """
+    """Test HTML splitting with keep_separator="end"."""
    html_content = """
    <h1>Section 1</h1>
    <p>This is some text. This is some other text.</p>
@@ -3716,7 +3735,7 @@ def test_html_splitter_keep_separator_end() -> None:

@pytest.mark.requires("bs4")
 def test_html_splitter_keep_separator_default() -> None:
-    """Test HTML splitting with keep_separator not set"""
+    """Test HTML splitting with keep_separator not set."""
    html_content = """
    <h1>Section 1</h1>
    <p>This is some text. This is some other text.</p>