From 802d2bf249da83b7d0411d7d58127daeb7e102f3 Mon Sep 17 00:00:00 2001 From: Christophe Bornet Date: Thu, 3 Jul 2025 16:11:35 +0200 Subject: [PATCH] text-splitters: Add ruff rule UP (pyupgrade) (#31841) See https://docs.astral.sh/ruff/rules/#pyupgrade-up All auto-fixed except `typing.AbstractSet` -> `collections.abc.Set` --- .../langchain_text_splitters/base.py | 37 ++++----- .../langchain_text_splitters/character.py | 14 ++-- .../langchain_text_splitters/html.py | 76 +++++++++---------- .../langchain_text_splitters/json.py | 6 +- .../langchain_text_splitters/jsx.py | 6 +- .../langchain_text_splitters/konlpy.py | 4 +- .../langchain_text_splitters/markdown.py | 32 ++++---- .../langchain_text_splitters/nltk.py | 4 +- .../sentence_transformers.py | 6 +- .../langchain_text_splitters/spacy.py | 4 +- libs/text-splitters/pyproject.toml | 4 +- .../tests/unit_tests/conftest.py | 4 +- .../tests/unit_tests/test_text_splitters.py | 24 +++--- 13 files changed, 106 insertions(+), 115 deletions(-) diff --git a/libs/text-splitters/langchain_text_splitters/base.py b/libs/text-splitters/langchain_text_splitters/base.py index 95bb1d9965d..ac115e8f94f 100644 --- a/libs/text-splitters/langchain_text_splitters/base.py +++ b/libs/text-splitters/langchain_text_splitters/base.py @@ -3,19 +3,14 @@ from __future__ import annotations import copy import logging from abc import ABC, abstractmethod +from collections.abc import Collection, Iterable, Sequence, Set from dataclasses import dataclass from enum import Enum from typing import ( - AbstractSet, Any, Callable, - Collection, - Iterable, - List, Literal, Optional, - Sequence, - Type, TypeVar, Union, ) @@ -64,12 +59,12 @@ class TextSplitter(BaseDocumentTransformer, ABC): self._strip_whitespace = strip_whitespace @abstractmethod - def split_text(self, text: str) -> List[str]: + def split_text(self, text: str) -> list[str]: """Split text into multiple components.""" def create_documents( self, texts: list[str], metadatas: Optional[list[dict[Any, Any]]] = None - ) -> List[Document]: + ) -> list[Document]: """Create documents from a list of texts.""" _metadatas = metadatas or [{}] * len(texts) documents = [] @@ -87,7 +82,7 @@ class TextSplitter(BaseDocumentTransformer, ABC): documents.append(new_doc) return documents - def split_documents(self, documents: Iterable[Document]) -> List[Document]: + def split_documents(self, documents: Iterable[Document]) -> list[Document]: """Split documents.""" texts, metadatas = [], [] for doc in documents: @@ -95,7 +90,7 @@ class TextSplitter(BaseDocumentTransformer, ABC): metadatas.append(doc.metadata) return self.create_documents(texts, metadatas=metadatas) - def _join_docs(self, docs: List[str], separator: str) -> Optional[str]: + def _join_docs(self, docs: list[str], separator: str) -> Optional[str]: text = separator.join(docs) if self._strip_whitespace: text = text.strip() @@ -104,13 +99,13 @@ class TextSplitter(BaseDocumentTransformer, ABC): else: return text - def _merge_splits(self, splits: Iterable[str], separator: str) -> List[str]: + def _merge_splits(self, splits: Iterable[str], separator: str) -> list[str]: # We now want to combine these smaller pieces into medium size # chunks to send to the LLM. 
separator_len = self._length_function(separator) docs = [] - current_doc: List[str] = [] + current_doc: list[str] = [] total = 0 for d in splits: _len = self._length_function(d) @@ -169,10 +164,10 @@ class TextSplitter(BaseDocumentTransformer, ABC): @classmethod def from_tiktoken_encoder( - cls: Type[TS], + cls: type[TS], encoding_name: str = "gpt2", model_name: Optional[str] = None, - allowed_special: Union[Literal["all"], AbstractSet[str]] = set(), + allowed_special: Union[Literal["all"], Set[str]] = set(), disallowed_special: Union[Literal["all"], Collection[str]] = "all", **kwargs: Any, ) -> TS: @@ -225,7 +220,7 @@ class TokenTextSplitter(TextSplitter): self, encoding_name: str = "gpt2", model_name: Optional[str] = None, - allowed_special: Union[Literal["all"], AbstractSet[str]] = set(), + allowed_special: Union[Literal["all"], Set[str]] = set(), disallowed_special: Union[Literal["all"], Collection[str]] = "all", **kwargs: Any, ) -> None: @@ -248,7 +243,7 @@ class TokenTextSplitter(TextSplitter): self._allowed_special = allowed_special self._disallowed_special = disallowed_special - def split_text(self, text: str) -> List[str]: + def split_text(self, text: str) -> list[str]: """Splits the input text into smaller chunks based on tokenization. This method uses a custom tokenizer configuration to encode the input text @@ -264,7 +259,7 @@ class TokenTextSplitter(TextSplitter): of the input text based on the tokenization and chunking rules. """ - def _encode(_text: str) -> List[int]: + def _encode(_text: str) -> list[int]: return self._tokenizer.encode( _text, allowed_special=self._allowed_special, @@ -320,15 +315,15 @@ class Tokenizer: """Overlap in tokens between chunks""" tokens_per_chunk: int """Maximum number of tokens per chunk""" - decode: Callable[[List[int]], str] + decode: Callable[[list[int]], str] """ Function to decode a list of token ids to a string""" - encode: Callable[[str], List[int]] + encode: Callable[[str], list[int]] """ Function to encode a string to a list of token ids""" -def split_text_on_tokens(*, text: str, tokenizer: Tokenizer) -> List[str]: +def split_text_on_tokens(*, text: str, tokenizer: Tokenizer) -> list[str]: """Split incoming text and return chunks using tokenizer.""" - splits: List[str] = [] + splits: list[str] = [] input_ids = tokenizer.encode(text) start_idx = 0 cur_idx = min(start_idx + tokenizer.tokens_per_chunk, len(input_ids)) diff --git a/libs/text-splitters/langchain_text_splitters/character.py b/libs/text-splitters/langchain_text_splitters/character.py index f3c25b89e73..0060a6462f9 100644 --- a/libs/text-splitters/langchain_text_splitters/character.py +++ b/libs/text-splitters/langchain_text_splitters/character.py @@ -1,7 +1,7 @@ from __future__ import annotations import re -from typing import Any, List, Literal, Optional, Union +from typing import Any, Literal, Optional, Union from langchain_text_splitters.base import Language, TextSplitter @@ -17,7 +17,7 @@ class CharacterTextSplitter(TextSplitter): self._separator = separator self._is_separator_regex = is_separator_regex - def split_text(self, text: str) -> List[str]: + def split_text(self, text: str) -> list[str]: """Split into chunks without re-inserting lookaround separators.""" # 1. 
Determine split pattern: raw regex or escaped literal sep_pattern = ( @@ -46,7 +46,7 @@ class CharacterTextSplitter(TextSplitter): def _split_text_with_regex( text: str, separator: str, keep_separator: Union[bool, Literal["start", "end"]] -) -> List[str]: +) -> list[str]: # Now that we have the separator, split the text if separator: if keep_separator: @@ -80,7 +80,7 @@ class RecursiveCharacterTextSplitter(TextSplitter): def __init__( self, - separators: Optional[List[str]] = None, + separators: Optional[list[str]] = None, keep_separator: Union[bool, Literal["start", "end"]] = True, is_separator_regex: bool = False, **kwargs: Any, @@ -90,7 +90,7 @@ class RecursiveCharacterTextSplitter(TextSplitter): self._separators = separators or ["\n\n", "\n", " ", ""] self._is_separator_regex = is_separator_regex - def _split_text(self, text: str, separators: List[str]) -> List[str]: + def _split_text(self, text: str, separators: list[str]) -> list[str]: """Split incoming text and return chunks.""" final_chunks = [] # Get appropriate separator to use @@ -130,7 +130,7 @@ class RecursiveCharacterTextSplitter(TextSplitter): final_chunks.extend(merged_text) return final_chunks - def split_text(self, text: str) -> List[str]: + def split_text(self, text: str) -> list[str]: """Split the input text into smaller chunks based on predefined separators. Args: @@ -161,7 +161,7 @@ class RecursiveCharacterTextSplitter(TextSplitter): return cls(separators=separators, is_separator_regex=True, **kwargs) @staticmethod - def get_separators_for_language(language: Language) -> List[str]: + def get_separators_for_language(language: Language) -> list[str]: """Retrieve a list of separators specific to the given language. Args: diff --git a/libs/text-splitters/langchain_text_splitters/html.py b/libs/text-splitters/langchain_text_splitters/html.py index 06026bf31c1..3421f95a1b5 100644 --- a/libs/text-splitters/langchain_text_splitters/html.py +++ b/libs/text-splitters/langchain_text_splitters/html.py @@ -3,17 +3,13 @@ from __future__ import annotations import copy import pathlib import re +from collections.abc import Iterable, Sequence from io import StringIO from typing import ( Any, Callable, - Dict, - Iterable, - List, Literal, Optional, - Sequence, - Tuple, TypedDict, Union, cast, @@ -32,7 +28,7 @@ class ElementType(TypedDict): url: str xpath: str content: str - metadata: Dict[str, str] + metadata: dict[str, str] class HTMLHeaderTextSplitter: @@ -115,7 +111,7 @@ class HTMLHeaderTextSplitter: def __init__( self, - headers_to_split_on: List[Tuple[str, str]], + headers_to_split_on: list[tuple[str, str]], return_each_element: bool = False, ) -> None: """Initialize with headers to split on. @@ -134,7 +130,7 @@ class HTMLHeaderTextSplitter: self.header_tags = [tag for tag, _ in self.headers_to_split_on] self.return_each_element = return_each_element - def split_text(self, text: str) -> List[Document]: + def split_text(self, text: str) -> list[Document]: """Split the given text into a list of Document objects. Args: @@ -147,7 +143,7 @@ class HTMLHeaderTextSplitter: def split_text_from_url( self, url: str, timeout: int = 10, **kwargs: Any - ) -> List[Document]: + ) -> list[Document]: """Fetch text content from a URL and split it into documents. 
Args: @@ -166,7 +162,7 @@ class HTMLHeaderTextSplitter: response.raise_for_status() return self.split_text(response.text) - def split_text_from_file(self, file: Any) -> List[Document]: + def split_text_from_file(self, file: Any) -> list[Document]: """Split HTML content from a file into a list of Document objects. Args: @@ -176,7 +172,7 @@ class HTMLHeaderTextSplitter: A list of split Document objects. """ if isinstance(file, str): - with open(file, "r", encoding="utf-8") as f: + with open(file, encoding="utf-8") as f: html_content = f.read() else: html_content = file.read() @@ -208,8 +204,8 @@ class HTMLHeaderTextSplitter: # Dictionary of active headers: # key = user-defined header name (e.g. "Header 1") # value = (header_text, level, dom_depth) - active_headers: Dict[str, Tuple[str, int, int]] = {} - current_chunk: List[str] = [] + active_headers: dict[str, tuple[str, int, int]] = {} + current_chunk: list[str] = [] def finalize_chunk() -> Optional[Document]: """Finalize the accumulated chunk into a single Document.""" @@ -308,7 +304,7 @@ class HTMLSectionSplitter: def __init__( self, - headers_to_split_on: List[Tuple[str, str]], + headers_to_split_on: list[tuple[str, str]], **kwargs: Any, ) -> None: """Create a new HTMLSectionSplitter. @@ -326,7 +322,7 @@ class HTMLSectionSplitter: ).absolute() self.kwargs = kwargs - def split_documents(self, documents: Iterable[Document]) -> List[Document]: + def split_documents(self, documents: Iterable[Document]) -> list[Document]: """Split documents.""" texts, metadatas = [], [] for doc in documents: @@ -338,7 +334,7 @@ class HTMLSectionSplitter: return text_splitter.split_documents(results) - def split_text(self, text: str) -> List[Document]: + def split_text(self, text: str) -> list[Document]: """Split HTML text string. Args: @@ -364,7 +360,7 @@ class HTMLSectionSplitter: documents.append(new_doc) return documents - def split_html_by_headers(self, html_doc: str) -> List[Dict[str, Optional[str]]]: + def split_html_by_headers(self, html_doc: str) -> list[dict[str, Optional[str]]]: """Split an HTML document into sections based on specified header tags. This method uses BeautifulSoup to parse the HTML content and divides it into @@ -466,7 +462,7 @@ class HTMLSectionSplitter: result = transform(tree) return str(result) - def split_text_from_file(self, file: Any) -> List[Document]: + def split_text_from_file(self, file: Any) -> list[Document]: """Split HTML content from a file into a list of Document objects. 
Args: @@ -571,23 +567,23 @@ class HTMLSemanticPreservingSplitter(BaseDocumentTransformer): def __init__( self, - headers_to_split_on: List[Tuple[str, str]], + headers_to_split_on: list[tuple[str, str]], *, max_chunk_size: int = 1000, chunk_overlap: int = 0, - separators: Optional[List[str]] = None, - elements_to_preserve: Optional[List[str]] = None, + separators: Optional[list[str]] = None, + elements_to_preserve: Optional[list[str]] = None, preserve_links: bool = False, preserve_images: bool = False, preserve_videos: bool = False, preserve_audio: bool = False, - custom_handlers: Optional[Dict[str, Callable[[Any], str]]] = None, + custom_handlers: Optional[dict[str, Callable[[Any], str]]] = None, stopword_removal: bool = False, stopword_lang: str = "english", normalize_text: bool = False, - external_metadata: Optional[Dict[str, str]] = None, - allowlist_tags: Optional[List[str]] = None, - denylist_tags: Optional[List[str]] = None, + external_metadata: Optional[dict[str, str]] = None, + allowlist_tags: Optional[list[str]] = None, + denylist_tags: Optional[list[str]] = None, preserve_parent_metadata: bool = False, keep_separator: Union[bool, Literal["start", "end"]] = True, ): @@ -654,7 +650,7 @@ class HTMLSemanticPreservingSplitter(BaseDocumentTransformer): "Could not import nltk. Please install it with 'pip install nltk'." ) - def split_text(self, text: str) -> List[Document]: + def split_text(self, text: str) -> list[Document]: """Splits the provided HTML text into smaller chunks based on the configuration. Args: @@ -677,7 +673,7 @@ class HTMLSemanticPreservingSplitter(BaseDocumentTransformer): def transform_documents( self, documents: Sequence[Document], **kwargs: Any - ) -> List[Document]: + ) -> list[Document]: """Transform sequence of documents by splitting them.""" transformed = [] for doc in documents: @@ -776,7 +772,7 @@ class HTMLSemanticPreservingSplitter(BaseDocumentTransformer): return text - def _process_html(self, soup: Any) -> List[Document]: + def _process_html(self, soup: Any) -> list[Document]: """Processes the HTML content using BeautifulSoup and splits it using headers. Args: @@ -785,10 +781,10 @@ class HTMLSemanticPreservingSplitter(BaseDocumentTransformer): Returns: List[Document]: A list of Document objects containing the split content. 
""" - documents: List[Document] = [] - current_headers: Dict[str, str] = {} - current_content: List[str] = [] - preserved_elements: Dict[str, str] = {} + documents: list[Document] = [] + current_headers: dict[str, str] = {} + current_content: list[str] = [] + preserved_elements: dict[str, str] = {} placeholder_count: int = 0 def _get_element_text(element: Any) -> str: @@ -821,13 +817,13 @@ class HTMLSemanticPreservingSplitter(BaseDocumentTransformer): elements = soup.find_all(recursive=False) def _process_element( - element: List[Any], - documents: List[Document], - current_headers: Dict[str, str], - current_content: List[str], - preserved_elements: Dict[str, str], + element: list[Any], + documents: list[Document], + current_headers: dict[str, str], + current_content: list[str], + preserved_elements: dict[str, str], placeholder_count: int, - ) -> Tuple[List[Document], Dict[str, str], List[str], Dict[str, str], int]: + ) -> tuple[list[Document], dict[str, str], list[str], dict[str, str], int]: for elem in element: if elem.name.lower() in ["html", "body", "div", "main"]: children = elem.find_all(recursive=False) @@ -910,7 +906,7 @@ class HTMLSemanticPreservingSplitter(BaseDocumentTransformer): def _create_documents( self, headers: dict[str, str], content: str, preserved_elements: dict[str, str] - ) -> List[Document]: + ) -> list[Document]: """Creates Document objects from the provided headers, content, and elements. Args: @@ -936,7 +932,7 @@ class HTMLSemanticPreservingSplitter(BaseDocumentTransformer): def _further_split_chunk( self, content: str, metadata: dict[Any, Any], preserved_elements: dict[str, str] - ) -> List[Document]: + ) -> list[Document]: """Further splits the content into smaller chunks. Args: diff --git a/libs/text-splitters/langchain_text_splitters/json.py b/libs/text-splitters/langchain_text_splitters/json.py index 15df83446b3..0f71942e9a4 100644 --- a/libs/text-splitters/langchain_text_splitters/json.py +++ b/libs/text-splitters/langchain_text_splitters/json.py @@ -2,7 +2,7 @@ from __future__ import annotations import copy import json -from typing import Any, Dict, List, Optional +from typing import Any, Optional from langchain_core.documents import Document @@ -123,10 +123,10 @@ class RecursiveJsonSplitter: def split_text( self, - json_data: Dict[str, Any], + json_data: dict[str, Any], convert_lists: bool = False, ensure_ascii: bool = True, - ) -> List[str]: + ) -> list[str]: """Splits JSON into a list of JSON formatted strings.""" chunks = self.split_json(json_data=json_data, convert_lists=convert_lists) diff --git a/libs/text-splitters/langchain_text_splitters/jsx.py b/libs/text-splitters/langchain_text_splitters/jsx.py index fc13a58c5f7..3c0b73ebd28 100644 --- a/libs/text-splitters/langchain_text_splitters/jsx.py +++ b/libs/text-splitters/langchain_text_splitters/jsx.py @@ -1,5 +1,5 @@ import re -from typing import Any, List, Optional +from typing import Any, Optional from langchain_text_splitters import RecursiveCharacterTextSplitter @@ -23,7 +23,7 @@ class JSFrameworkTextSplitter(RecursiveCharacterTextSplitter): def __init__( self, - separators: Optional[List[str]] = None, + separators: Optional[list[str]] = None, chunk_size: int = 2000, chunk_overlap: int = 0, **kwargs: Any, @@ -39,7 +39,7 @@ class JSFrameworkTextSplitter(RecursiveCharacterTextSplitter): super().__init__(chunk_size=chunk_size, chunk_overlap=chunk_overlap, **kwargs) self._separators = separators or [] - def split_text(self, text: str) -> List[str]: + def split_text(self, text: str) -> list[str]: 
"""Split text into chunks. This method splits the text into chunks by: diff --git a/libs/text-splitters/langchain_text_splitters/konlpy.py b/libs/text-splitters/langchain_text_splitters/konlpy.py index 374c2c56f0e..60b35091677 100644 --- a/libs/text-splitters/langchain_text_splitters/konlpy.py +++ b/libs/text-splitters/langchain_text_splitters/konlpy.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, List +from typing import Any from langchain_text_splitters.base import TextSplitter @@ -30,7 +30,7 @@ class KonlpyTextSplitter(TextSplitter): ) self.kkma = konlpy.tag.Kkma() - def split_text(self, text: str) -> List[str]: + def split_text(self, text: str) -> list[str]: """Split incoming text and return chunks.""" splits = self.kkma.sentences(text) return self._merge_splits(splits, self._separator) diff --git a/libs/text-splitters/langchain_text_splitters/markdown.py b/libs/text-splitters/langchain_text_splitters/markdown.py index ae885bbb0ab..bbf10828ed4 100644 --- a/libs/text-splitters/langchain_text_splitters/markdown.py +++ b/libs/text-splitters/langchain_text_splitters/markdown.py @@ -1,7 +1,7 @@ from __future__ import annotations import re -from typing import Any, Dict, List, Tuple, TypedDict, Union +from typing import Any, TypedDict, Union from langchain_core.documents import Document @@ -23,7 +23,7 @@ class MarkdownHeaderTextSplitter: def __init__( self, - headers_to_split_on: List[Tuple[str, str]], + headers_to_split_on: list[tuple[str, str]], return_each_line: bool = False, strip_headers: bool = True, ): @@ -44,13 +44,13 @@ class MarkdownHeaderTextSplitter: # Strip headers split headers from the content of the chunk self.strip_headers = strip_headers - def aggregate_lines_to_chunks(self, lines: List[LineType]) -> List[Document]: + def aggregate_lines_to_chunks(self, lines: list[LineType]) -> list[Document]: """Combine lines with common metadata into chunks. Args: lines: Line of text / associated header metadata """ - aggregated_chunks: List[LineType] = [] + aggregated_chunks: list[LineType] = [] for line in lines: if ( @@ -87,7 +87,7 @@ class MarkdownHeaderTextSplitter: for chunk in aggregated_chunks ] - def split_text(self, text: str) -> List[Document]: + def split_text(self, text: str) -> list[Document]: """Split markdown file. Args: @@ -96,14 +96,14 @@ class MarkdownHeaderTextSplitter: # Split the input text by newline character ("\n"). 
lines = text.split("\n") # Final output - lines_with_metadata: List[LineType] = [] + lines_with_metadata: list[LineType] = [] # Content and metadata of the chunk currently being processed - current_content: List[str] = [] - current_metadata: Dict[str, str] = {} + current_content: list[str] = [] + current_metadata: dict[str, str] = {} # Keep track of the nested header structure # header_stack: List[Dict[str, Union[int, str]]] = [] - header_stack: List[HeaderType] = [] - initial_metadata: Dict[str, str] = {} + header_stack: list[HeaderType] = [] + initial_metadata: dict[str, str] = {} in_code_block = False opening_fence = "" @@ -217,7 +217,7 @@ class MarkdownHeaderTextSplitter: class LineType(TypedDict): """Line type as typed dict.""" - metadata: Dict[str, str] + metadata: dict[str, str] content: str @@ -280,7 +280,7 @@ class ExperimentalMarkdownSyntaxTextSplitter: def __init__( self, - headers_to_split_on: Union[List[Tuple[str, str]], None] = None, + headers_to_split_on: Union[list[tuple[str, str]], None] = None, return_each_line: bool = False, strip_headers: bool = True, ): @@ -300,9 +300,9 @@ class ExperimentalMarkdownSyntaxTextSplitter: Whether to exclude headers from the resulting chunks. Defaults to True. """ - self.chunks: List[Document] = [] + self.chunks: list[Document] = [] self.current_chunk = Document(page_content="") - self.current_header_stack: List[Tuple[int, str]] = [] + self.current_header_stack: list[tuple[int, str]] = [] self.strip_headers = strip_headers if headers_to_split_on: self.splittable_headers = dict(headers_to_split_on) @@ -311,7 +311,7 @@ class ExperimentalMarkdownSyntaxTextSplitter: self.return_each_line = return_each_line - def split_text(self, text: str) -> List[Document]: + def split_text(self, text: str) -> list[Document]: """Split the input text into structured chunks. This method processes the input text line by line, identifying and handling @@ -382,7 +382,7 @@ class ExperimentalMarkdownSyntaxTextSplitter: break self.current_header_stack.append((header_depth, header_text)) - def _resolve_code_chunk(self, current_line: str, raw_lines: List[str]) -> str: + def _resolve_code_chunk(self, current_line: str, raw_lines: list[str]) -> str: chunk = current_line while raw_lines: raw_line = raw_lines.pop(0) diff --git a/libs/text-splitters/langchain_text_splitters/nltk.py b/libs/text-splitters/langchain_text_splitters/nltk.py index c6c7eab481e..931e7b8cf3e 100644 --- a/libs/text-splitters/langchain_text_splitters/nltk.py +++ b/libs/text-splitters/langchain_text_splitters/nltk.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, List +from typing import Any from langchain_text_splitters.base import TextSplitter @@ -35,7 +35,7 @@ class NLTKTextSplitter(TextSplitter): "NLTK is not installed, please install it with `pip install nltk`." ) - def split_text(self, text: str) -> List[str]: + def split_text(self, text: str) -> list[str]: """Split incoming text and return chunks.""" # First we naively split the large input into a bunch of smaller ones. 
if self._use_span_tokenize: diff --git a/libs/text-splitters/langchain_text_splitters/sentence_transformers.py b/libs/text-splitters/langchain_text_splitters/sentence_transformers.py index d22ce1036ae..b3c88331d96 100644 --- a/libs/text-splitters/langchain_text_splitters/sentence_transformers.py +++ b/libs/text-splitters/langchain_text_splitters/sentence_transformers.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, List, Optional, cast +from typing import Any, Optional, cast from langchain_text_splitters.base import TextSplitter, Tokenizer, split_text_on_tokens @@ -50,7 +50,7 @@ class SentenceTransformersTokenTextSplitter(TextSplitter): f" > maximum token limit." ) - def split_text(self, text: str) -> List[str]: + def split_text(self, text: str) -> list[str]: """Splits the input text into smaller components by splitting text on tokens. This method encodes the input text using a private `_encode` method, then @@ -65,7 +65,7 @@ class SentenceTransformersTokenTextSplitter(TextSplitter): encoding and processing. """ - def encode_strip_start_and_stop_token_ids(text: str) -> List[int]: + def encode_strip_start_and_stop_token_ids(text: str) -> list[int]: return self._encode(text)[1:-1] tokenizer = Tokenizer( diff --git a/libs/text-splitters/langchain_text_splitters/spacy.py b/libs/text-splitters/langchain_text_splitters/spacy.py index fb8dee3c212..4d39caab398 100644 --- a/libs/text-splitters/langchain_text_splitters/spacy.py +++ b/libs/text-splitters/langchain_text_splitters/spacy.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, List +from typing import Any from langchain_text_splitters.base import TextSplitter @@ -31,7 +31,7 @@ class SpacyTextSplitter(TextSplitter): self._separator = separator self._strip_whitespace = strip_whitespace - def split_text(self, text: str) -> List[str]: + def split_text(self, text: str) -> list[str]: """Split incoming text and return chunks.""" splits = ( s.text if self._strip_whitespace else s.text_with_ws diff --git a/libs/text-splitters/pyproject.toml b/libs/text-splitters/pyproject.toml index e365e655ce1..f7eaade65c0 100644 --- a/libs/text-splitters/pyproject.toml +++ b/libs/text-splitters/pyproject.toml @@ -61,8 +61,8 @@ ignore_missing_imports = "True" target-version = "py39" [tool.ruff.lint] -select = ["E", "F", "I", "PGH003", "T201", "D"] -ignore = ["D100"] +select = ["E", "F", "I", "UP", "PGH003", "T201", "D"] +ignore = ["D100", "UP007"] [tool.coverage.run] omit = ["tests/*"] diff --git a/libs/text-splitters/tests/unit_tests/conftest.py b/libs/text-splitters/tests/unit_tests/conftest.py index dd4080cfca1..f6219faaa18 100644 --- a/libs/text-splitters/tests/unit_tests/conftest.py +++ b/libs/text-splitters/tests/unit_tests/conftest.py @@ -1,7 +1,7 @@ """Configuration for unit tests.""" +from collections.abc import Sequence from importlib import util -from typing import Dict, Sequence import pytest from pytest import Config, Function, Parser @@ -39,7 +39,7 @@ def pytest_collection_modifyitems(config: Config, items: Sequence[Function]) -> """ # Mapping from the name of a package to whether it is installed or not. 
# Used to avoid repeated calls to `util.find_spec` - required_pkgs_info: Dict[str, bool] = {} + required_pkgs_info: dict[str, bool] = {} only_extended = config.getoption("--only-extended") or False only_core = config.getoption("--only-core") or False diff --git a/libs/text-splitters/tests/unit_tests/test_text_splitters.py b/libs/text-splitters/tests/unit_tests/test_text_splitters.py index f66fbcdd306..935092f56c5 100644 --- a/libs/text-splitters/tests/unit_tests/test_text_splitters.py +++ b/libs/text-splitters/tests/unit_tests/test_text_splitters.py @@ -3,7 +3,7 @@ import random import re import string -from typing import Any, Callable, List, Tuple +from typing import Any, Callable import pytest from langchain_core.documents import Document @@ -282,7 +282,7 @@ def test_create_documents_with_metadata() -> None: ], ) def test_create_documents_with_start_index( - splitter: TextSplitter, text: str, expected_docs: List[Document] + splitter: TextSplitter, text: str, expected_docs: list[Document] ) -> None: """Test create documents method.""" docs = splitter.create_documents([text]) @@ -333,7 +333,7 @@ def test_iterative_text_splitter_discard_separator() -> None: ] -def __test_iterative_text_splitter(chunk_size: int, keep_separator: bool) -> List[str]: +def __test_iterative_text_splitter(chunk_size: int, keep_separator: bool) -> list[str]: chunk_size += 1 if keep_separator else 0 splitter = RecursiveCharacterTextSplitter( @@ -2224,7 +2224,7 @@ def test_haskell_code_splitter() -> None: @pytest.fixture @pytest.mark.requires("bs4") def html_header_splitter_splitter_factory() -> Callable[ - [List[Tuple[str, str]]], HTMLHeaderTextSplitter + [list[tuple[str, str]]], HTMLHeaderTextSplitter ]: """ Fixture to create an HTMLHeaderTextSplitter instance with given headers. @@ -2232,7 +2232,7 @@ def html_header_splitter_splitter_factory() -> Callable[ """ def _create_splitter( - headers_to_split_on: List[Tuple[str, str]], + headers_to_split_on: list[tuple[str, str]], ) -> HTMLHeaderTextSplitter: return HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on) @@ -2426,9 +2426,9 @@ def html_header_splitter_splitter_factory() -> Callable[ @pytest.mark.requires("bs4") def test_html_header_text_splitter( html_header_splitter_splitter_factory: Any, - headers_to_split_on: List[Tuple[str, str]], + headers_to_split_on: list[tuple[str, str]], html_input: str, - expected_documents: List[Document], + expected_documents: list[Document], test_case: str, ) -> None: """ @@ -2582,9 +2582,9 @@ def test_html_header_text_splitter( @pytest.mark.requires("bs4") def test_additional_html_header_text_splitter( html_header_splitter_splitter_factory: Any, - headers_to_split_on: List[Tuple[str, str]], + headers_to_split_on: list[tuple[str, str]], html_content: str, - expected_output: List[Document], + expected_output: list[Document], test_case: str, ) -> None: """ @@ -2653,9 +2653,9 @@ def test_additional_html_header_text_splitter( @pytest.mark.requires("bs4") def test_html_no_headers_with_multiple_splitters( html_header_splitter_splitter_factory: Any, - headers_to_split_on: List[Tuple[str, str]], + headers_to_split_on: list[tuple[str, str]], html_content: str, - expected_output: List[Document], + expected_output: list[Document], test_case: str, ) -> None: """ @@ -3572,7 +3572,7 @@ def test_character_text_splitter_chunk_size_effect( is_regex: bool, text: str, chunk_size: int, - expected: List[str], + expected: list[str], ) -> None: splitter = CharacterTextSplitter( separator=separator,
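For reference, a minimal sketch of the rewrite pattern this patch applies (the function names below are invented for illustration, not taken from the library): ruff's UP (pyupgrade) rules replace the deprecated typing.List/Dict/Tuple/Type aliases with the PEP 585 builtin generics and drop the redundant "r" mode from open(); Optional/Union spellings are left alone because UP007 is ignored in pyproject.toml, and the one change ruff could not auto-fix, typing.AbstractSet -> collections.abc.Set, was made by hand. With target-version = "py39" these forms are valid at runtime, not only under `from __future__ import annotations`.

from collections.abc import Set
from typing import Literal, Optional, Union


# Before (pre-patch style):
#     from typing import AbstractSet, Dict, List, Tuple
#     def split_items(text: str, seps: List[str]) -> List[Tuple[str, Dict[str, str]]]: ...
#     def allowed_tokens(allowed: Union[Literal["all"], AbstractSet[str]] = set()) -> List[int]: ...
#     with open(path, "r", encoding="utf-8") as f: ...

# After (what the UP rules, plus the one manual fix, produce):
def split_items(text: str, seps: list[str]) -> list[tuple[str, dict[str, str]]]:
    """Builtin generics (PEP 585) replace typing.List/Dict/Tuple."""
    return [(text, {"separator": s}) for s in seps]


def allowed_tokens(allowed: Union[Literal["all"], Set[str]] = frozenset()) -> list[int]:
    """typing.AbstractSet becomes collections.abc.Set (the manual fix);
    Optional/Union stay as-is because UP007 is ignored."""
    return []


def read_html(path: Optional[str] = None) -> str:
    if path is None:
        return ""
    with open(path, encoding="utf-8") as f:  # redundant "r" mode removed
        return f.read()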