text-splitters: Set strict mypy rules (#30900)

* Add strict mypy rules
* Fix mypy violations
* Add error codes to all type ignores
* Add ruff rule PGH003
* Bump mypy version to 1.15
This commit is contained in:
Christophe Bornet
2025-04-23 06:41:24 +03:00
committed by GitHub
parent eedda164c6
commit 8c5ae108dd
9 changed files with 81 additions and 77 deletions

View File

@@ -35,7 +35,7 @@ class SentenceTransformersTokenTextSplitter(TextSplitter):
def _initialize_chunk_configuration(
self, *, tokens_per_chunk: Optional[int]
) -> None:
-self.maximum_tokens_per_chunk = cast(int, self._model.max_seq_length)
+self.maximum_tokens_per_chunk = self._model.max_seq_length
if tokens_per_chunk is None:
self.tokens_per_chunk = self.maximum_tokens_per_chunk
@@ -93,10 +93,10 @@ class SentenceTransformersTokenTextSplitter(TextSplitter):
_max_length_equal_32_bit_integer: int = 2**32
-def _encode(self, text: str) -> List[int]:
+def _encode(self, text: str) -> list[int]:
token_ids_with_start_and_end_token_ids = self.tokenizer.encode(
text,
max_length=self._max_length_equal_32_bit_integer,
truncation="do_not_truncate",
)
-return token_ids_with_start_and_end_token_ids
+return cast("list[int]", token_ids_with_start_and_end_token_ids)