mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-22 11:00:37 +00:00
text-splitters: Set strict mypy rules (#30900)
* Add strict mypy rules
* Fix mypy violations
* Add error codes to all type ignores
* Add ruff rule PGH003
* Bump mypy version to 1.15
This commit is contained in:
committed by
GitHub
parent
eedda164c6
commit
8c5ae108dd
@@ -35,7 +35,7 @@ class SentenceTransformersTokenTextSplitter(TextSplitter):
|
||||
def _initialize_chunk_configuration(
|
||||
self, *, tokens_per_chunk: Optional[int]
|
||||
) -> None:
|
||||
self.maximum_tokens_per_chunk = cast(int, self._model.max_seq_length)
|
||||
self.maximum_tokens_per_chunk = self._model.max_seq_length
|
||||
|
||||
if tokens_per_chunk is None:
|
||||
self.tokens_per_chunk = self.maximum_tokens_per_chunk
|
||||
@@ -93,10 +93,10 @@ class SentenceTransformersTokenTextSplitter(TextSplitter):
|
||||
|
||||
_max_length_equal_32_bit_integer: int = 2**32
|
||||
|
||||
def _encode(self, text: str) -> List[int]:
|
||||
def _encode(self, text: str) -> list[int]:
|
||||
token_ids_with_start_and_end_token_ids = self.tokenizer.encode(
|
||||
text,
|
||||
max_length=self._max_length_equal_32_bit_integer,
|
||||
truncation="do_not_truncate",
|
||||
)
|
||||
return token_ids_with_start_and_end_token_ids
|
||||
return cast("list[int]", token_ids_with_start_and_end_token_ids)
|
||||
|
Reference in New Issue
Block a user