diff --git a/libs/text-splitters/langchain_text_splitters/base.py b/libs/text-splitters/langchain_text_splitters/base.py index 10dd6903ba1..c94c171c3cc 100644 --- a/libs/text-splitters/langchain_text_splitters/base.py +++ b/libs/text-splitters/langchain_text_splitters/base.py @@ -158,7 +158,7 @@ class TextSplitter(BaseDocumentTransformer, ABC): ) def _huggingface_tokenizer_length(text: str) -> int: - return len(tokenizer.encode(text)) + return len(tokenizer.tokenize(text)) except ImportError: raise ValueError(