fix huggingface tokenizer default length function (#30185)

#30184
This commit is contained in:
keshavshrikant 2025-03-31 21:24:30 +05:30 committed by GitHub
parent 4419340039
commit e8be3cca5c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -158,7 +158,7 @@ class TextSplitter(BaseDocumentTransformer, ABC):
)
def _huggingface_tokenizer_length(text: str) -> int:
-return len(tokenizer.encode(text))
+return len(tokenizer.tokenize(text))
except ImportError:
raise ValueError(