openai: Add skip_tokenization option to OpenAIEmbeddings (Fixes #30574)

ArmaanjeetSandhu 2025-04-04 07:47:32 +05:30
parent 56c0460d6e
commit 584ce0aa22


@@ -239,8 +239,9 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
     API but with different models. In those cases, in order to avoid erroring
     when tiktoken is called, you can specify a model name to use here."""
     skip_tokenization: bool = False
-    """Set this to True to skip tokenization entirely and pass texts directly to the API.
-    Use this for OpenAI-compatible APIs that don't support tiktoken or huggingface tokenizers."""
+    """Whether to skip tokenization entirely and pass texts directly to the API.
+    Use this for OpenAI-compatible APIs that don't support tiktoken or
+    huggingface tokenizers."""
     show_progress_bar: bool = False
     """Whether to show a progress bar when embedding."""
     model_kwargs: Dict[str, Any] = Field(default_factory=dict)
@@ -403,7 +404,8 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
                     "Could not import transformers python package. "
                     "This is needed for OpenAIEmbeddings to work without "
                     "`tiktoken`. Please install it with `pip install transformers`. "
-                    "Alternatively, set `skip_tokenization=True` to bypass tokenization entirely."
+                    "Alternatively, set `skip_tokenization=True` to bypass "
+                    "tokenization entirely."
                 )
             tokenizer = AutoTokenizer.from_pretrained(
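
Not part of the diff, but for context: a minimal usage sketch of the new flag, assuming a langchain-openai build that includes this change is installed. The endpoint URL, model name, and API key below are hypothetical placeholders for whichever OpenAI-compatible embedding server is in use.

from langchain_openai import OpenAIEmbeddings

# Sketch only: base_url, model, and api_key are placeholder values.
embeddings = OpenAIEmbeddings(
    model="nomic-embed-text",             # whatever embedding model the server exposes
    base_url="http://localhost:8000/v1",  # any OpenAI-compatible endpoint
    api_key="not-needed",                 # many local servers ignore the key
    skip_tokenization=True,               # send raw texts; no tiktoken or transformers required
)

vectors = embeddings.embed_documents(["first document", "second document"])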