openai: Add skip_tokenization option to OpenAIEmbeddings (Fixes #30574)
parent 56c0460d6e
commit 584ce0aa22
@@ -239,8 +239,9 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
     API but with different models. In those cases, in order to avoid erroring
     when tiktoken is called, you can specify a model name to use here."""
     skip_tokenization: bool = False
-    """Set this to True to skip tokenization entirely and pass texts directly to the API.
-    Use this for OpenAI-compatible APIs that don't support tiktoken or huggingface tokenizers."""
+    """Whether to skip tokenization entirely and pass texts directly to the API.
+    Use this for OpenAI-compatible APIs that don't support tiktoken or
+    huggingface tokenizers."""
     show_progress_bar: bool = False
     """Whether to show a progress bar when embedding."""
     model_kwargs: Dict[str, Any] = Field(default_factory=dict)
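For context, a minimal usage sketch of the option whose docstring this hunk rewraps. The `skip_tokenization` field comes from the diff above; the model name, `base_url`, and `api_key` values are placeholder assumptions for an arbitrary OpenAI-compatible server, not anything specified by this commit:

from langchain_openai import OpenAIEmbeddings

# Hypothetical OpenAI-compatible endpoint whose model is unknown to
# tiktoken; skip_tokenization passes the raw texts straight to the API.
embeddings = OpenAIEmbeddings(
    model="nomic-embed-text",             # assumed model name, for illustration
    base_url="http://localhost:8000/v1",  # assumed local server URL
    api_key="not-needed",                 # many local servers ignore the key
    skip_tokenization=True,               # option added by this change
)

vector = embeddings.embed_query("hello world")
print(len(vector))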
@@ -403,7 +404,8 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
                     "Could not import transformers python package. "
                     "This is needed for OpenAIEmbeddings to work without "
                     "`tiktoken`. Please install it with `pip install transformers`. "
-                    "Alternatively, set `skip_tokenization=True` to bypass tokenization entirely."
+                    "Alternatively, set `skip_tokenization=True` to bypass "
+                    "tokenization entirely."
                 )

            tokenizer = AutoTokenizer.from_pretrained(
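This hunk sits in the tokenizer-fallback path, where the error above is raised if transformers is missing. A rough sketch of the control flow implied by the two hunks together, assuming a helper named `choose_tokenizer` (not the verbatim library code):

# Fallback order implied by this change (sketch, not verbatim source):
# 1. skip_tokenization=True   -> send texts to the API untouched
# 2. tiktoken knows the model -> tokenize with tiktoken
# 3. otherwise                -> fall back to a huggingface AutoTokenizer
def choose_tokenizer(self, texts):
    if self.skip_tokenization:
        return texts  # pass through; the server does its own tokenization
    try:
        import tiktoken

        return tiktoken.encoding_for_model(self.tiktoken_model_name or self.model)
    except KeyError:
        # Unknown to tiktoken; an ImportError here produces the message above.
        from transformers import AutoTokenizer

        return AutoTokenizer.from_pretrained(self.model)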