openai: Add skip_tokenization option to OpenAIEmbeddings (Fixes #30574)

2025-08-19 01:21:50 +00:00 · 2025-04-04 07:47:32 +05:30 · 2025-04-04 07:47:32 +05:30 · 584ce0aa22
commit 584ce0aa22
parent 56c0460d6e
1 changed file with 5 additions and 3 deletions
--- a/libs/partners/openai/langchain_openai/embeddings/base.py
+++ b/libs/partners/openai/langchain_openai/embeddings/base.py
@@ -239,8 +239,9 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
    API but with different models. In those cases, in order to avoid erroring
    when tiktoken is called, you can specify a model name to use here."""
    skip_tokenization: bool = False
-    """Set this to True to skip tokenization entirely and pass texts directly to the API.
-    Use this for OpenAI-compatible APIs that don't support tiktoken or huggingface tokenizers."""
+    """Whether to skip tokenization entirely and pass texts directly to the API.
+    Use this for OpenAI-compatible APIs that don't support tiktoken or
+    huggingface tokenizers."""
    show_progress_bar: bool = False
    """Whether to show a progress bar when embedding."""
    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
@@ -403,7 +404,8 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
                    "Could not import transformers python package. "
                    "This is needed for OpenAIEmbeddings to work without "
                    "`tiktoken`. Please install it with `pip install transformers`. "
-                    "Alternatively, set `skip_tokenization=True` to bypass tokenization entirely."
+                    "Alternatively, set `skip_tokenization=True` to bypass "
+                    "tokenization entirely."
                )

            tokenizer = AutoTokenizer.from_pretrained(