From 56c0460d6e8c71021082d4c19ae88c87e68c49eb Mon Sep 17 00:00:00 2001
From: ArmaanjeetSandhu
Date: Fri, 4 Apr 2025 06:43:42 +0530
Subject: [PATCH] openai: Add skip_tokenization option to OpenAIEmbeddings
 (Fixes #30574)

---
 .../openai/langchain_openai/embeddings/base.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/libs/partners/openai/langchain_openai/embeddings/base.py b/libs/partners/openai/langchain_openai/embeddings/base.py
index 4e2f7c74cfb..84ddd5ae071 100644
--- a/libs/partners/openai/langchain_openai/embeddings/base.py
+++ b/libs/partners/openai/langchain_openai/embeddings/base.py
@@ -238,6 +238,9 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
     when using one of the many model providers that expose an OpenAI-like
     API but with different models. In those cases, in order to avoid erroring
     when tiktoken is called, you can specify a model name to use here."""
+    skip_tokenization: bool = False
+    """Set this to True to skip tokenization entirely and pass texts directly to the API.
+    Use this for OpenAI-compatible APIs that don't support tiktoken or huggingface tokenizers."""
     show_progress_bar: bool = False
     """Whether to show a progress bar when embedding."""
     model_kwargs: Dict[str, Any] = Field(default_factory=dict)
@@ -387,8 +390,12 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
         indices: List[int] = []
         model_name = self.tiktoken_model_name or self.model
 
-        # If tiktoken flag set to False
-        if not self.tiktoken_enabled:
+        # Skip tokenization entirely and use raw text
+        if self.skip_tokenization:
+            for i, text in enumerate(texts):
+                tokens.append(text)
+                indices.append(i)
+        elif not self.tiktoken_enabled:
             try:
                 from transformers import AutoTokenizer
             except ImportError:
@@ -396,6 +403,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
                     "Could not import transformers python package. "
                     "This is needed for OpenAIEmbeddings to work without "
                     "`tiktoken`. Please install it with `pip install transformers`. "
+                    "Alternatively, set `skip_tokenization=True` to bypass tokenization entirely."
                 )
 
             tokenizer = AutoTokenizer.from_pretrained(