From a60fd06784e8a5a28c380aec659297f3f15f7e34 Mon Sep 17 00:00:00 2001
From: ccurme <chester.curme@gmail.com>
Date: Fri, 25 Apr 2025 15:10:25 -0400
Subject: [PATCH] docs: document OpenAI flex processing (#31023)

Following https://github.com/langchain-ai/langchain/pull/31005
---
 docs/docs/integrations/chat/openai.ipynb      | 19 ++++++++++++++++-
 .../langchain_openai/chat_models/base.py      | 21 +++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/docs/docs/integrations/chat/openai.ipynb b/docs/docs/integrations/chat/openai.ipynb
index 457293aac9e..a1b3adeb821 100644
--- a/docs/docs/integrations/chat/openai.ipynb
+++ b/docs/docs/integrations/chat/openai.ipynb
@@ -1413,6 +1413,23 @@
     "second_output_message = llm.invoke(history)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "90c18d18-b25c-4509-a639-bd652b92f518",
+   "metadata": {},
+   "source": [
+    "## Flex processing\n",
+    "\n",
+    "OpenAI offers a variety of [service tiers](https://platform.openai.com/docs/guides/flex-processing). The \"flex\" tier offers cheaper pricing for requests, with the trade-off that responses may take longer and resources might not always be available. This approach is best suited for non-critical tasks, including model testing, data enhancement, or jobs that can be run asynchronously.\n",
+    "\n",
+    "To use it, initialize the model with `service_tier=\"flex\"`:\n",
+    "```python\n",
+    "llm = ChatOpenAI(model=\"o4-mini\", service_tier=\"flex\")\n",
+    "```\n",
+    "\n",
+    "Note that this is a beta feature that is only available for a subset of models. See OpenAI [docs](https://platform.openai.com/docs/guides/flex-processing) for more detail."
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "a796d728-971b-408b-88d5-440015bbb941",
@@ -1420,7 +1437,7 @@
    "source": [
     "## API reference\n",
     "\n",
-    "For detailed documentation of all ChatOpenAI features and configurations head to the API reference: https://python.langchain.com/api_reference/openai/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html"
+    "For detailed documentation of all ChatOpenAI features and configurations head to the [API reference](https://python.langchain.com/api_reference/openai/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html)."
    ]
   }
  ],
diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py
index 9e740f38a95..f1b3bc2104d 100644
--- a/libs/partners/openai/langchain_openai/chat_models/base.py
+++ b/libs/partners/openai/langchain_openai/chat_models/base.py
@@ -2331,6 +2331,27 @@ class ChatOpenAI(BaseChatOpenAI):  # type: ignore[override]
                 "logprobs": None,
             }
 
+    .. dropdown:: Flex processing
+
+        OpenAI offers a variety of
+        `service tiers <https://platform.openai.com/docs/guides/flex-processing>`_.
+        The "flex" tier offers cheaper pricing for requests, with the trade-off that
+        responses may take longer and resources might not always be available.
+        This approach is best suited for non-critical tasks, including model testing,
+        data enhancement, or jobs that can be run asynchronously.
+
+        To use it, initialize the model with ``service_tier="flex"``:
+
+        .. code-block:: python
+
+            from langchain_openai import ChatOpenAI
+
+            llm = ChatOpenAI(model="o4-mini", service_tier="flex")
+
+        Note that this is a beta feature that is only available for a subset of models.
+        See the OpenAI `docs <https://platform.openai.com/docs/guides/flex-processing>`_
+        for more details.
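+
+        Because flex responses can take longer to complete, it may also help to
+        raise the client timeout. A minimal sketch (the timeout value below is an
+        illustrative assumption, not a recommendation):
+
+        .. code-block:: python
+
+            llm = ChatOpenAI(
+                model="o4-mini",
+                service_tier="flex",
+                # Flex requests may queue under load; tune the timeout for your workload.
+                timeout=900.0,
+            )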
+
     """  # noqa: E501
 
     max_tokens: Optional[int] = Field(default=None, alias="max_completion_tokens")