mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-07 22:11:51 +00:00
docs: document OpenAI flex processing (#31023)
Following https://github.com/langchain-ai/langchain/pull/31005
This commit is contained in:
@@ -2331,6 +2331,27 @@ class ChatOpenAI(BaseChatOpenAI):  # type: ignore[override]
                 "logprobs": None,
             }

    .. dropdown:: Flex processing

        OpenAI offers a variety of
        `service tiers <https://platform.openai.com/docs/guides/flex-processing>`_.
        The "flex" tier offers cheaper pricing for requests, with the trade-off that
        responses may take longer and resources might not always be available.
        This approach is best suited for non-critical tasks, including model testing,
        data enhancement, or jobs that can be run asynchronously.

        To use it, initialize the model with ``service_tier="flex"``:

        .. code-block:: python

            from langchain_openai import ChatOpenAI

            llm = ChatOpenAI(model="o4-mini", service_tier="flex")

        Note that this is a beta feature that is only available for a subset of models.
        See OpenAI `docs <https://platform.openai.com/docs/guides/flex-processing>`_
        for more detail.

    """  # noqa: E501

    max_tokens: Optional[int] = Field(default=None, alias="max_completion_tokens")
Reference in New Issue
Block a user