From a60fd06784e8a5a28c380aec659297f3f15f7e34 Mon Sep 17 00:00:00 2001
From: ccurme <chester.curme@gmail.com>
Date: Fri, 25 Apr 2025 15:10:25 -0400
Subject: [PATCH] docs: document OpenAI flex processing (#31023)

Following https://github.com/langchain-ai/langchain/pull/31005
---
 docs/docs/integrations/chat/openai.ipynb | 19 ++++++++++++++++-
 .../langchain_openai/chat_models/base.py | 21 +++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/docs/docs/integrations/chat/openai.ipynb b/docs/docs/integrations/chat/openai.ipynb
index 457293aac9e..a1b3adeb821 100644
--- a/docs/docs/integrations/chat/openai.ipynb
+++ b/docs/docs/integrations/chat/openai.ipynb
@@ -1413,6 +1413,23 @@
     "second_output_message = llm.invoke(history)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "90c18d18-b25c-4509-a639-bd652b92f518",
+   "metadata": {},
+   "source": [
+    "## Flex processing\n",
+    "\n",
+    "OpenAI offers a variety of [service tiers](https://platform.openai.com/docs/guides/flex-processing). The \"flex\" tier offers cheaper pricing for requests, with the trade-off that responses may take longer and resources might not always be available. This approach is best suited for non-critical tasks, including model testing, data enhancement, or jobs that can be run asynchronously.\n",
+    "\n",
+    "To use it, initialize the model with `service_tier=\"flex\"`:\n",
+    "```python\n",
+    "llm = ChatOpenAI(model=\"o4-mini\", service_tier=\"flex\")\n",
+    "```\n",
+    "\n",
+    "Note that this is a beta feature that is only available for a subset of models. See OpenAI [docs](https://platform.openai.com/docs/guides/flex-processing) for more detail."
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "a796d728-971b-408b-88d5-440015bbb941",
@@ -1420,7 +1437,7 @@
    "source": [
     "## API reference\n",
     "\n",
-    "For detailed documentation of all ChatOpenAI features and configurations head to the API reference: https://python.langchain.com/api_reference/openai/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html"
+    "For detailed documentation of all ChatOpenAI features and configurations head to the [API reference](https://python.langchain.com/api_reference/openai/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html)."
    ]
   }
  ],
diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py
index 9e740f38a95..f1b3bc2104d 100644
--- a/libs/partners/openai/langchain_openai/chat_models/base.py
+++ b/libs/partners/openai/langchain_openai/chat_models/base.py
@@ -2331,6 +2331,27 @@ class ChatOpenAI(BaseChatOpenAI):  # type: ignore[override]
                 "logprobs": None,
             }
 
+    .. dropdown:: Flex processing
+
+        OpenAI offers a variety of
+        `service tiers <https://platform.openai.com/docs/guides/flex-processing>`_.
+        The "flex" tier offers cheaper pricing for requests, with the trade-off that
+        responses may take longer and resources might not always be available.
+        This approach is best suited for non-critical tasks, including model testing,
+        data enhancement, or jobs that can be run asynchronously.
+
+        To use it, initialize the model with ``service_tier="flex"``:
+
+        .. code-block:: python
+
+            from langchain_openai import ChatOpenAI
+
+            llm = ChatOpenAI(model="o4-mini", service_tier="flex")
+
+        Note that this is a beta feature that is only available for a subset of models.
+        See OpenAI `docs <https://platform.openai.com/docs/guides/flex-processing>`_
+        for more detail.
+
     """  # noqa: E501
 
     max_tokens: Optional[int] = Field(default=None, alias="max_completion_tokens")
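
Since the docs added by this patch note that flex responses may take longer and that capacity can be unavailable, a caller will usually want a longer request timeout, retries, and a fallback to the default tier. A minimal sketch of that pattern, assuming `langchain-openai` with `service_tier` support (per the PR referenced above); the `timeout` value shown and the fallback arrangement are illustrative choices, not something these docs prescribe:

```python
from langchain_openai import ChatOpenAI

# Flex tier: cheaper requests, but responses may be slow and capacity
# may be unavailable, so allow a generous timeout and a couple of retries.
flex_llm = ChatOpenAI(
    model="o4-mini",
    service_tier="flex",
    timeout=900.0,   # illustrative: flex responses can take much longer
    max_retries=2,   # retry transient capacity errors before giving up
)

# If the flex request still fails, rerun on the default service tier.
standard_llm = ChatOpenAI(model="o4-mini")
llm = flex_llm.with_fallbacks([standard_llm])

response = llm.invoke("Say hello.")
print(response.content)
```

`timeout` (alias of `request_timeout`), `max_retries`, and `with_fallbacks` are existing LangChain features; only their combination with `service_tier="flex"` here is a suggested pattern, fitting the non-critical, latency-tolerant workloads the tier is meant for.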