fix(openai): source LLM context size from model profiles (#37489)

`BaseOpenAI.modelname_to_contextsize` now reads `max_input_tokens` from the partner's model profile registry instead of a hand-maintained dict, with a `@deprecated(since="1.2", removal="2.0")` marker steering callers at the profile directly. The legacy mapping is trimmed to models that have no profile (completion-only and dated chat variants) so existing lookups keep working.
2026-06-09 10:17:00 +00:00 · 2026-05-17 23:49:06 -07:00
parent 6c091564ac
commit 992c613b51
2 changed files with 46 additions and 24 deletions
--- a/libs/partners/openai/langchain_openai/llms/base.py
+++ b/libs/partners/openai/langchain_openai/llms/base.py
@@ -9,6 +9,7 @@ from typing import Any, Literal

 import openai
 import tiktoken
+from langchain_core._api.deprecation import deprecated
 from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
@@ -20,6 +21,8 @@ from langchain_core.utils.utils import _build_model_kwargs, from_env, secret_fro
 from pydantic import ConfigDict, Field, SecretStr, model_validator
 from typing_extensions import Self

+from langchain_openai.data._profiles import _PROFILES
+
 logger = logging.getLogger(__name__)


@@ -663,41 +666,48 @@ class BaseOpenAI(BaseLLM):
        )

    @staticmethod
+    @deprecated(
+        since="1.2",
+        removal="2.0",
+        alternative=(
+            "the model profile's `max_input_tokens` field "
+            "(e.g. `ChatOpenAI(model=...).profile['max_input_tokens']`)"
+        ),
+    )
    def modelname_to_contextsize(modelname: str) -> int:
-        """Calculate the maximum number of tokens possible to generate for a model.
+        """Return the maximum input context size for a model.
+
+        Prefers the model's profile (`max_input_tokens`) and falls back to a
+        mapping of legacy models that have no profile.
+
+        !!! warning "Changed in 1.2"
+
+            Now returns `max_input_tokens` from the model profile, which is the
+            input context window. Earlier releases returned a hand-maintained
+            number that for some newer models (e.g. `gpt-5`) reflected the
+            *total* context (input + output). Callers using the result as an
+            input-token budget are unaffected; callers using it as a combined
+            input+output budget should switch to the profile fields directly.

        Args:
            modelname: The modelname we want to know the context size for.

        Returns:
-            The maximum context size
+            The maximum input context size.

        Example:
            ```python
            max_tokens = openai.modelname_to_contextsize("gpt-3.5-turbo-instruct")
            ```
        """
-        model_token_mapping = {
-            "gpt-5.2": 400_000,
-            "gpt-5.2-2025-12-11": 400_000,
-            "gpt-5.1": 400_000,
-            "gpt-5.1-2025-11-13": 400_000,
-            "gpt-5": 400_000,
-            "gpt-5-2025-08-07": 400_000,
-            "gpt-5-mini": 400_000,
-            "gpt-5-mini-2025-08-07": 400_000,
-            "gpt-5-nano": 400_000,
-            "gpt-5-nano-2025-08-07": 400_000,
-            "gpt-4o-mini": 128_000,
-            "gpt-4o": 128_000,
-            "gpt-4o-2024-05-13": 128_000,
-            "gpt-4": 8192,
+        # Legacy models without a model profile.
+        legacy_token_mapping = {
            "gpt-4-0314": 8192,
            "gpt-4-0613": 8192,
            "gpt-4-32k": 32768,
            "gpt-4-32k-0314": 32768,
            "gpt-4-32k-0613": 32768,
-            "gpt-3.5-turbo": 4096,
+            "gpt-4o-2024-05-13": 128_000,
            "gpt-3.5-turbo-0301": 4096,
            "gpt-3.5-turbo-0613": 4096,
            "gpt-3.5-turbo-16k": 16385,
@@ -720,15 +730,27 @@ class BaseOpenAI(BaseLLM):

        # handling finetuned models
        if "ft-" in modelname:
-            modelname = modelname.split(":")[0]
+            modelname = modelname.split(":", maxsplit=1)[0]

-        context_size = model_token_mapping.get(modelname)
+        profile = _PROFILES.get(modelname)
+        context_size = profile.get("max_input_tokens") if profile else None
+        if profile is not None and context_size is None:
+            logger.warning(
+                "Profile for model %s is missing `max_input_tokens`; "
+                "falling back to legacy mapping.",
+                modelname,
+            )
+        if context_size is None:
+            context_size = legacy_token_mapping.get(modelname)

        if context_size is None:
-            raise ValueError(
-                f"Unknown model: {modelname}. Please provide a valid OpenAI model name."
-                "Known models are: " + ", ".join(model_token_mapping.keys())
+            known = sorted({*_PROFILES.keys(), *legacy_token_mapping.keys()})
+            msg = (
+                f"Unknown model: {modelname}. Please provide a valid OpenAI model "
+                "name, or read `max_input_tokens` from the model profile directly. "
+                "Known models are: " + ", ".join(known)
            )
+            raise ValueError(msg)

        return context_size

--- a/libs/partners/openai/uv.lock
+++ b/libs/partners/openai/uv.lock
@@ -559,7 +559,7 @@ wheels = [

 [[package]]
 name = "langchain"
-version = "1.3.0"
+version = "1.3.1"
 source = { editable = "../../langchain_v1" }
 dependencies = [
    { name = "langchain-core" },