feat(openai): minimal and verbosity (#32455)

Mason Daugherty 2025-08-07 22:24:21 -04:00 committed by GitHub
parent 6727d6e8c8
commit 00244122bd
9 changed files with 354 additions and 55 deletions

View File

@@ -68,6 +68,32 @@ def test_init_unknown_provider() -> None:
     clear=True,
 )
 def test_configurable() -> None:
+    """Test configurable chat model behavior without default parameters.
+
+    Verifies that a configurable chat model initialized without default parameters:
+
+    - Has access to all standard runnable methods (``invoke``, ``stream``, etc.)
+    - Blocks access to non-configurable methods until configuration is provided
+    - Supports declarative operations (``bind_tools``) without mutating original model
+    - Can chain declarative operations and configuration to access full functionality
+    - Properly resolves to the configured model type when parameters are provided
+
+    Example:
+        .. python::
+
+            # This creates a configurable model without specifying which model
+            model = init_chat_model()
+
+            # This will FAIL - no model specified yet
+            model.get_num_tokens("hello")  # AttributeError!
+
+            # This works - provides model at runtime
+            response = model.invoke(
+                "Hello",
+                config={"configurable": {"model": "gpt-4o"}}
+            )
+    """
     model = init_chat_model()
     for method in (
@@ -125,6 +151,7 @@ def test_configurable() -> None:
         "presence_penalty": None,
         "reasoning": None,
         "reasoning_effort": None,
+        "verbosity": None,
         "frequency_penalty": None,
         "include": None,
         "seed": None,
@@ -170,6 +197,32 @@ def test_configurable() -> None:
     clear=True,
 )
 def test_configurable_with_default() -> None:
+    """Test configurable chat model behavior with default parameters.
+
+    Verifies that a configurable chat model initialized with default parameters:
+
+    - Has access to all standard runnable methods (``invoke``, ``stream``, etc.)
+    - Provides immediate access to non-configurable methods (e.g. ``get_num_tokens``)
+    - Supports model switching through runtime configuration using ``config_prefix``
+    - Maintains proper model identity and attributes when reconfigured
+    - Can be used in chains with different model providers via configuration
+
+    Example:
+        .. python::
+
+            # This creates a configurable model with default parameters (model)
+            model = init_chat_model("gpt-4o", configurable_fields="any", config_prefix="bar")
+
+            # This works immediately - uses default gpt-4o
+            tokens = model.get_num_tokens("hello")
+
+            # This also works - switches to Claude at runtime
+            response = model.invoke(
+                "Hello",
+                config={"configurable": {"my_model_model": "claude-3-sonnet-20240229"}}
+            )
+    """  # noqa: E501
     model = init_chat_model("gpt-4o", configurable_fields="any", config_prefix="bar")
     for method in (
         "invoke",

View File

@@ -68,6 +68,32 @@ def test_init_unknown_provider() -> None:
     clear=True,
 )
 def test_configurable() -> None:
+    """Test configurable chat model behavior without default parameters.
+
+    Verifies that a configurable chat model initialized without default parameters:
+
+    - Has access to all standard runnable methods (``invoke``, ``stream``, etc.)
+    - Blocks access to non-configurable methods until configuration is provided
+    - Supports declarative operations (``bind_tools``) without mutating original model
+    - Can chain declarative operations and configuration to access full functionality
+    - Properly resolves to the configured model type when parameters are provided
+
+    Example:
+        .. python::
+
+            # This creates a configurable model without specifying which model
+            model = init_chat_model()
+
+            # This will FAIL - no model specified yet
+            model.get_num_tokens("hello")  # AttributeError!
+
+            # This works - provides model at runtime
+            response = model.invoke(
+                "Hello",
+                config={"configurable": {"model": "gpt-4o"}}
+            )
+    """
     model = init_chat_model()
     for method in (
@@ -125,6 +151,7 @@ def test_configurable() -> None:
         "presence_penalty": None,
         "reasoning": None,
         "reasoning_effort": None,
+        "verbosity": None,
         "frequency_penalty": None,
         "include": None,
         "seed": None,
@@ -170,6 +197,32 @@ def test_configurable() -> None:
     clear=True,
 )
 def test_configurable_with_default() -> None:
+    """Test configurable chat model behavior with default parameters.
+
+    Verifies that a configurable chat model initialized with default parameters:
+
+    - Has access to all standard runnable methods (``invoke``, ``stream``, etc.)
+    - Provides immediate access to non-configurable methods (e.g. ``get_num_tokens``)
+    - Supports model switching through runtime configuration using ``config_prefix``
+    - Maintains proper model identity and attributes when reconfigured
+    - Can be used in chains with different model providers via configuration
+
+    Example:
+        .. python::
+
+            # This creates a configurable model with default parameters (model)
+            model = init_chat_model("gpt-4o", configurable_fields="any", config_prefix="bar")
+
+            # This works immediately - uses default gpt-4o
+            tokens = model.get_num_tokens("hello")
+
+            # This also works - switches to Claude at runtime
+            response = model.invoke(
+                "Hello",
+                config={"configurable": {"my_model_model": "claude-3-sonnet-20240229"}}
+            )
+    """  # noqa: E501
    model = init_chat_model("gpt-4o", configurable_fields="any", config_prefix="bar")
    for method in (
        "invoke",

View File

@@ -458,8 +458,7 @@ class BaseChatOpenAI(BaseChatModel):
         alias="api_key", default_factory=secret_from_env("OPENAI_API_KEY", default=None)
     )
     openai_api_base: Optional[str] = Field(default=None, alias="base_url")
-    """Base URL path for API requests, leave blank if not using a proxy or service
-    emulator."""
+    """Base URL path for API requests, leave blank if not using a proxy or service emulator."""  # noqa: E501
     openai_organization: Optional[str] = Field(default=None, alias="organization")
     """Automatically inferred from env var ``OPENAI_ORG_ID`` if not provided."""
     # to support explicit proxy for OpenAI
@@ -507,8 +506,9 @@ class BaseChatOpenAI(BaseChatModel):
     Reasoning models only, like OpenAI o1, o3, and o4-mini.

-    Currently supported values are low, medium, and high. Reducing reasoning effort
-    can result in faster responses and fewer tokens used on reasoning in a response.
+    Currently supported values are ``'minimal'``, ``'low'``, ``'medium'``, and
+    ``'high'``. Reducing reasoning effort can result in faster responses and fewer
+    tokens used on reasoning in a response.

     .. versionadded:: 0.2.14
     """
@@ -527,6 +527,17 @@ class BaseChatOpenAI(BaseChatModel):

     .. versionadded:: 0.3.24
     """
+
+    verbosity: Optional[str] = None
+    """Controls the verbosity level of responses for reasoning models. For use with the
+    Responses API.
+
+    Currently supported values are ``'low'``, ``'medium'``, and ``'high'``.
+    Controls how detailed the model's responses are.
+
+    .. versionadded:: 0.3.28
+    """

     tiktoken_model_name: Optional[str] = None
     """The model name to pass to tiktoken when using this class.
@@ -654,6 +665,7 @@ class BaseChatOpenAI(BaseChatModel):
             llm = ChatOpenAI(
                 model="o4-mini",
                 use_responses_api=True,
+                output_version="responses/v1",
             )
             llm.invoke([HumanMessage("How are you?")], previous_response_id="resp_123")
@@ -701,10 +713,24 @@ class BaseChatOpenAI(BaseChatModel):
     @model_validator(mode="before")
     @classmethod
     def validate_temperature(cls, values: dict[str, Any]) -> Any:
-        """Currently o1 models only allow temperature=1."""
+        """Validate temperature parameter for different models.
+
+        - o1 models only allow temperature=1
+        - gpt-5 models only allow temperature=1 or unset (defaults to 1)
+        """
         model = values.get("model_name") or values.get("model") or ""
+
+        # For o1 models, set temperature=1 if not provided
         if model.startswith("o1") and "temperature" not in values:
             values["temperature"] = 1
+
+        # For gpt-5 models, handle temperature restrictions
+        if model.startswith("gpt-5"):
+            temperature = values.get("temperature")
+            if temperature is not None and temperature != 1:
+                # For gpt-5, only temperature=1 is supported, so remove non-defaults
+                values.pop("temperature", None)
+
         return values

     @model_validator(mode="after")
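A short sketch of what this validator means for callers (assumes ``OPENAI_API_KEY`` is set; model names are illustrative):

    from langchain_openai import ChatOpenAI

    # o1-family: omitting temperature lets the validator pin it to 1.
    o1 = ChatOpenAI(model="o1")
    assert o1.temperature == 1

    # gpt-5-family: a temperature other than 1 is dropped during validation
    # instead of being forwarded to the API, so this constructs without error.
    gpt5 = ChatOpenAI(model="gpt-5", temperature=0.2)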
@@ -805,6 +831,7 @@ class BaseChatOpenAI(BaseChatModel):
             "temperature": self.temperature,
             "reasoning_effort": self.reasoning_effort,
             "reasoning": self.reasoning,
+            "verbosity": self.verbosity,
             "include": self.include,
             "service_tier": self.service_tier,
             "truncation": self.truncation,
@@ -1178,6 +1205,7 @@ class BaseChatOpenAI(BaseChatModel):
             kwargs["stop"] = stop

         payload = {**self._default_params, **kwargs}
         if self._use_responses_api(payload):
             if self.use_previous_response_id:
                 last_messages, previous_response_id = _get_last_messages(messages)
@@ -2366,7 +2394,11 @@ class ChatOpenAI(BaseChatOpenAI):  # type: ignore[override]
             from langchain_openai import ChatOpenAI

-            llm = ChatOpenAI(model="gpt-4.1-mini", use_responses_api=True)
+            llm = ChatOpenAI(
+                model="gpt-4.1-mini",
+                use_responses_api=True,
+                output_version="responses/v1",
+            )
             response = llm.invoke("Hi, I'm Bob.")
             response.text()
@@ -3486,6 +3518,11 @@ def _construct_responses_api_payload(
     if "reasoning_effort" in payload and "reasoning" not in payload:
         payload["reasoning"] = {"effort": payload.pop("reasoning_effort")}

+    # Remove temperature parameter for models that don't support it in responses API
+    model = payload.get("model", "")
+    if model.startswith("gpt-5"):
+        payload.pop("temperature", None)
+
     payload["input"] = _construct_responses_api_input(messages)
     if tools := payload.pop("tools", None):
         new_tools: list = []
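Taken together, a sketch of the resulting Responses API payload, mirroring the unit tests added later in this commit (assumes ``OPENAI_API_KEY`` is set; ``_get_request_payload`` is a private helper, used here only to show the mapping):

    from langchain_openai import ChatOpenAI

    llm = ChatOpenAI(
        model="gpt-5",
        reasoning_effort="minimal",
        verbosity="high",
        use_responses_api=True,
        output_version="responses/v1",
    )
    payload = llm._get_request_payload([{"role": "user", "content": "hello"}], stop=None)
    assert payload["reasoning"] == {"effort": "minimal"}  # effort nested under "reasoning"
    assert payload["verbosity"] == "high"  # verbosity passes through at the top level
    assert "temperature" not in payload  # stripped for gpt-5 models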

View File

@@ -32,7 +32,7 @@ from pydantic import BaseModel, Field
 from langchain_openai import ChatOpenAI
 from tests.unit_tests.fake.callbacks import FakeCallbackHandler

-MAX_TOKEN_COUNT = 16
+MAX_TOKEN_COUNT = 100


 @pytest.mark.scheduled
@@ -219,7 +219,7 @@ async def test_openai_abatch_tags(use_responses_api: bool) -> None:
 def test_openai_invoke() -> None:
     """Test invoke tokens from ChatOpenAI."""
     llm = ChatOpenAI(
-        model="o4-mini",
+        model="gpt-5-nano",
         service_tier="flex",  # Also test service_tier
         max_retries=3,  # Add retries for 503 capacity errors
     )
@@ -418,7 +418,7 @@ class MakeASandwich(BaseModel):
 def test_tool_use() -> None:
-    llm = ChatOpenAI(model="gpt-4-turbo", temperature=0)
+    llm = ChatOpenAI(model="gpt-5-nano", temperature=0)
     llm_with_tool = llm.bind_tools(tools=[GenerateUsername], tool_choice=True)
     msgs: list = [HumanMessage("Sally has green hair, what would her username be?")]
     ai_msg = llm_with_tool.invoke(msgs)
@@ -462,7 +462,7 @@ def test_tool_use() -> None:
 def test_manual_tool_call_msg(use_responses_api: bool) -> None:
     """Test passing in manually construct tool call message."""
     llm = ChatOpenAI(
-        model="gpt-3.5-turbo-0125", temperature=0, use_responses_api=use_responses_api
+        model="gpt-5-nano", temperature=0, use_responses_api=use_responses_api
     )
     llm_with_tool = llm.bind_tools(tools=[GenerateUsername])
     msgs: list = [
@@ -508,7 +508,7 @@ def test_manual_tool_call_msg(use_responses_api: bool) -> None:
 def test_bind_tools_tool_choice(use_responses_api: bool) -> None:
     """Test passing in manually construct tool call message."""
     llm = ChatOpenAI(
-        model="gpt-3.5-turbo-0125", temperature=0, use_responses_api=use_responses_api
+        model="gpt-5-nano", temperature=0, use_responses_api=use_responses_api
     )
     for tool_choice in ("any", "required"):
         llm_with_tools = llm.bind_tools(
@@ -523,7 +523,7 @@ def test_bind_tools_tool_choice(use_responses_api: bool) -> None:
 def test_disable_parallel_tool_calling() -> None:
-    llm = ChatOpenAI(model="gpt-4o-mini")
+    llm = ChatOpenAI(model="gpt-5-nano")
     llm_with_tools = llm.bind_tools([GenerateUsername], parallel_tool_calls=False)
     result = llm_with_tools.invoke(
         "Use the GenerateUsername tool to generate user names for:\n\n"
@@ -534,7 +534,7 @@ def test_disable_parallel_tool_calling() -> None:
     assert len(result.tool_calls) == 1


-@pytest.mark.parametrize("model", ["gpt-4o-mini", "o1", "gpt-4"])
+@pytest.mark.parametrize("model", ["gpt-4o-mini", "o1", "gpt-4", "gpt-5-nano"])
 def test_openai_structured_output(model: str) -> None:
     class MyModel(BaseModel):
         """A Person"""
@@ -694,7 +694,7 @@ def test_tool_calling_strict(use_responses_api: bool) -> None:
         input: Optional[int] = Field(default=None)

     model = ChatOpenAI(
-        model="gpt-4.1", temperature=0, use_responses_api=use_responses_api
+        model="gpt-5-nano", temperature=0, use_responses_api=use_responses_api
     )
     # N.B. magic_function adds metadata to schema (min/max for number fields)
     model_with_tools = model.bind_tools([magic_function], strict=True)
@@ -818,7 +818,7 @@ def test_json_schema_openai_format(
     strict: bool, method: Literal["json_schema", "function_calling"]
 ) -> None:
     """Test we can pass in OpenAI schema format specifying strict."""
-    llm = ChatOpenAI(model="gpt-4o-mini")
+    llm = ChatOpenAI(model="gpt-5-nano")
     schema = {
         "name": "get_weather",
         "description": "Fetches the weather in the given location",
@@ -939,7 +939,7 @@ def test_prediction_tokens() -> None:
         """
     )

-    llm = ChatOpenAI(model="gpt-4o")
+    llm = ChatOpenAI(model="gpt-4.1-nano")
     query = (
         "Replace the Username property with an Email property. "
         "Respond only with code, and with no markdown formatting."
@@ -981,7 +981,7 @@ class Foo(BaseModel):
 def test_stream_response_format() -> None:
     full: Optional[BaseMessageChunk] = None
     chunks = []
-    for chunk in ChatOpenAI(model="gpt-4o-mini").stream(
+    for chunk in ChatOpenAI(model="gpt-5-nano").stream(
         "how are ya", response_format=Foo
     ):
         chunks.append(chunk)
@@ -998,7 +998,7 @@ def test_stream_response_format() -> None:
 async def test_astream_response_format() -> None:
     full: Optional[BaseMessageChunk] = None
     chunks = []
-    async for chunk in ChatOpenAI(model="gpt-4o-mini").astream(
+    async for chunk in ChatOpenAI(model="gpt-5-nano").astream(
         "how are ya", response_format=Foo
     ):
         chunks.append(chunk)
@@ -1042,7 +1042,7 @@ def test_o1_stream_default_works() -> None:
 def test_multi_party_conversation() -> None:
-    llm = ChatOpenAI(model="gpt-4o")
+    llm = ChatOpenAI(model="gpt-5-nano")
     messages = [
         HumanMessage("Hi, I have black hair.", name="Alice"),
         HumanMessage("Hi, I have brown hair.", name="Bob"),
@@ -1057,7 +1057,7 @@ def test_structured_output_and_tools() -> None:
         response: str
         explanation: str

-    llm = ChatOpenAI(model="gpt-4o-mini").bind_tools(
+    llm = ChatOpenAI(model="gpt-5-nano").bind_tools(
         [GenerateUsername], strict=True, response_format=ResponseFormat
     )
@@ -1082,7 +1082,7 @@ def test_tools_and_structured_output() -> None:
         response: str
         explanation: str

-    llm = ChatOpenAI(model="gpt-4o-mini").with_structured_output(
+    llm = ChatOpenAI(model="gpt-5-nano").with_structured_output(
         ResponseFormat, strict=True, include_raw=True, tools=[GenerateUsername]
     )
@@ -1114,8 +1114,8 @@ def test_tools_and_structured_output() -> None:
 @pytest.mark.scheduled
 def test_prompt_cache_key_invoke() -> None:
-    """Test that prompt_cache_key works with invoke calls."""
-    chat = ChatOpenAI(model="gpt-4o-mini", max_completion_tokens=20)
+    """Test that `prompt_cache_key` works with invoke calls."""
+    chat = ChatOpenAI(model="gpt-5-nano", max_completion_tokens=500)
     messages = [HumanMessage("Say hello")]

     # Test that invoke works with prompt_cache_key parameter
@@ -1135,18 +1135,18 @@ def test_prompt_cache_key_invoke() -> None:
 @pytest.mark.scheduled
 def test_prompt_cache_key_usage_methods_integration() -> None:
-    """Integration test for prompt_cache_key usage methods."""
+    """Integration test for `prompt_cache_key` usage methods."""
     messages = [HumanMessage("Say hi")]

     # Test keyword argument method
-    chat = ChatOpenAI(model="gpt-4o-mini", max_completion_tokens=10)
+    chat = ChatOpenAI(model="gpt-5-nano", max_completion_tokens=10)
     response = chat.invoke(messages, prompt_cache_key="integration-test-v1")
     assert isinstance(response, AIMessage)
     assert isinstance(response.content, str)

     # Test model-level via model_kwargs
     chat_model_level = ChatOpenAI(
-        model="gpt-4o-mini",
+        model="gpt-5-nano",
         max_completion_tokens=10,
         model_kwargs={"prompt_cache_key": "integration-model-level-v1"},
     )

View File

@@ -175,7 +175,9 @@ class FooDict(TypedDict):
 def test_parsed_pydantic_schema() -> None:
-    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
+    llm = ChatOpenAI(
+        model=MODEL_NAME, use_responses_api=True, output_version="responses/v1"
+    )
     response = llm.invoke("how are ya", response_format=Foo)
     parsed = Foo(**json.loads(response.text()))
     assert parsed == response.additional_kwargs["parsed"]
@@ -193,7 +195,9 @@ def test_parsed_pydantic_schema() -> None:
 async def test_parsed_pydantic_schema_async() -> None:
-    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
+    llm = ChatOpenAI(
+        model=MODEL_NAME, use_responses_api=True, output_version="responses/v1"
+    )
     response = await llm.ainvoke("how are ya", response_format=Foo)
     parsed = Foo(**json.loads(response.text()))
     assert parsed == response.additional_kwargs["parsed"]
@@ -213,7 +217,9 @@ async def test_parsed_pydantic_schema_async() -> None:
 @pytest.mark.flaky(retries=3, delay=1)
 @pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict])
 def test_parsed_dict_schema(schema: Any) -> None:
-    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
+    llm = ChatOpenAI(
+        model=MODEL_NAME, use_responses_api=True, output_version="responses/v1"
+    )
     response = llm.invoke("how are ya", response_format=schema)
     parsed = json.loads(response.text())
     assert parsed == response.additional_kwargs["parsed"]
@@ -231,7 +237,9 @@ def test_parsed_dict_schema(schema: Any) -> None:
 def test_parsed_strict() -> None:
-    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
+    llm = ChatOpenAI(
+        model=MODEL_NAME, use_responses_api=True, output_version="responses/v1"
+    )

     class InvalidJoke(TypedDict):
         setup: Annotated[str, ..., "The setup of the joke"]
@@ -258,7 +266,9 @@ def test_parsed_strict() -> None:
 @pytest.mark.flaky(retries=3, delay=1)
 @pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict])
 async def test_parsed_dict_schema_async(schema: Any) -> None:
-    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
+    llm = ChatOpenAI(
+        model=MODEL_NAME, use_responses_api=True, output_version="responses/v1"
+    )
     response = await llm.ainvoke("how are ya", response_format=schema)
     parsed = json.loads(response.text())
     assert parsed == response.additional_kwargs["parsed"]
@@ -280,7 +290,9 @@ def test_function_calling_and_structured_output() -> None:
         """return x * y"""
         return x * y

-    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
+    llm = ChatOpenAI(
+        model=MODEL_NAME, use_responses_api=True, output_version="responses/v1"
+    )
     bound_llm = llm.bind_tools([multiply], response_format=Foo, strict=True)
     # Test structured output
     response = llm.invoke("how are ya", response_format=Foo)
@@ -324,7 +336,9 @@ def test_reasoning(output_version: Literal["v0", "responses/v1"]) -> None:
 def test_stateful_api() -> None:
-    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
+    llm = ChatOpenAI(
+        model=MODEL_NAME, use_responses_api=True, output_version="responses/v1"
+    )
     response = llm.invoke("how are you, my name is Bobo")
     assert "id" in response.response_metadata
@@ -421,7 +435,9 @@ def test_stream_reasoning_summary(
 @pytest.mark.vcr
 def test_code_interpreter() -> None:
-    llm = ChatOpenAI(model="o4-mini", use_responses_api=True)
+    llm = ChatOpenAI(
+        model="o4-mini", use_responses_api=True, output_version="responses/v1"
+    )
     llm_with_tools = llm.bind_tools(
         [{"type": "code_interpreter", "container": {"type": "auto"}}]
     )
@@ -431,13 +447,16 @@ def test_code_interpreter() -> None:
     }
     response = llm_with_tools.invoke([input_message])
     _check_response(response)
-    tool_outputs = response.additional_kwargs["tool_outputs"]
+    tool_outputs = [
+        block
+        for block in response.content
+        if isinstance(block, dict) and block.get("type") == "code_interpreter_call"
+    ]
     assert tool_outputs
     assert any(output["type"] == "code_interpreter_call" for output in tool_outputs)

     # Test streaming
     # Use same container
-    tool_outputs = response.additional_kwargs["tool_outputs"]
     assert len(tool_outputs) == 1
     container_id = tool_outputs[0]["container_id"]
     llm_with_tools = llm.bind_tools(
@@ -449,7 +468,11 @@ def test_code_interpreter() -> None:
         assert isinstance(chunk, AIMessageChunk)
         full = chunk if full is None else full + chunk
     assert isinstance(full, AIMessageChunk)
-    tool_outputs = full.additional_kwargs["tool_outputs"]
+    tool_outputs = [
+        block
+        for block in full.content
+        if isinstance(block, dict) and block.get("type") == "code_interpreter_call"
+    ]
     assert tool_outputs
     assert any(output["type"] == "code_interpreter_call" for output in tool_outputs)
@@ -460,7 +483,9 @@ def test_code_interpreter() -> None:
 @pytest.mark.vcr
 def test_mcp_builtin() -> None:
-    llm = ChatOpenAI(model="o4-mini", use_responses_api=True)
+    llm = ChatOpenAI(
+        model="o4-mini", use_responses_api=True, output_version="responses/v1"
+    )

     llm_with_tools = llm.bind_tools(
         [
@@ -489,8 +514,8 @@ def test_mcp_builtin() -> None:
                 "approve": True,
                 "approval_request_id": output["id"],
             }
-            for output in response.additional_kwargs["tool_outputs"]
-            if output["type"] == "mcp_approval_request"
+            for output in response.content
+            if isinstance(output, dict) and output.get("type") == "mcp_approval_request"
         ]
     )
     _ = llm_with_tools.invoke(
@@ -549,7 +574,9 @@ def test_mcp_builtin_zdr() -> None:
 @pytest.mark.vcr()
 def test_image_generation_streaming() -> None:
     """Test image generation streaming."""
-    llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True)
+    llm = ChatOpenAI(
+        model="gpt-4.1", use_responses_api=True, output_version="responses/v1"
+    )
     tool = {
         "type": "image_generation",
         # For testing purposes let's keep the quality low, so the test runs faster.
@@ -596,7 +623,13 @@ def test_image_generation_streaming() -> None:
     # At the moment, the streaming API does not pick up annotations fully.
     # So the following check is commented out.
     # _check_response(complete_ai_message)
-    tool_output = complete_ai_message.additional_kwargs["tool_outputs"][0]
+    tool_outputs = [
+        block
+        for block in complete_ai_message.content
+        if isinstance(block, dict) and block.get("type") == "image_generation_call"
+    ]
+    assert len(tool_outputs) == 1
+    tool_output = tool_outputs[0]
     assert set(tool_output.keys()).issubset(expected_keys)
@@ -604,7 +637,9 @@ def test_image_generation_streaming() -> None:
 def test_image_generation_multi_turn() -> None:
     """Test multi-turn editing of image generation by passing in history."""
     # Test multi-turn
-    llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True)
+    llm = ChatOpenAI(
+        model="gpt-4.1", use_responses_api=True, output_version="responses/v1"
+    )
     # Test invocation
     tool = {
         "type": "image_generation",
@@ -621,7 +656,13 @@ def test_image_generation_multi_turn() -> None:
     ]
     ai_message = llm_with_tools.invoke(chat_history)
     _check_response(ai_message)
-    tool_output = ai_message.additional_kwargs["tool_outputs"][0]
+    tool_outputs = [
+        block
+        for block in ai_message.content
+        if isinstance(block, dict) and block.get("type") == "image_generation_call"
+    ]
+    assert len(tool_outputs) == 1
+    tool_output = tool_outputs[0]

     # Example tool output for an image
     # {
@@ -670,10 +711,40 @@ def test_image_generation_multi_turn() -> None:
     ai_message2 = llm_with_tools.invoke(chat_history)
     _check_response(ai_message2)
-    tool_output2 = ai_message2.additional_kwargs["tool_outputs"][0]
+    tool_outputs2 = [
+        block
+        for block in ai_message2.content
+        if isinstance(block, dict) and block.get("type") == "image_generation_call"
+    ]
+    assert len(tool_outputs2) == 1
+    tool_output2 = tool_outputs2[0]
     assert set(tool_output2.keys()).issubset(expected_keys)


+@pytest.mark.xfail(
+    reason="verbosity parameter not yet supported by OpenAI Responses API"
+)
+def test_verbosity_parameter() -> None:
+    """Test verbosity parameter with Responses API.
+
+    TODO: This test is expected to fail until OpenAI enables verbosity support
+    in the Responses API for available models. The parameter is properly implemented
+    in the codebase but the API currently returns 'Unknown parameter: verbosity'.
+    Remove @pytest.mark.xfail when OpenAI adds support.
+    """
+    llm = ChatOpenAI(
+        model=MODEL_NAME,
+        verbosity="medium",
+        use_responses_api=True,
+        output_version="responses/v1",
+    )
+    response = llm.invoke([HumanMessage(content="Hello, explain quantum computing.")])
+    assert isinstance(response, AIMessage)
+    assert response.content
+    # When verbosity works, we expect the response to respect the verbosity level
+
+
 @pytest.mark.vcr()
 def test_custom_tool() -> None:
     @custom_tool

View File

@@ -874,8 +874,13 @@ def test_get_num_tokens_from_messages() -> None:
         ),
         ToolMessage("foobar", tool_call_id="foo"),
     ]
-    expected = 176
-    actual = llm.get_num_tokens_from_messages(messages)
+    expected = 431  # Updated to match token count with mocked 100x100 image
+
+    # Mock _url_to_size to avoid PIL dependency in unit tests
+    with patch("langchain_openai.chat_models.base._url_to_size") as mock_url_to_size:
+        mock_url_to_size.return_value = (100, 100)  # 100x100 pixel image
+        actual = llm.get_num_tokens_from_messages(messages)
     assert expected == actual

     # Test file inputs
@@ -1131,6 +1136,73 @@ def test_init_o1() -> None:
     assert len(record) == 0


+def test_init_minimal_reasoning_effort() -> None:
+    with pytest.warns(None) as record:  # type: ignore[call-overload]
+        ChatOpenAI(model="gpt-5", reasoning_effort="minimal")
+    assert len(record) == 0
+
+
+@pytest.mark.parametrize("use_responses_api", [False, True])
+@pytest.mark.parametrize("use_max_completion_tokens", [True, False])
+def test_minimal_reasoning_effort_payload(
+    use_max_completion_tokens: bool, use_responses_api: bool
+) -> None:
+    """Test that minimal reasoning effort is included in request payload."""
+    if use_max_completion_tokens:
+        kwargs = {"max_completion_tokens": 100}
+    else:
+        kwargs = {"max_tokens": 100}
+
+    init_kwargs: dict[str, Any] = {
+        "model": "gpt-5",
+        "reasoning_effort": "minimal",
+        "use_responses_api": use_responses_api,
+        **kwargs,
+    }
+    if use_responses_api:
+        init_kwargs["output_version"] = "responses/v1"
+
+    llm = ChatOpenAI(**init_kwargs)
+
+    messages = [
+        {"role": "developer", "content": "respond with just 'test'"},
+        {"role": "user", "content": "hello"},
+    ]
+
+    payload = llm._get_request_payload(messages, stop=None)
+
+    # When using responses API, reasoning_effort becomes reasoning.effort
+    if use_responses_api:
+        assert "reasoning" in payload
+        assert payload["reasoning"]["effort"] == "minimal"
+        # For responses API, tokens param becomes max_output_tokens
+        assert payload["max_output_tokens"] == 100
+    else:
+        # For non-responses API, reasoning_effort remains as is
+        assert payload["reasoning_effort"] == "minimal"
+        if use_max_completion_tokens:
+            assert payload["max_completion_tokens"] == 100
+        else:
+            # max_tokens gets converted to max_completion_tokens in non-responses API
+            assert payload["max_completion_tokens"] == 100
+
+
+def test_verbosity_parameter_payload() -> None:
+    """Test verbosity parameter is included in request payload for Responses API."""
+    llm = ChatOpenAI(
+        model="gpt-5",
+        verbosity="high",
+        use_responses_api=True,
+        output_version="responses/v1",
+    )
+    messages = [{"role": "user", "content": "hello"}]
+    payload = llm._get_request_payload(messages, stop=None)
+    assert payload["verbosity"] == "high"
+
+
 def test_structured_output_old_model() -> None:
     class Output(TypedDict):
         """output."""
@@ -2198,7 +2270,9 @@ def test__construct_responses_api_input_multiple_message_types() -> None:
     assert messages_copy == messages

     # Test dict messages
-    llm = ChatOpenAI(model="o4-mini", use_responses_api=True)
+    llm = ChatOpenAI(
+        model="o4-mini", use_responses_api=True, output_version="responses/v1"
+    )
     message_dicts: list = [
         {"role": "developer", "content": "This is a developer message."},
         {
@@ -2239,7 +2313,9 @@ class FakeTracer(BaseTracer):
 def test_mcp_tracing() -> None:
     # Test we exclude sensitive information from traces
-    llm = ChatOpenAI(model="o4-mini", use_responses_api=True)
+    llm = ChatOpenAI(
+        model="o4-mini", use_responses_api=True, output_version="responses/v1"
+    )
     tracer = FakeTracer()
     mock_client = MagicMock()
@@ -2430,7 +2506,9 @@ def test_get_last_messages() -> None:
 def test_get_request_payload_use_previous_response_id() -> None:
     # Default - don't use previous_response ID
-    llm = ChatOpenAI(model="o4-mini", use_responses_api=True)
+    llm = ChatOpenAI(
+        model="o4-mini", use_responses_api=True, output_version="responses/v1"
+    )
     messages = [
         HumanMessage("Hello"),
         AIMessage("Hi there!", response_metadata={"id": "resp_123"}),

View File

@@ -71,7 +71,10 @@ def test_prompt_cache_key_model_kwargs() -> None:
 def test_prompt_cache_key_responses_api() -> None:
     """Test that prompt_cache_key works with Responses API."""
     chat = ChatOpenAI(
-        model="gpt-4o-mini", use_responses_api=True, max_completion_tokens=10
+        model="gpt-4o-mini",
+        use_responses_api=True,
+        output_version="responses/v1",
+        max_completion_tokens=10,
     )
     messages = [HumanMessage("Hello")]

View File

@@ -37,7 +37,9 @@ def test_custom_tool() -> None:
         """Do thing."""
         pass

-    llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True).bind_tools([another_tool])
+    llm = ChatOpenAI(
+        model="gpt-4.1", use_responses_api=True, output_version="responses/v1"
+    ).bind_tools([another_tool])
     assert llm.kwargs == {  # type: ignore[attr-defined]
         "tools": [
             {
@@ -49,7 +51,9 @@ def test_custom_tool() -> None:
         ]
     }

-    llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True).bind_tools([my_tool])
+    llm = ChatOpenAI(
+        model="gpt-4.1", use_responses_api=True, output_version="responses/v1"
+    ).bind_tools([my_tool])
     assert llm.kwargs == {  # type: ignore[attr-defined]
         "tools": [{"type": "custom", "name": "my_tool", "description": "Do thing."}]
     }

View File

@@ -1,5 +1,5 @@
 version = 1
-revision = 2
+revision = 3
 requires-python = ">=3.9"
 resolution-markers = [
     "python_full_version >= '3.13' and platform_python_implementation == 'PyPy'",
@@ -480,7 +480,7 @@ wheels = [
 [[package]]
 name = "langchain-core"
-version = "0.3.73"
+version = "0.3.74"
 source = { editable = "../../core" }
 dependencies = [
     { name = "jsonpatch" },