feat(openai): minimal and verbosity (#32455)

Mason Daugherty 2025-08-07 22:24:21 -04:00 committed by GitHub
parent 6727d6e8c8
commit 00244122bd
9 changed files with 354 additions and 55 deletions

View File

@ -68,6 +68,32 @@ def test_init_unknown_provider() -> None:
clear=True,
)
def test_configurable() -> None:
"""Test configurable chat model behavior without default parameters.
Verifies that a configurable chat model initialized without default parameters:
- Has access to all standard runnable methods (``invoke``, ``stream``, etc.)
- Blocks access to non-configurable methods until configuration is provided
- Supports declarative operations (``bind_tools``) without mutating original model
- Can chain declarative operations and configuration to access full functionality
- Properly resolves to the configured model type when parameters are provided
Example:
.. code-block:: python
# This creates a configurable model without specifying which model
model = init_chat_model()
# This will FAIL - no model specified yet
model.get_num_tokens("hello") # AttributeError!
# This works - provides model at runtime
response = model.invoke(
"Hello",
config={"configurable": {"model": "gpt-4o"}}
)
"""
model = init_chat_model()
for method in (
@ -125,6 +151,7 @@ def test_configurable() -> None:
"presence_penalty": None,
"reasoning": None,
"reasoning_effort": None,
"verbosity": None,
"frequency_penalty": None,
"include": None,
"seed": None,
@ -170,6 +197,32 @@ def test_configurable() -> None:
clear=True,
)
def test_configurable_with_default() -> None:
"""Test configurable chat model behavior with default parameters.
Verifies that a configurable chat model initialized with default parameters:
- Has access to all standard runnable methods (``invoke``, ``stream``, etc.)
- Provides immediate access to non-configurable methods (e.g. ``get_num_tokens``)
- Supports model switching through runtime configuration using ``config_prefix``
- Maintains proper model identity and attributes when reconfigured
- Can be used in chains with different model providers via configuration
Example:
.. code-block:: python
# This creates a configurable model with default parameters (model)
model = init_chat_model("gpt-4o", configurable_fields="any", config_prefix="bar")
# This works immediately - uses default gpt-4o
tokens = model.get_num_tokens("hello")
# This also works - switches to Claude at runtime
response = model.invoke(
"Hello",
config={"configurable": {"my_model_model": "claude-3-sonnet-20240229"}}
)
""" # noqa: E501
model = init_chat_model("gpt-4o", configurable_fields="any", config_prefix="bar")
for method in (
"invoke",

View File

@ -68,6 +68,32 @@ def test_init_unknown_provider() -> None:
clear=True,
)
def test_configurable() -> None:
"""Test configurable chat model behavior without default parameters.
Verifies that a configurable chat model initialized without default parameters:
- Has access to all standard runnable methods (``invoke``, ``stream``, etc.)
- Blocks access to non-configurable methods until configuration is provided
- Supports declarative operations (``bind_tools``) without mutating original model
- Can chain declarative operations and configuration to access full functionality
- Properly resolves to the configured model type when parameters are provided
Example:
.. code-block:: python
# This creates a configurable model without specifying which model
model = init_chat_model()
# This will FAIL - no model specified yet
model.get_num_tokens("hello") # AttributeError!
# This works - provides model at runtime
response = model.invoke(
"Hello",
config={"configurable": {"model": "gpt-4o"}}
)
"""
model = init_chat_model()
for method in (
@ -125,6 +151,7 @@ def test_configurable() -> None:
"presence_penalty": None,
"reasoning": None,
"reasoning_effort": None,
"verbosity": None,
"frequency_penalty": None,
"include": None,
"seed": None,
@ -170,6 +197,32 @@ def test_configurable() -> None:
clear=True,
)
def test_configurable_with_default() -> None:
"""Test configurable chat model behavior with default parameters.
Verifies that a configurable chat model initialized with default parameters:
- Has access to all standard runnable methods (``invoke``, ``stream``, etc.)
- Provides immediate access to non-configurable methods (e.g. ``get_num_tokens``)
- Supports model switching through runtime configuration using ``config_prefix``
- Maintains proper model identity and attributes when reconfigured
- Can be used in chains with different model providers via configuration
Example:
.. code-block:: python
# This creates a configurable model with default parameters (model)
model = init_chat_model("gpt-4o", configurable_fields="any", config_prefix="bar")
# This works immediately - uses default gpt-4o
tokens = model.get_num_tokens("hello")
# This also works - switches to Claude at runtime
response = model.invoke(
"Hello",
config={"configurable": {"my_model_model": "claude-3-sonnet-20240229"}}
)
""" # noqa: E501
model = init_chat_model("gpt-4o", configurable_fields="any", config_prefix="bar")
for method in (
"invoke",

View File

@ -458,8 +458,7 @@ class BaseChatOpenAI(BaseChatModel):
alias="api_key", default_factory=secret_from_env("OPENAI_API_KEY", default=None)
)
openai_api_base: Optional[str] = Field(default=None, alias="base_url")
"""Base URL path for API requests, leave blank if not using a proxy or service
emulator."""
"""Base URL path for API requests, leave blank if not using a proxy or service emulator.""" # noqa: E501
openai_organization: Optional[str] = Field(default=None, alias="organization")
"""Automatically inferred from env var ``OPENAI_ORG_ID`` if not provided."""
# to support explicit proxy for OpenAI
@ -507,8 +506,9 @@ class BaseChatOpenAI(BaseChatModel):
Reasoning models only, like OpenAI o1, o3, and o4-mini.
Currently supported values are low, medium, and high. Reducing reasoning effort
can result in faster responses and fewer tokens used on reasoning in a response.
Currently supported values are ``'minimal'``, ``'low'``, ``'medium'``, and
``'high'``. Reducing reasoning effort can result in faster responses and fewer
tokens used on reasoning in a response.
.. versionadded:: 0.2.14
"""
@ -527,6 +527,17 @@ class BaseChatOpenAI(BaseChatModel):
.. versionadded:: 0.3.24
"""
verbosity: Optional[str] = None
"""Controls the verbosity level of responses for reasoning models. For use with the
Responses API.
Currently supported values are ``'low'``, ``'medium'``, and ``'high'``.
Controls how detailed the model's responses are.
.. versionadded:: 0.3.28
"""
tiktoken_model_name: Optional[str] = None
"""The model name to pass to tiktoken when using this class.
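A hedged sketch of the new ``verbosity`` field, mirroring the parameters used by this commit's unit tests; note that the integration test added later in this commit is marked xfail because the live Responses API did not yet accept the parameter, so an API error is possible:

from langchain_openai import ChatOpenAI

# Hedged sketch: same parameters as test_verbosity_parameter_payload below.
# Assumes OPENAI_API_KEY is set; the live API may still reject 'verbosity'.
llm = ChatOpenAI(
    model="gpt-5",
    verbosity="low",
    use_responses_api=True,
    output_version="responses/v1",
)
response = llm.invoke("Explain HTTP caching in two sentences.")
print(response.text())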
@ -654,6 +665,7 @@ class BaseChatOpenAI(BaseChatModel):
llm = ChatOpenAI(
model="o4-mini",
use_responses_api=True,
output_version="responses/v1",
)
llm.invoke([HumanMessage("How are you?")], previous_response_id="resp_123")
@ -701,10 +713,24 @@ class BaseChatOpenAI(BaseChatModel):
@model_validator(mode="before")
@classmethod
def validate_temperature(cls, values: dict[str, Any]) -> Any:
"""Currently o1 models only allow temperature=1."""
"""Validate temperature parameter for different models.
- o1 models only allow temperature=1
- gpt-5 models only allow temperature=1 or unset (defaults to 1)
"""
model = values.get("model_name") or values.get("model") or ""
# For o1 models, set temperature=1 if not provided
if model.startswith("o1") and "temperature" not in values:
values["temperature"] = 1
# For gpt-5 models, handle temperature restrictions
if model.startswith("gpt-5"):
temperature = values.get("temperature")
if temperature is not None and temperature != 1:
# For gpt-5, only temperature=1 is supported, so remove non-defaults
values.pop("temperature", None)
return values
@model_validator(mode="after")
@ -805,6 +831,7 @@ class BaseChatOpenAI(BaseChatModel):
"temperature": self.temperature,
"reasoning_effort": self.reasoning_effort,
"reasoning": self.reasoning,
"verbosity": self.verbosity,
"include": self.include,
"service_tier": self.service_tier,
"truncation": self.truncation,
@ -1178,6 +1205,7 @@ class BaseChatOpenAI(BaseChatModel):
kwargs["stop"] = stop
payload = {**self._default_params, **kwargs}
if self._use_responses_api(payload):
if self.use_previous_response_id:
last_messages, previous_response_id = _get_last_messages(messages)
@ -2366,7 +2394,11 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override]
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model="gpt-4.1-mini", use_responses_api=True)
llm = ChatOpenAI(
model="gpt-4.1-mini",
use_responses_api=True,
output_version="responses/v1",
)
response = llm.invoke("Hi, I'm Bob.")
response.text()
@ -3486,6 +3518,11 @@ def _construct_responses_api_payload(
if "reasoning_effort" in payload and "reasoning" not in payload:
payload["reasoning"] = {"effort": payload.pop("reasoning_effort")}
# Remove temperature parameter for models that don't support it in responses API
model = payload.get("model", "")
if model.startswith("gpt-5"):
payload.pop("temperature", None)
payload["input"] = _construct_responses_api_input(messages)
if tools := payload.pop("tools", None):
new_tools: list = []
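A hedged sketch of the payload-level effect, using the same private _get_request_payload helper that the unit tests later in this commit rely on; no request is sent, and OPENAI_API_KEY is assumed to be set:

from langchain_openai import ChatOpenAI

# Hedged sketch: for gpt-5 on the Responses API, temperature is also stripped
# during payload construction, even when supplied per call.
llm = ChatOpenAI(model="gpt-5", use_responses_api=True)
payload = llm._get_request_payload(
    [{"role": "user", "content": "hello"}], temperature=0.2
)
assert "temperature" not in payload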

View File

@ -32,7 +32,7 @@ from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI
from tests.unit_tests.fake.callbacks import FakeCallbackHandler
MAX_TOKEN_COUNT = 16
MAX_TOKEN_COUNT = 100
@pytest.mark.scheduled
@ -219,7 +219,7 @@ async def test_openai_abatch_tags(use_responses_api: bool) -> None:
def test_openai_invoke() -> None:
"""Test invoke tokens from ChatOpenAI."""
llm = ChatOpenAI(
model="o4-mini",
model="gpt-5-nano",
service_tier="flex", # Also test service_tier
max_retries=3, # Add retries for 503 capacity errors
)
@ -418,7 +418,7 @@ class MakeASandwich(BaseModel):
def test_tool_use() -> None:
llm = ChatOpenAI(model="gpt-4-turbo", temperature=0)
llm = ChatOpenAI(model="gpt-5-nano", temperature=0)
llm_with_tool = llm.bind_tools(tools=[GenerateUsername], tool_choice=True)
msgs: list = [HumanMessage("Sally has green hair, what would her username be?")]
ai_msg = llm_with_tool.invoke(msgs)
@ -462,7 +462,7 @@ def test_tool_use() -> None:
def test_manual_tool_call_msg(use_responses_api: bool) -> None:
"""Test passing in manually construct tool call message."""
llm = ChatOpenAI(
model="gpt-3.5-turbo-0125", temperature=0, use_responses_api=use_responses_api
model="gpt-5-nano", temperature=0, use_responses_api=use_responses_api
)
llm_with_tool = llm.bind_tools(tools=[GenerateUsername])
msgs: list = [
@ -508,7 +508,7 @@ def test_manual_tool_call_msg(use_responses_api: bool) -> None:
def test_bind_tools_tool_choice(use_responses_api: bool) -> None:
"""Test passing in manually construct tool call message."""
llm = ChatOpenAI(
model="gpt-3.5-turbo-0125", temperature=0, use_responses_api=use_responses_api
model="gpt-5-nano", temperature=0, use_responses_api=use_responses_api
)
for tool_choice in ("any", "required"):
llm_with_tools = llm.bind_tools(
@ -523,7 +523,7 @@ def test_bind_tools_tool_choice(use_responses_api: bool) -> None:
def test_disable_parallel_tool_calling() -> None:
llm = ChatOpenAI(model="gpt-4o-mini")
llm = ChatOpenAI(model="gpt-5-nano")
llm_with_tools = llm.bind_tools([GenerateUsername], parallel_tool_calls=False)
result = llm_with_tools.invoke(
"Use the GenerateUsername tool to generate user names for:\n\n"
@ -534,7 +534,7 @@ def test_disable_parallel_tool_calling() -> None:
assert len(result.tool_calls) == 1
@pytest.mark.parametrize("model", ["gpt-4o-mini", "o1", "gpt-4"])
@pytest.mark.parametrize("model", ["gpt-4o-mini", "o1", "gpt-4", "gpt-5-nano"])
def test_openai_structured_output(model: str) -> None:
class MyModel(BaseModel):
"""A Person"""
@ -694,7 +694,7 @@ def test_tool_calling_strict(use_responses_api: bool) -> None:
input: Optional[int] = Field(default=None)
model = ChatOpenAI(
model="gpt-4.1", temperature=0, use_responses_api=use_responses_api
model="gpt-5-nano", temperature=0, use_responses_api=use_responses_api
)
# N.B. magic_function adds metadata to schema (min/max for number fields)
model_with_tools = model.bind_tools([magic_function], strict=True)
@ -818,7 +818,7 @@ def test_json_schema_openai_format(
strict: bool, method: Literal["json_schema", "function_calling"]
) -> None:
"""Test we can pass in OpenAI schema format specifying strict."""
llm = ChatOpenAI(model="gpt-4o-mini")
llm = ChatOpenAI(model="gpt-5-nano")
schema = {
"name": "get_weather",
"description": "Fetches the weather in the given location",
@ -939,7 +939,7 @@ def test_prediction_tokens() -> None:
"""
)
llm = ChatOpenAI(model="gpt-4o")
llm = ChatOpenAI(model="gpt-4.1-nano")
query = (
"Replace the Username property with an Email property. "
"Respond only with code, and with no markdown formatting."
@ -981,7 +981,7 @@ class Foo(BaseModel):
def test_stream_response_format() -> None:
full: Optional[BaseMessageChunk] = None
chunks = []
for chunk in ChatOpenAI(model="gpt-4o-mini").stream(
for chunk in ChatOpenAI(model="gpt-5-nano").stream(
"how are ya", response_format=Foo
):
chunks.append(chunk)
@ -998,7 +998,7 @@ def test_stream_response_format() -> None:
async def test_astream_response_format() -> None:
full: Optional[BaseMessageChunk] = None
chunks = []
async for chunk in ChatOpenAI(model="gpt-4o-mini").astream(
async for chunk in ChatOpenAI(model="gpt-5-nano").astream(
"how are ya", response_format=Foo
):
chunks.append(chunk)
@ -1042,7 +1042,7 @@ def test_o1_stream_default_works() -> None:
def test_multi_party_conversation() -> None:
llm = ChatOpenAI(model="gpt-4o")
llm = ChatOpenAI(model="gpt-5-nano")
messages = [
HumanMessage("Hi, I have black hair.", name="Alice"),
HumanMessage("Hi, I have brown hair.", name="Bob"),
@ -1057,7 +1057,7 @@ def test_structured_output_and_tools() -> None:
response: str
explanation: str
llm = ChatOpenAI(model="gpt-4o-mini").bind_tools(
llm = ChatOpenAI(model="gpt-5-nano").bind_tools(
[GenerateUsername], strict=True, response_format=ResponseFormat
)
@ -1082,7 +1082,7 @@ def test_tools_and_structured_output() -> None:
response: str
explanation: str
llm = ChatOpenAI(model="gpt-4o-mini").with_structured_output(
llm = ChatOpenAI(model="gpt-5-nano").with_structured_output(
ResponseFormat, strict=True, include_raw=True, tools=[GenerateUsername]
)
@ -1114,8 +1114,8 @@ def test_tools_and_structured_output() -> None:
@pytest.mark.scheduled
def test_prompt_cache_key_invoke() -> None:
"""Test that prompt_cache_key works with invoke calls."""
chat = ChatOpenAI(model="gpt-4o-mini", max_completion_tokens=20)
"""Test that `prompt_cache_key` works with invoke calls."""
chat = ChatOpenAI(model="gpt-5-nano", max_completion_tokens=500)
messages = [HumanMessage("Say hello")]
# Test that invoke works with prompt_cache_key parameter
@ -1135,18 +1135,18 @@ def test_prompt_cache_key_invoke() -> None:
@pytest.mark.scheduled
def test_prompt_cache_key_usage_methods_integration() -> None:
"""Integration test for prompt_cache_key usage methods."""
"""Integration test for `prompt_cache_key` usage methods."""
messages = [HumanMessage("Say hi")]
# Test keyword argument method
chat = ChatOpenAI(model="gpt-4o-mini", max_completion_tokens=10)
chat = ChatOpenAI(model="gpt-5-nano", max_completion_tokens=10)
response = chat.invoke(messages, prompt_cache_key="integration-test-v1")
assert isinstance(response, AIMessage)
assert isinstance(response.content, str)
# Test model-level via model_kwargs
chat_model_level = ChatOpenAI(
model="gpt-4o-mini",
model="gpt-5-nano",
max_completion_tokens=10,
model_kwargs={"prompt_cache_key": "integration-model-level-v1"},
)

View File

@ -175,7 +175,9 @@ class FooDict(TypedDict):
def test_parsed_pydantic_schema() -> None:
llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
llm = ChatOpenAI(
model=MODEL_NAME, use_responses_api=True, output_version="responses/v1"
)
response = llm.invoke("how are ya", response_format=Foo)
parsed = Foo(**json.loads(response.text()))
assert parsed == response.additional_kwargs["parsed"]
@ -193,7 +195,9 @@ def test_parsed_pydantic_schema() -> None:
async def test_parsed_pydantic_schema_async() -> None:
llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
llm = ChatOpenAI(
model=MODEL_NAME, use_responses_api=True, output_version="responses/v1"
)
response = await llm.ainvoke("how are ya", response_format=Foo)
parsed = Foo(**json.loads(response.text()))
assert parsed == response.additional_kwargs["parsed"]
@ -213,7 +217,9 @@ async def test_parsed_pydantic_schema_async() -> None:
@pytest.mark.flaky(retries=3, delay=1)
@pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict])
def test_parsed_dict_schema(schema: Any) -> None:
llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
llm = ChatOpenAI(
model=MODEL_NAME, use_responses_api=True, output_version="responses/v1"
)
response = llm.invoke("how are ya", response_format=schema)
parsed = json.loads(response.text())
assert parsed == response.additional_kwargs["parsed"]
@ -231,7 +237,9 @@ def test_parsed_dict_schema(schema: Any) -> None:
def test_parsed_strict() -> None:
llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
llm = ChatOpenAI(
model=MODEL_NAME, use_responses_api=True, output_version="responses/v1"
)
class InvalidJoke(TypedDict):
setup: Annotated[str, ..., "The setup of the joke"]
@ -258,7 +266,9 @@ def test_parsed_strict() -> None:
@pytest.mark.flaky(retries=3, delay=1)
@pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict])
async def test_parsed_dict_schema_async(schema: Any) -> None:
llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
llm = ChatOpenAI(
model=MODEL_NAME, use_responses_api=True, output_version="responses/v1"
)
response = await llm.ainvoke("how are ya", response_format=schema)
parsed = json.loads(response.text())
assert parsed == response.additional_kwargs["parsed"]
@ -280,7 +290,9 @@ def test_function_calling_and_structured_output() -> None:
"""return x * y"""
return x * y
llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
llm = ChatOpenAI(
model=MODEL_NAME, use_responses_api=True, output_version="responses/v1"
)
bound_llm = llm.bind_tools([multiply], response_format=Foo, strict=True)
# Test structured output
response = llm.invoke("how are ya", response_format=Foo)
@ -324,7 +336,9 @@ def test_reasoning(output_version: Literal["v0", "responses/v1"]) -> None:
def test_stateful_api() -> None:
llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
llm = ChatOpenAI(
model=MODEL_NAME, use_responses_api=True, output_version="responses/v1"
)
response = llm.invoke("how are you, my name is Bobo")
assert "id" in response.response_metadata
@ -421,7 +435,9 @@ def test_stream_reasoning_summary(
@pytest.mark.vcr
def test_code_interpreter() -> None:
llm = ChatOpenAI(model="o4-mini", use_responses_api=True)
llm = ChatOpenAI(
model="o4-mini", use_responses_api=True, output_version="responses/v1"
)
llm_with_tools = llm.bind_tools(
[{"type": "code_interpreter", "container": {"type": "auto"}}]
)
@ -431,13 +447,16 @@ def test_code_interpreter() -> None:
}
response = llm_with_tools.invoke([input_message])
_check_response(response)
tool_outputs = response.additional_kwargs["tool_outputs"]
tool_outputs = [
block
for block in response.content
if isinstance(block, dict) and block.get("type") == "code_interpreter_call"
]
assert tool_outputs
assert any(output["type"] == "code_interpreter_call" for output in tool_outputs)
# Test streaming
# Use same container
tool_outputs = response.additional_kwargs["tool_outputs"]
assert len(tool_outputs) == 1
container_id = tool_outputs[0]["container_id"]
llm_with_tools = llm.bind_tools(
@ -449,7 +468,11 @@ def test_code_interpreter() -> None:
assert isinstance(chunk, AIMessageChunk)
full = chunk if full is None else full + chunk
assert isinstance(full, AIMessageChunk)
tool_outputs = full.additional_kwargs["tool_outputs"]
tool_outputs = [
block
for block in full.content
if isinstance(block, dict) and block.get("type") == "code_interpreter_call"
]
assert tool_outputs
assert any(output["type"] == "code_interpreter_call" for output in tool_outputs)
@ -460,7 +483,9 @@ def test_code_interpreter() -> None:
@pytest.mark.vcr
def test_mcp_builtin() -> None:
llm = ChatOpenAI(model="o4-mini", use_responses_api=True)
llm = ChatOpenAI(
model="o4-mini", use_responses_api=True, output_version="responses/v1"
)
llm_with_tools = llm.bind_tools(
[
@ -489,8 +514,8 @@ def test_mcp_builtin() -> None:
"approve": True,
"approval_request_id": output["id"],
}
for output in response.additional_kwargs["tool_outputs"]
if output["type"] == "mcp_approval_request"
for output in response.content
if isinstance(output, dict) and output.get("type") == "mcp_approval_request"
]
)
_ = llm_with_tools.invoke(
@ -549,7 +574,9 @@ def test_mcp_builtin_zdr() -> None:
@pytest.mark.vcr()
def test_image_generation_streaming() -> None:
"""Test image generation streaming."""
llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True)
llm = ChatOpenAI(
model="gpt-4.1", use_responses_api=True, output_version="responses/v1"
)
tool = {
"type": "image_generation",
# For testing purposes let's keep the quality low, so the test runs faster.
@ -596,7 +623,13 @@ def test_image_generation_streaming() -> None:
# At the moment, the streaming API does not pick up annotations fully.
# So the following check is commented out.
# _check_response(complete_ai_message)
tool_output = complete_ai_message.additional_kwargs["tool_outputs"][0]
tool_outputs = [
block
for block in complete_ai_message.content
if isinstance(block, dict) and block.get("type") == "image_generation_call"
]
assert len(tool_outputs) == 1
tool_output = tool_outputs[0]
assert set(tool_output.keys()).issubset(expected_keys)
@ -604,7 +637,9 @@ def test_image_generation_streaming() -> None:
def test_image_generation_multi_turn() -> None:
"""Test multi-turn editing of image generation by passing in history."""
# Test multi-turn
llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True)
llm = ChatOpenAI(
model="gpt-4.1", use_responses_api=True, output_version="responses/v1"
)
# Test invocation
tool = {
"type": "image_generation",
@ -621,7 +656,13 @@ def test_image_generation_multi_turn() -> None:
]
ai_message = llm_with_tools.invoke(chat_history)
_check_response(ai_message)
tool_output = ai_message.additional_kwargs["tool_outputs"][0]
tool_outputs = [
block
for block in ai_message.content
if isinstance(block, dict) and block.get("type") == "image_generation_call"
]
assert len(tool_outputs) == 1
tool_output = tool_outputs[0]
# Example tool output for an image
# {
@ -670,10 +711,40 @@ def test_image_generation_multi_turn() -> None:
ai_message2 = llm_with_tools.invoke(chat_history)
_check_response(ai_message2)
tool_output2 = ai_message2.additional_kwargs["tool_outputs"][0]
tool_outputs2 = [
block
for block in ai_message2.content
if isinstance(block, dict) and block.get("type") == "image_generation_call"
]
assert len(tool_outputs2) == 1
tool_output2 = tool_outputs2[0]
assert set(tool_output2.keys()).issubset(expected_keys)
@pytest.mark.xfail(
reason="verbosity parameter not yet supported by OpenAI Responses API"
)
def test_verbosity_parameter() -> None:
"""Test verbosity parameter with Responses API.
TODO: This test is expected to fail until OpenAI enables verbosity support
in the Responses API for available models. The parameter is properly implemented
in the codebase but the API currently returns 'Unknown parameter: verbosity'.
Remove @pytest.mark.xfail when OpenAI adds support.
"""
llm = ChatOpenAI(
model=MODEL_NAME,
verbosity="medium",
use_responses_api=True,
output_version="responses/v1",
)
response = llm.invoke([HumanMessage(content="Hello, explain quantum computing.")])
assert isinstance(response, AIMessage)
assert response.content
# When verbosity works, we expect the response to respect the verbosity level
@pytest.mark.vcr()
def test_custom_tool() -> None:
@custom_tool

View File

@ -874,8 +874,13 @@ def test_get_num_tokens_from_messages() -> None:
),
ToolMessage("foobar", tool_call_id="foo"),
]
expected = 176
actual = llm.get_num_tokens_from_messages(messages)
expected = 431 # Updated to match token count with mocked 100x100 image
# Mock _url_to_size to avoid PIL dependency in unit tests
with patch("langchain_openai.chat_models.base._url_to_size") as mock_url_to_size:
mock_url_to_size.return_value = (100, 100) # 100x100 pixel image
actual = llm.get_num_tokens_from_messages(messages)
assert expected == actual
# Test file inputs
@ -1131,6 +1136,73 @@ def test_init_o1() -> None:
assert len(record) == 0
def test_init_minimal_reasoning_effort() -> None:
with pytest.warns(None) as record: # type: ignore[call-overload]
ChatOpenAI(model="gpt-5", reasoning_effort="minimal")
assert len(record) == 0
@pytest.mark.parametrize("use_responses_api", [False, True])
@pytest.mark.parametrize("use_max_completion_tokens", [True, False])
def test_minimal_reasoning_effort_payload(
use_max_completion_tokens: bool, use_responses_api: bool
) -> None:
"""Test that minimal reasoning effort is included in request payload."""
if use_max_completion_tokens:
kwargs = {"max_completion_tokens": 100}
else:
kwargs = {"max_tokens": 100}
init_kwargs: dict[str, Any] = {
"model": "gpt-5",
"reasoning_effort": "minimal",
"use_responses_api": use_responses_api,
**kwargs,
}
if use_responses_api:
init_kwargs["output_version"] = "responses/v1"
llm = ChatOpenAI(**init_kwargs)
messages = [
{"role": "developer", "content": "respond with just 'test'"},
{"role": "user", "content": "hello"},
]
payload = llm._get_request_payload(messages, stop=None)
# When using responses API, reasoning_effort becomes reasoning.effort
if use_responses_api:
assert "reasoning" in payload
assert payload["reasoning"]["effort"] == "minimal"
# For responses API, tokens param becomes max_output_tokens
assert payload["max_output_tokens"] == 100
else:
# For non-responses API, reasoning_effort remains as is
assert payload["reasoning_effort"] == "minimal"
if use_max_completion_tokens:
assert payload["max_completion_tokens"] == 100
else:
# max_tokens gets converted to max_completion_tokens in non-responses API
assert payload["max_completion_tokens"] == 100
def test_verbosity_parameter_payload() -> None:
"""Test verbosity parameter is included in request payload for Responses API."""
llm = ChatOpenAI(
model="gpt-5",
verbosity="high",
use_responses_api=True,
output_version="responses/v1",
)
messages = [{"role": "user", "content": "hello"}]
payload = llm._get_request_payload(messages, stop=None)
assert payload["verbosity"] == "high"
def test_structured_output_old_model() -> None:
class Output(TypedDict):
"""output."""
@ -2198,7 +2270,9 @@ def test__construct_responses_api_input_multiple_message_types() -> None:
assert messages_copy == messages
# Test dict messages
llm = ChatOpenAI(model="o4-mini", use_responses_api=True)
llm = ChatOpenAI(
model="o4-mini", use_responses_api=True, output_version="responses/v1"
)
message_dicts: list = [
{"role": "developer", "content": "This is a developer message."},
{
@ -2239,7 +2313,9 @@ class FakeTracer(BaseTracer):
def test_mcp_tracing() -> None:
# Test we exclude sensitive information from traces
llm = ChatOpenAI(model="o4-mini", use_responses_api=True)
llm = ChatOpenAI(
model="o4-mini", use_responses_api=True, output_version="responses/v1"
)
tracer = FakeTracer()
mock_client = MagicMock()
@ -2430,7 +2506,9 @@ def test_get_last_messages() -> None:
def test_get_request_payload_use_previous_response_id() -> None:
# Default - don't use previous_response ID
llm = ChatOpenAI(model="o4-mini", use_responses_api=True)
llm = ChatOpenAI(
model="o4-mini", use_responses_api=True, output_version="responses/v1"
)
messages = [
HumanMessage("Hello"),
AIMessage("Hi there!", response_metadata={"id": "resp_123"}),

View File

@ -71,7 +71,10 @@ def test_prompt_cache_key_model_kwargs() -> None:
def test_prompt_cache_key_responses_api() -> None:
"""Test that prompt_cache_key works with Responses API."""
chat = ChatOpenAI(
model="gpt-4o-mini", use_responses_api=True, max_completion_tokens=10
model="gpt-4o-mini",
use_responses_api=True,
output_version="responses/v1",
max_completion_tokens=10,
)
messages = [HumanMessage("Hello")]

View File

@ -37,7 +37,9 @@ def test_custom_tool() -> None:
"""Do thing."""
pass
llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True).bind_tools([another_tool])
llm = ChatOpenAI(
model="gpt-4.1", use_responses_api=True, output_version="responses/v1"
).bind_tools([another_tool])
assert llm.kwargs == { # type: ignore[attr-defined]
"tools": [
{
@ -49,7 +51,9 @@ def test_custom_tool() -> None:
]
}
llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True).bind_tools([my_tool])
llm = ChatOpenAI(
model="gpt-4.1", use_responses_api=True, output_version="responses/v1"
).bind_tools([my_tool])
assert llm.kwargs == { # type: ignore[attr-defined]
"tools": [{"type": "custom", "name": "my_tool", "description": "Do thing."}]
}

View File

@ -1,5 +1,5 @@
version = 1
revision = 2
revision = 3
requires-python = ">=3.9"
resolution-markers = [
"python_full_version >= '3.13' and platform_python_implementation == 'PyPy'",
@ -480,7 +480,7 @@ wheels = [
[[package]]
name = "langchain-core"
version = "0.3.73"
version = "0.3.74"
source = { editable = "../../core" }
dependencies = [
{ name = "jsonpatch" },