From ec2b34a02ddf3b6ac7a9ce4b632d3c7b2d61657c Mon Sep 17 00:00:00 2001 From: ccurme Date: Thu, 7 Aug 2025 17:30:01 -0300 Subject: [PATCH 1/5] feat(openai): custom tools (#32449) --- docs/docs/integrations/chat/openai.ipynb | 157 ++++++++++++++++++ libs/core/langchain_core/tools/base.py | 9 +- .../langchain_core/utils/function_calling.py | 11 ++ .../openai/langchain_openai/__init__.py | 2 + .../langchain_openai/chat_models/base.py | 49 +++++- .../openai/langchain_openai/tools/__init__.py | 3 + .../langchain_openai/tools/custom_tool.py | 109 ++++++++++++ .../tests/cassettes/test_custom_tool.yaml.gz | Bin 0 -> 4367 bytes .../chat_models/test_responses_api.py | 31 +++- .../openai/tests/unit_tests/test_imports.py | 1 + .../openai/tests/unit_tests/test_tools.py | 120 +++++++++++++ 11 files changed, 488 insertions(+), 4 deletions(-) create mode 100644 libs/partners/openai/langchain_openai/tools/__init__.py create mode 100644 libs/partners/openai/langchain_openai/tools/custom_tool.py create mode 100644 libs/partners/openai/tests/cassettes/test_custom_tool.yaml.gz create mode 100644 libs/partners/openai/tests/unit_tests/test_tools.py diff --git a/docs/docs/integrations/chat/openai.ipynb b/docs/docs/integrations/chat/openai.ipynb index 81b0684712a..54db230ba95 100644 --- a/docs/docs/integrations/chat/openai.ipynb +++ b/docs/docs/integrations/chat/openai.ipynb @@ -447,6 +447,163 @@ ")" ] }, + { + "cell_type": "markdown", + "id": "c5d9d19d-8ab1-4d9d-b3a0-56ee4e89c528", + "metadata": {}, + "source": [ + "### Custom tools\n", + "\n", + ":::info Requires ``langchain-openai>=0.3.29``\n", + "\n", + ":::\n", + "\n", + "[Custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools) support tools with arbitrary string inputs. They can be particularly useful when you expect your string arguments to be long or complex." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "a47c809b-852f-46bd-8b9e-d9534c17213d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================\u001b[1m Human Message \u001b[0m=================================\n", + "\n", + "Use the tool to calculate 3^3.\n", + "==================================\u001b[1m Ai Message \u001b[0m==================================\n", + "\n", + "[{'id': 'rs_6894ff5747c0819d9b02fc5645b0be9c000169fd9fb68d99', 'summary': [], 'type': 'reasoning'}, {'call_id': 'call_7SYwMSQPbbEqFcKlKOpXeEux', 'input': 'print(3**3)', 'name': 'execute_code', 'type': 'custom_tool_call', 'id': 'ctc_6894ff5b9f54819d8155a63638d34103000169fd9fb68d99', 'status': 'completed'}]\n", + "Tool Calls:\n", + " execute_code (call_7SYwMSQPbbEqFcKlKOpXeEux)\n", + " Call ID: call_7SYwMSQPbbEqFcKlKOpXeEux\n", + " Args:\n", + " __arg1: print(3**3)\n", + "=================================\u001b[1m Tool Message \u001b[0m=================================\n", + "Name: execute_code\n", + "\n", + "[{'type': 'custom_tool_call_output', 'output': '27'}]\n", + "==================================\u001b[1m Ai Message \u001b[0m==================================\n", + "\n", + "[{'type': 'text', 'text': '27', 'annotations': [], 'id': 'msg_6894ff5db3b8819d9159b3a370a25843000169fd9fb68d99'}]\n" + ] + } + ], + "source": [ + "from langchain_openai import ChatOpenAI, custom_tool\n", + "from langgraph.prebuilt import create_react_agent\n", + "\n", + "\n", + "@custom_tool\n", + "def execute_code(code: str) -> str:\n", + " \"\"\"Execute python code.\"\"\"\n", + " return \"27\"\n", + "\n", + "\n", + "llm = ChatOpenAI(model=\"gpt-5\", output_version=\"responses/v1\")\n", + "\n", + "agent = create_react_agent(llm, [execute_code])\n", + "\n", + "input_message = {\"role\": \"user\", \"content\": \"Use the tool to calculate 3^3.\"}\n", + "for step in agent.stream(\n", + " {\"messages\": [input_message]},\n", + " stream_mode=\"values\",\n", + "):\n", + " step[\"messages\"][-1].pretty_print()" + ] + }, + { + "cell_type": "markdown", + "id": "5ef93be6-6d4c-4eea-acfd-248774074082", + "metadata": {}, + "source": [ + "
<details>\n",
+    "<summary>Context-free grammars</summary>\n",
+    "\n",
+    "OpenAI supports the specification of a [context-free grammar](https://platform.openai.com/docs/guides/function-calling#context-free-grammars) for custom tool inputs in `lark` or `regex` format. See [OpenAI docs](https://platform.openai.com/docs/guides/function-calling#context-free-grammars) for details. The `format` parameter can be passed into `@custom_tool` as shown below:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "2ae04586-be33-49c6-8947-7867801d868f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "================================\u001b[1m Human Message \u001b[0m=================================\n",
+      "\n",
+      "Use the tool to calculate 3^3.\n",
+      "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
+      "\n",
+      "[{'id': 'rs_689500828a8481a297ff0f98e328689c0681550c89797f43', 'summary': [], 'type': 'reasoning'}, {'call_id': 'call_jzH01RVhu6EFz7yUrOFXX55s', 'input': '3 * 3 * 3', 'name': 'do_math', 'type': 'custom_tool_call', 'id': 'ctc_6895008d57bc81a2b84d0993517a66b90681550c89797f43', 'status': 'completed'}]\n",
+      "Tool Calls:\n",
+      "  do_math (call_jzH01RVhu6EFz7yUrOFXX55s)\n",
+      " Call ID: call_jzH01RVhu6EFz7yUrOFXX55s\n",
+      "  Args:\n",
+      "    __arg1: 3 * 3 * 3\n",
+      "=================================\u001b[1m Tool Message \u001b[0m=================================\n",
+      "Name: do_math\n",
+      "\n",
+      "[{'type': 'custom_tool_call_output', 'output': '27'}]\n",
+      "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
+      "\n",
+      "[{'type': 'text', 'text': '27', 'annotations': [], 'id': 'msg_6895009776b881a2a25f0be8507d08f20681550c89797f43'}]\n"
+     ]
+    }
+   ],
+   "source": [
+    "from langchain_openai import ChatOpenAI, custom_tool\n",
+    "from langgraph.prebuilt import create_react_agent\n",
+    "\n",
+    "grammar = \"\"\"\n",
+    "start: expr\n",
+    "expr: term (SP ADD SP term)* -> add\n",
+    "| term\n",
+    "term: factor (SP MUL SP factor)* -> mul\n",
+    "| factor\n",
+    "factor: INT\n",
+    "SP: \" \"\n",
+    "ADD: \"+\"\n",
+    "MUL: \"*\"\n",
+    "%import common.INT\n",
+    "\"\"\"\n",
+    "\n",
+    "format_ = {\"type\": \"grammar\", \"syntax\": \"lark\", \"definition\": grammar}\n",
+    "\n",
+    "\n",
+    "# highlight-next-line\n",
+    "@custom_tool(format=format_)\n",
+    "def do_math(input_string: str) -> str:\n",
+    "    \"\"\"Do a mathematical operation.\"\"\"\n",
+    "    return \"27\"\n",
+    "\n",
+    "\n",
+    "llm = ChatOpenAI(model=\"gpt-5\", output_version=\"responses/v1\")\n",
+    "\n",
+    "agent = create_react_agent(llm, [do_math])\n",
+    "\n",
+    "input_message = {\"role\": \"user\", \"content\": \"Use the tool to calculate 3^3.\"}\n",
+    "for step in agent.stream(\n",
+    "    {\"messages\": [input_message]},\n",
+    "    stream_mode=\"values\",\n",
+    "):\n",
+    "    step[\"messages\"][-1].pretty_print()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c63430c9-c7b0-4e92-a491-3f165dddeb8f",
+   "metadata": {},
+   "source": [
+    "</details>
" + ] + }, { "cell_type": "markdown", "id": "84833dd0-17e9-4269-82ed-550639d65751", diff --git a/libs/core/langchain_core/tools/base.py b/libs/core/langchain_core/tools/base.py index e54a09709d6..34ca4b4da30 100644 --- a/libs/core/langchain_core/tools/base.py +++ b/libs/core/langchain_core/tools/base.py @@ -74,7 +74,14 @@ if TYPE_CHECKING: from collections.abc import Sequence FILTERED_ARGS = ("run_manager", "callbacks") -TOOL_MESSAGE_BLOCK_TYPES = ("text", "image_url", "image", "json", "search_result") +TOOL_MESSAGE_BLOCK_TYPES = ( + "text", + "image_url", + "image", + "json", + "search_result", + "custom_tool_call_output", +) class SchemaAnnotationError(TypeError): diff --git a/libs/core/langchain_core/utils/function_calling.py b/libs/core/langchain_core/utils/function_calling.py index d7059fded47..609129ac58b 100644 --- a/libs/core/langchain_core/utils/function_calling.py +++ b/libs/core/langchain_core/utils/function_calling.py @@ -575,12 +575,23 @@ def convert_to_openai_tool( Added support for OpenAI's image generation built-in tool. """ + from langchain_core.tools import Tool + if isinstance(tool, dict): if tool.get("type") in _WellKnownOpenAITools: return tool # As of 03.12.25 can be "web_search_preview" or "web_search_preview_2025_03_11" if (tool.get("type") or "").startswith("web_search_preview"): return tool + if isinstance(tool, Tool) and (tool.metadata or {}).get("type") == "custom_tool": + oai_tool = { + "type": "custom", + "name": tool.name, + "description": tool.description, + } + if tool.metadata is not None and "format" in tool.metadata: + oai_tool["format"] = tool.metadata["format"] + return oai_tool oai_function = convert_to_openai_function(tool, strict=strict) return {"type": "function", "function": oai_function} diff --git a/libs/partners/openai/langchain_openai/__init__.py b/libs/partners/openai/langchain_openai/__init__.py index a1756f0526d..40a94c25ce1 100644 --- a/libs/partners/openai/langchain_openai/__init__.py +++ b/libs/partners/openai/langchain_openai/__init__.py @@ -1,6 +1,7 @@ from langchain_openai.chat_models import AzureChatOpenAI, ChatOpenAI from langchain_openai.embeddings import AzureOpenAIEmbeddings, OpenAIEmbeddings from langchain_openai.llms import AzureOpenAI, OpenAI +from langchain_openai.tools import custom_tool __all__ = [ "OpenAI", @@ -9,4 +10,5 @@ __all__ = [ "AzureOpenAI", "AzureChatOpenAI", "AzureOpenAIEmbeddings", + "custom_tool", ] diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 56220d9f531..5d0b480ce18 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -3582,6 +3582,20 @@ def _make_computer_call_output_from_message(message: ToolMessage) -> dict: return computer_call_output +def _make_custom_tool_output_from_message(message: ToolMessage) -> Optional[dict]: + custom_tool_output = None + for block in message.content: + if isinstance(block, dict) and block.get("type") == "custom_tool_call_output": + custom_tool_output = { + "type": "custom_tool_call_output", + "call_id": message.tool_call_id, + "output": block.get("output") or "", + } + break + + return custom_tool_output + + def _pop_index_and_sub_index(block: dict) -> dict: """When streaming, langchain-core uses the ``index`` key to aggregate text blocks. OpenAI API does not support this key, so we need to remove it. 
@@ -3608,7 +3622,10 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list: msg.pop("name") if msg["role"] == "tool": tool_output = msg["content"] - if lc_msg.additional_kwargs.get("type") == "computer_call_output": + custom_tool_output = _make_custom_tool_output_from_message(lc_msg) # type: ignore[arg-type] + if custom_tool_output: + input_.append(custom_tool_output) + elif lc_msg.additional_kwargs.get("type") == "computer_call_output": computer_call_output = _make_computer_call_output_from_message( cast(ToolMessage, lc_msg) ) @@ -3663,6 +3680,7 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list: "file_search_call", "function_call", "computer_call", + "custom_tool_call", "code_interpreter_call", "mcp_call", "mcp_list_tools", @@ -3690,7 +3708,8 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list: content_call_ids = { block["call_id"] for block in input_ - if block.get("type") == "function_call" and "call_id" in block + if block.get("type") in ("function_call", "custom_tool_call") + and "call_id" in block } for tool_call in tool_calls: if tool_call["id"] not in content_call_ids: @@ -3841,6 +3860,15 @@ def _construct_lc_result_from_responses_api( "error": error, } invalid_tool_calls.append(tool_call) + elif output.type == "custom_tool_call": + content_blocks.append(output.model_dump(exclude_none=True, mode="json")) + tool_call = { + "type": "tool_call", + "name": output.name, + "args": {"__arg1": output.input}, + "id": output.call_id, + } + tool_calls.append(tool_call) elif output.type in ( "reasoning", "web_search_call", @@ -4044,6 +4072,23 @@ def _convert_responses_chunk_to_generation_chunk( tool_output = chunk.item.model_dump(exclude_none=True, mode="json") tool_output["index"] = current_index content.append(tool_output) + elif ( + chunk.type == "response.output_item.done" + and chunk.item.type == "custom_tool_call" + ): + _advance(chunk.output_index) + tool_output = chunk.item.model_dump(exclude_none=True, mode="json") + tool_output["index"] = current_index + content.append(tool_output) + tool_call_chunks.append( + { + "type": "tool_call_chunk", + "name": chunk.item.name, + "args": json.dumps({"__arg1": chunk.item.input}), + "id": chunk.item.call_id, + "index": current_index, + } + ) elif chunk.type == "response.function_call_arguments.delta": _advance(chunk.output_index) tool_call_chunks.append( diff --git a/libs/partners/openai/langchain_openai/tools/__init__.py b/libs/partners/openai/langchain_openai/tools/__init__.py new file mode 100644 index 00000000000..11e5dd9c95a --- /dev/null +++ b/libs/partners/openai/langchain_openai/tools/__init__.py @@ -0,0 +1,3 @@ +from langchain_openai.tools.custom_tool import custom_tool + +__all__ = ["custom_tool"] diff --git a/libs/partners/openai/langchain_openai/tools/custom_tool.py b/libs/partners/openai/langchain_openai/tools/custom_tool.py new file mode 100644 index 00000000000..eb527083476 --- /dev/null +++ b/libs/partners/openai/langchain_openai/tools/custom_tool.py @@ -0,0 +1,109 @@ +import inspect +from collections.abc import Awaitable +from typing import Any, Callable + +from langchain_core.tools import tool + + +def _make_wrapped_func(func: Callable[..., str]) -> Callable[..., list[dict[str, Any]]]: + def wrapped(x: str) -> list[dict[str, Any]]: + return [{"type": "custom_tool_call_output", "output": func(x)}] + + return wrapped + + +def _make_wrapped_coroutine( + coroutine: Callable[..., Awaitable[str]], +) -> Callable[..., Awaitable[list[dict[str, Any]]]]: + 
async def wrapped(*args: Any, **kwargs: Any) -> list[dict[str, Any]]: + result = await coroutine(*args, **kwargs) + return [{"type": "custom_tool_call_output", "output": result}] + + return wrapped + + +def custom_tool(*args: Any, **kwargs: Any) -> Any: + """Decorator to create an OpenAI custom tool. + + Custom tools allow for tools with (potentially long) freeform string inputs. + + See below for an example using LangGraph: + + .. code-block:: python + + @custom_tool + def execute_code(code: str) -> str: + \"\"\"Execute python code.\"\"\" + return "27" + + + llm = ChatOpenAI(model="gpt-5", output_version="responses/v1") + + agent = create_react_agent(llm, [execute_code]) + + input_message = {"role": "user", "content": "Use the tool to calculate 3^3."} + for step in agent.stream( + {"messages": [input_message]}, + stream_mode="values", + ): + step["messages"][-1].pretty_print() + + You can also specify a format for a corresponding context-free grammar using the + ``format`` kwarg: + + .. code-block:: python + + from langchain_openai import ChatOpenAI, custom_tool + from langgraph.prebuilt import create_react_agent + + grammar = \"\"\" + start: expr + expr: term (SP ADD SP term)* -> add + | term + term: factor (SP MUL SP factor)* -> mul + | factor + factor: INT + SP: " " + ADD: "+" + MUL: "*" + %import common.INT + \"\"\" + + format = {"type": "grammar", "syntax": "lark", "definition": grammar} + + # highlight-next-line + @custom_tool(format=format) + def do_math(input_string: str) -> str: + \"\"\"Do a mathematical operation.\"\"\" + return "27" + + + llm = ChatOpenAI(model="gpt-5", output_version="responses/v1") + + agent = create_react_agent(llm, [do_math]) + + input_message = {"role": "user", "content": "Use the tool to calculate 3^3."} + for step in agent.stream( + {"messages": [input_message]}, + stream_mode="values", + ): + step["messages"][-1].pretty_print() + """ + + def decorator(func: Callable[..., Any]) -> Any: + metadata = {"type": "custom_tool"} + if "format" in kwargs: + metadata["format"] = kwargs.pop("format") + tool_obj = tool(infer_schema=False, **kwargs)(func) + tool_obj.metadata = metadata + tool_obj.description = func.__doc__ + if inspect.iscoroutinefunction(func): + tool_obj.coroutine = _make_wrapped_coroutine(func) + else: + tool_obj.func = _make_wrapped_func(func) + return tool_obj + + if args and callable(args[0]) and not kwargs: + return decorator(args[0]) + + return decorator diff --git a/libs/partners/openai/tests/cassettes/test_custom_tool.yaml.gz b/libs/partners/openai/tests/cassettes/test_custom_tool.yaml.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a0ea3d888f512c4d6d40cb0bfbd7273a8ad045d GIT binary patch literal 4367 zcmV+q5%BIGiwFQo2$g68|Lt5`kLt(~e$THsd5W|Wl5JoUj+6)PV`gj?FuuLv3-}HP zn~Sad_*A#uzJOui%p}>(&Pua81MPBkb=6m2mCL&~e;mv(3cvrQ@lD$r@%L|k{i|jN zt9|_KUw`=q|CsS)rv*LQ=(E2{&Cphu&cPUq2lO}ViHW-CQLFKCV~jgTt>2@xxaS%E zC^V!Ir7pUO>~IIxKH0k839&$R0T%{vq0HKAkqTL#KB{9eWSy<8Hha{Yu(aR$lf-zL zc#1^XQ(v{T7v6)AhVeY|N zXx&QJ&0UoK-uU*nUw)Yn2i?9f9Q^Xj-%1M_W}t5kJJ{S?d5Em--cDALy$$ZH)3-rj z>hCX#%phubX0VB@2Np*0&b+hC+J^Ubc?ivYW3>T6cNU!+VYIRX&kV!HYOh<5E)C;A ze{ik0x@+R{(rx(Q!re-DVQY;e9Qu8um#^BJ(LQeI+rz^q*~3!XH=kSmu2#MYUS)j$ z_SH_WTO2cNDbBJr9gHl_*_=6__ou?J z$q)1sFX%0WVNa?*Y^MSx-d3D0!#O=~DzGhlx270r(kM3JW)cn163pBB7$ZTfN~1($ z+B?nHGX;1{8=u!JX=Y=Au*gxQb0|}7UK|>S1!%Bvy`JESKa^%S;YZukV(D>eA3=(DN0+^Au)c za|0#j#>+O?Gey<$h%#iZ*s{jZCv6PcfgP}23Y!6P@}L#H#-}{$q~>8dwDE#HBmAHZ zb3wnd4%hdC1+uRx_i)u8d@L`a1 zNZqF9Fd+LRN^`Q)U*)Kz*;wxGF 
zt|($Kj}2*w@cyVV=L8T!1EnZa?s+MUz=aU?fPS3_-AVIDQ3}6=`iX1M+Ep=qoO?RM z$154MmVgDqwR3_Ri(bk(O?kcrsMw?zwLnX{^92cCknpF2guNN=wn1nX1tb<0MtkJZ zmq-DDbJ}iLLT>s%%Vho}CrWE7EKkz`*H)yF103h92rO{Ixx)!cqA{>ecjmFB7Q; zlcSER&2gIshL>sL9ZDkCfN_)}dAP*AT!+|?A@LZJh{&sDv}KMuN8l|d_(8O7uS#uz z!|g^3#8B&}8Usac$xRHEfO?}45vWY$)b1S&AaI%TIT?fB1N~(d@e3;E1}yysbRB-0 z#0ao?DkH=j2_JEHQ#tW2`mqsFM0Wfc){h8(Ch~&!zh>e$1p|az_@bplB3eo9pt^>q zHHo(Lz=d@^EfXovQZ3huZ0So_T$0u*8$!n4;?TQkuQtJCp_wqh|BqTCh|R+jvWh&Dv~_>e$v4wjs7NiwYSM@7Fk5gbU7j3o0=?} z*_eX=_KeKgPKrE1<%9>?=EYDZ5lwR(<4HK7tja~+?`*;X(#%)bI=)aojO^? zk3n0AvX?n>cXB){&}PMm?aSz>I1YT5`>!whs^5t-14IBHI|IlEx1LPCJ@W+jh~+h~&j5maUKRUf6GjH1|0W#f6|^d}5x4UoX^ zj29PctVyD~lEnyJ0;f)Hogt8QL_179WkhEpjqyJ+H!j+K>?phr&Qi>mC|)Ue=Q#vd zS>2}G_`B-)RYa&q9k3Oje0J-KtJm8m{jS$32V+h=``5k6Ok{cty$<@XoS$UpBs1qD zkl$EiCO8zhe(Aer*SfiubVT+u{OB87m7BDfP!6yT(Rc_ZT)^Q+4o)+Nr zNAdCLwHo6W%lK*vt6(T^!AW~+RphAx8zozj=0J6M%n2Mpq&(-KR0e^3&}o1cd2X$w zGl`x{7cR$kei_nqFl=+1iwuAquQ|80Oa+2OnI^o`eJwObHWcIQM2n0uw~X-+N7#C{ zyQ&iUqsF|ee6OQy{esQk_!`r50nzii1%p*o|HZ=G1NFP{Rn+LGgud=;VZRE(HSqNz zDO&1Q$&IpZjp+{UA*@}x@}fp~)N8-f*A0E8TP9+z(V_nOi(;m3$#!Q3E4#7XZ&m>* zq3*2P?YBCRWY}B758qkn!13H$LI`%<5=4=8D6Wu2ciTOf!PJ3aNAdT1`fditZQMBQ z;5u{V!|MIf*3AZ{{qI!78hvgTSJbaJP zjGmE?mfx41%4*m~&4SF|G6yrjz6?d-zkKivsOP?ao4$TVy)!$jV;1-%s&^*9)L4cW zMwaT&5`N4!6`|+-x9XkQ`K#Xf&(%Ac%m<}YN5|o1Rc?!O;3x;~c`9reZr~=WuQ24) zN&bezzdH3SMWG&7N}Xs=Z}OJPEGzUM>}VM;Vv-dwN0P?mUI@w3yP)_`MYJhIOUoO( zqFfiZCdsyxvoQxMQ>gS3OY_@DvkoQckO$n8r@fn2)sz^w2@=1kBnW}zLVS*|W zqU9$QCJ2!-k)KtVls5iZg$b%ih?bvJn4pS;*z%JKlWKADDTT=eNB@k%1XUyii9TMK z0DdknVjQ5iT}cWuAUnkP4nHj{oNAM{MEEVrj^zZxb#FOQZNA~}e1CcCN}-pcQSo%z zsMX?DQ9=wr(wXpoS(JQL9bZ+)SJm-E)uGBNK*J1ha{{EWchmyl=Xn`0dsTsgDFS5C zIA?Ko!Bmz91(LRsvBQEsc)Bclnh!aj0TM2OXyEsN_T@|MX%J?#_o~=|qGs+Z$%f>5 zV7L}9h?WYhSMe1}^|?0Bh!!_c?Wr(qO$Ua{C%nr!p3C7Uv!y~OD0YUP=n|$(6d!eP zR^$Yo<_F6*pUfyOK9$)p$`EUyYLL5-C=NYQwjmSj;qN2lC^u(!#kdL32dI01?D%OF za<8DtFc;khJ*cLYW+~+%bl;`43K*LrTkETCR!Kx%0Yg;C$@kSG@`!V~F%&STX`ea) zJFZOJq~lRNOZPz>TvrxPv_yBwpRD)bG}RqwnaIt;^oT5$c5+2$Qy#k`62!Ko>T~3(^{j9ABhJxg~aXDjGHzd;Us7=`PnMxo)0MEJ)19Y?|nZ zxo0su;1#uz2X=uf7*khFUUEK)w9R?wt~_*<@~Z%8sKWJ7{xV*!B_1}QJs>%>wwQFt zFe^~3t;TpxI*gEV%(_9>@~jxq@_|`}%P|EIjqDuZHb78_&SN~?B(RM;BNCI2oK9X{ zGQShnyxCjp{ahrsXyK3bK!ivcMBc-{XMv9tg?I4pS>Pkba|i#P1wI17cku67 z;3Hbz!M|sLFVB#-@$XsSBQbsl|DFXtGBj`p|DFZD{PEIl{Cg1i^7=FHEWpVw+Lk&7 zc<9#SVBO`F>-s4Wk`u4PLo`=MVwv#{;G30WZ-Y5JykP?1}f;HYaRaJTq%@%q*TtvvXydIzS*;^f&>e)D*#s&X*S%kJv8T zybGUueSe-u&v=NM5tTa@1Uv74;tjv~;H!Q*si?{$8gA#PVh4hhwoy=$MB#8@sdWm_ z0KV8yiFK+4`zxBTR8!PX$_WMak<)~-Y;%d{<`B$#kW*qUMs&a#32b1s$~(v*INJ#FNj&(|h9fys%IsYTxf)^0{Sv)DkiI$r z(cx(?a|`DpMm!r5!bx!rfEOt~Zd=iBrDhsz)>Y*Vocs{650WTUiO%}!6UP`AsKO82 z6EBT&H~~tKD9Ie$pF0DfxxAxa!AV}0rl^8gMcO1I(@tp#ss>_GLG*#KUkxk?7~RYq zDE4}`eI*4riJod5Uh~QadWixxEN6%8wl{0^Jn3Ga+|46f+@`U7!8U5p`%KU~fQ_w;@#KdYOz35Q^2dUW*6#&2*dMQUy-Uv-ajesjq9su= zJXIFl%^wUlrif?qaO_opM7As1|=DBTtpb@uhxdu4OTJ zcJ&a3sToTy8j)xkt$skLXnE!nX4#v~Zvwc#vy6b~!4Iyzv%s9CZ#7@EObDuD)cKs2 zNj@bGHoDa^DZR4)NG>Nx^nsYZ3}zCLABgG8U@qRO`RSO(ucwy!$a#tX^X=rf_uf}| zzMXv8N6w${{d7AS2%>7i_Y~d&zn9e5|Eh z?j;vV$XH%%AsMKD*(ZVeCZd2^zqr8e*^S4 JxW`RP006VymE!;a literal 0 HcmV?d00001 diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index 0e23d0e3f06..4d051c5601e 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -17,7 +17,7 @@ from langchain_core.messages import ( 
from pydantic import BaseModel from typing_extensions import TypedDict -from langchain_openai import ChatOpenAI +from langchain_openai import ChatOpenAI, custom_tool MODEL_NAME = "gpt-4o-mini" @@ -672,3 +672,32 @@ def test_image_generation_multi_turn() -> None: _check_response(ai_message2) tool_output2 = ai_message2.additional_kwargs["tool_outputs"][0] assert set(tool_output2.keys()).issubset(expected_keys) + + +@pytest.mark.vcr() +def test_custom_tool() -> None: + @custom_tool + def execute_code(code: str) -> str: + """Execute python code.""" + return "27" + + llm = ChatOpenAI(model="gpt-5", output_version="responses/v1").bind_tools( + [execute_code] + ) + + input_message = {"role": "user", "content": "Use the tool to evaluate 3^3."} + tool_call_message = llm.invoke([input_message]) + assert isinstance(tool_call_message, AIMessage) + assert len(tool_call_message.tool_calls) == 1 + tool_call = tool_call_message.tool_calls[0] + tool_message = execute_code.invoke(tool_call) + response = llm.invoke([input_message, tool_call_message, tool_message]) + assert isinstance(response, AIMessage) + + # Test streaming + full: Optional[BaseMessageChunk] = None + for chunk in llm.stream([input_message]): + assert isinstance(chunk, AIMessageChunk) + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + assert len(full.tool_calls) == 1 diff --git a/libs/partners/openai/tests/unit_tests/test_imports.py b/libs/partners/openai/tests/unit_tests/test_imports.py index 324e71bda9b..144a394c2ff 100644 --- a/libs/partners/openai/tests/unit_tests/test_imports.py +++ b/libs/partners/openai/tests/unit_tests/test_imports.py @@ -7,6 +7,7 @@ EXPECTED_ALL = [ "AzureOpenAI", "AzureChatOpenAI", "AzureOpenAIEmbeddings", + "custom_tool", ] diff --git a/libs/partners/openai/tests/unit_tests/test_tools.py b/libs/partners/openai/tests/unit_tests/test_tools.py new file mode 100644 index 00000000000..106aa0aa080 --- /dev/null +++ b/libs/partners/openai/tests/unit_tests/test_tools.py @@ -0,0 +1,120 @@ +from langchain_core.messages import AIMessage, HumanMessage, ToolMessage +from langchain_core.tools import Tool + +from langchain_openai import ChatOpenAI, custom_tool + + +def test_custom_tool() -> None: + @custom_tool + def my_tool(x: str) -> str: + """Do thing.""" + return "a" + x + + # Test decorator + assert isinstance(my_tool, Tool) + assert my_tool.metadata == {"type": "custom_tool"} + assert my_tool.description == "Do thing." 
+ + result = my_tool.invoke( + { + "type": "tool_call", + "name": "my_tool", + "args": {"whatever": "b"}, + "id": "abc", + "extras": {"type": "custom_tool_call"}, + } + ) + assert result == ToolMessage( + [{"type": "custom_tool_call_output", "output": "ab"}], + name="my_tool", + tool_call_id="abc", + ) + + # Test tool schema + ## Test with format + @custom_tool(format={"type": "grammar", "syntax": "lark", "definition": "..."}) + def another_tool(x: str) -> None: + """Do thing.""" + pass + + llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True).bind_tools([another_tool]) + assert llm.kwargs == { # type: ignore[attr-defined] + "tools": [ + { + "type": "custom", + "name": "another_tool", + "description": "Do thing.", + "format": {"type": "grammar", "syntax": "lark", "definition": "..."}, + } + ] + } + + llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True).bind_tools([my_tool]) + assert llm.kwargs == { # type: ignore[attr-defined] + "tools": [{"type": "custom", "name": "my_tool", "description": "Do thing."}] + } + + # Test passing messages back + message_history = [ + HumanMessage("Use the tool"), + AIMessage( + [ + { + "type": "custom_tool_call", + "id": "ctc_abc123", + "call_id": "abc", + "name": "my_tool", + "input": "a", + } + ], + tool_calls=[ + { + "type": "tool_call", + "name": "my_tool", + "args": {"__arg1": "a"}, + "id": "abc", + } + ], + ), + result, + ] + payload = llm._get_request_payload(message_history) # type: ignore[attr-defined] + expected_input = [ + {"content": "Use the tool", "role": "user"}, + { + "type": "custom_tool_call", + "id": "ctc_abc123", + "call_id": "abc", + "name": "my_tool", + "input": "a", + }, + {"type": "custom_tool_call_output", "call_id": "abc", "output": "ab"}, + ] + assert payload["input"] == expected_input + + +async def test_async_custom_tool() -> None: + @custom_tool + async def my_async_tool(x: str) -> str: + """Do async thing.""" + return "a" + x + + # Test decorator + assert isinstance(my_async_tool, Tool) + assert my_async_tool.metadata == {"type": "custom_tool"} + assert my_async_tool.description == "Do async thing." 
+ + result = await my_async_tool.ainvoke( + { + "type": "tool_call", + "name": "my_async_tool", + "args": {"whatever": "b"}, + "id": "abc", + "extras": {"type": "custom_tool_call"}, + } + ) + assert result == ToolMessage( + [{"type": "custom_tool_call_output", "output": "ab"}], + name="my_async_tool", + tool_call_id="abc", + ) From 5036bd7adb6b169169f928c5c199559e13f7f6e6 Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Thu, 7 Aug 2025 22:33:19 +0200 Subject: [PATCH 2/5] fix(openai): don't crash get_num_tokens_from_messages on gpt-5 (#32451) --- .../openai/langchain_openai/chat_models/base.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 5d0b480ce18..eb1fd4506e3 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -1447,8 +1447,10 @@ class BaseChatOpenAI(BaseChatModel): encoding = tiktoken.encoding_for_model(model) except KeyError: encoder = "cl100k_base" - if self.model_name.startswith("gpt-4o") or self.model_name.startswith( - "gpt-4.1" + if ( + self.model_name.startswith("gpt-4o") + or self.model_name.startswith("gpt-4.1") + or self.model_name.startswith("gpt-5") ): encoder = "o200k_base" encoding = tiktoken.get_encoding(encoder) @@ -1499,7 +1501,11 @@ class BaseChatOpenAI(BaseChatModel): tokens_per_message = 4 # if there's a name, the role is omitted tokens_per_name = -1 - elif model.startswith("gpt-3.5-turbo") or model.startswith("gpt-4"): + elif ( + model.startswith("gpt-3.5-turbo") + or model.startswith("gpt-4") + or model.startswith("gpt-5") + ): tokens_per_message = 3 tokens_per_name = 1 else: From 6727d6e8c8a7e9c11258987716dc86f8cd2c0c0a Mon Sep 17 00:00:00 2001 From: ccurme Date: Thu, 7 Aug 2025 17:39:01 -0300 Subject: [PATCH 3/5] release(core): 0.3.74 (#32454) --- libs/core/langchain_core/version.py | 2 +- libs/core/pyproject.toml | 2 +- libs/core/uv.lock | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/core/langchain_core/version.py b/libs/core/langchain_core/version.py index 2111f6c6fec..f65e4f6a645 100644 --- a/libs/core/langchain_core/version.py +++ b/libs/core/langchain_core/version.py @@ -1,3 +1,3 @@ """langchain-core version information and utilities.""" -VERSION = "0.3.73" +VERSION = "0.3.74" diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index c08199b6f0a..e329e0d3c5c 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -16,7 +16,7 @@ dependencies = [ "pydantic>=2.7.4", ] name = "langchain-core" -version = "0.3.73" +version = "0.3.74" description = "Building applications with LLMs through composability" readme = "README.md" diff --git a/libs/core/uv.lock b/libs/core/uv.lock index 9e200b80789..7ae9f48264b 100644 --- a/libs/core/uv.lock +++ b/libs/core/uv.lock @@ -987,7 +987,7 @@ wheels = [ [[package]] name = "langchain-core" -version = "0.3.73" +version = "0.3.74" source = { editable = "." 
} dependencies = [ { name = "jsonpatch" }, From 00244122bd12cca1bf05584c2abfcf8b9c481fe4 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Thu, 7 Aug 2025 22:24:21 -0400 Subject: [PATCH 4/5] feat(openai): `minimal` and `verbosity` (#32455) --- .../tests/unit_tests/chat_models/test_base.py | 53 +++++++++ .../chat_models/test_chat_models.py | 53 +++++++++ .../langchain_openai/chat_models/base.py | 49 +++++++- .../chat_models/test_base.py | 40 +++---- .../chat_models/test_responses_api.py | 109 +++++++++++++++--- .../tests/unit_tests/chat_models/test_base.py | 88 +++++++++++++- .../chat_models/test_prompt_cache_key.py | 5 +- .../openai/tests/unit_tests/test_tools.py | 8 +- libs/partners/openai/uv.lock | 4 +- 9 files changed, 354 insertions(+), 55 deletions(-) diff --git a/libs/langchain/tests/unit_tests/chat_models/test_base.py b/libs/langchain/tests/unit_tests/chat_models/test_base.py index 8cd5e0631b8..65be8a429f2 100644 --- a/libs/langchain/tests/unit_tests/chat_models/test_base.py +++ b/libs/langchain/tests/unit_tests/chat_models/test_base.py @@ -68,6 +68,32 @@ def test_init_unknown_provider() -> None: clear=True, ) def test_configurable() -> None: + """Test configurable chat model behavior without default parameters. + + Verifies that a configurable chat model initialized without default parameters: + - Has access to all standard runnable methods (``invoke``, ``stream``, etc.) + - Blocks access to non-configurable methods until configuration is provided + - Supports declarative operations (``bind_tools``) without mutating original model + - Can chain declarative operations and configuration to access full functionality + - Properly resolves to the configured model type when parameters are provided + + Example: + + .. python:: + + # This creates a configurable model without specifying which model + model = init_chat_model() + + # This will FAIL - no model specified yet + model.get_num_tokens("hello") # AttributeError! + + # This works - provides model at runtime + response = model.invoke( + "Hello", + config={"configurable": {"model": "gpt-4o"}} + ) + + """ model = init_chat_model() for method in ( @@ -125,6 +151,7 @@ def test_configurable() -> None: "presence_penalty": None, "reasoning": None, "reasoning_effort": None, + "verbosity": None, "frequency_penalty": None, "include": None, "seed": None, @@ -170,6 +197,32 @@ def test_configurable() -> None: clear=True, ) def test_configurable_with_default() -> None: + """Test configurable chat model behavior with default parameters. + + Verifies that a configurable chat model initialized with default parameters: + - Has access to all standard runnable methods (``invoke``, ``stream``, etc.) + - Provides immediate access to non-configurable methods (e.g. ``get_num_tokens``) + - Supports model switching through runtime configuration using ``config_prefix`` + - Maintains proper model identity and attributes when reconfigured + - Can be used in chains with different model providers via configuration + + Example: + + .. 
python:: + + # This creates a configurable model with default parameters (model) + model = init_chat_model("gpt-4o", configurable_fields="any", config_prefix="bar") + + # This works immediately - uses default gpt-4o + tokens = model.get_num_tokens("hello") + + # This also works - switches to Claude at runtime + response = model.invoke( + "Hello", + config={"configurable": {"my_model_model": "claude-3-sonnet-20240229"}} + ) + + """ # noqa: E501 model = init_chat_model("gpt-4o", configurable_fields="any", config_prefix="bar") for method in ( "invoke", diff --git a/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py b/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py index 147d7813f89..0f991195f7d 100644 --- a/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py +++ b/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py @@ -68,6 +68,32 @@ def test_init_unknown_provider() -> None: clear=True, ) def test_configurable() -> None: + """Test configurable chat model behavior without default parameters. + + Verifies that a configurable chat model initialized without default parameters: + - Has access to all standard runnable methods (``invoke``, ``stream``, etc.) + - Blocks access to non-configurable methods until configuration is provided + - Supports declarative operations (``bind_tools``) without mutating original model + - Can chain declarative operations and configuration to access full functionality + - Properly resolves to the configured model type when parameters are provided + + Example: + + .. python:: + + # This creates a configurable model without specifying which model + model = init_chat_model() + + # This will FAIL - no model specified yet + model.get_num_tokens("hello") # AttributeError! + + # This works - provides model at runtime + response = model.invoke( + "Hello", + config={"configurable": {"model": "gpt-4o"}} + ) + + """ model = init_chat_model() for method in ( @@ -125,6 +151,7 @@ def test_configurable() -> None: "presence_penalty": None, "reasoning": None, "reasoning_effort": None, + "verbosity": None, "frequency_penalty": None, "include": None, "seed": None, @@ -170,6 +197,32 @@ def test_configurable() -> None: clear=True, ) def test_configurable_with_default() -> None: + """Test configurable chat model behavior with default parameters. + + Verifies that a configurable chat model initialized with default parameters: + - Has access to all standard runnable methods (``invoke``, ``stream``, etc.) + - Provides immediate access to non-configurable methods (e.g. ``get_num_tokens``) + - Supports model switching through runtime configuration using ``config_prefix`` + - Maintains proper model identity and attributes when reconfigured + - Can be used in chains with different model providers via configuration + + Example: + + .. 
python:: + + # This creates a configurable model with default parameters (model) + model = init_chat_model("gpt-4o", configurable_fields="any", config_prefix="bar") + + # This works immediately - uses default gpt-4o + tokens = model.get_num_tokens("hello") + + # This also works - switches to Claude at runtime + response = model.invoke( + "Hello", + config={"configurable": {"my_model_model": "claude-3-sonnet-20240229"}} + ) + + """ # noqa: E501 model = init_chat_model("gpt-4o", configurable_fields="any", config_prefix="bar") for method in ( "invoke", diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index eb1fd4506e3..a8702359b36 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -458,8 +458,7 @@ class BaseChatOpenAI(BaseChatModel): alias="api_key", default_factory=secret_from_env("OPENAI_API_KEY", default=None) ) openai_api_base: Optional[str] = Field(default=None, alias="base_url") - """Base URL path for API requests, leave blank if not using a proxy or service - emulator.""" + """Base URL path for API requests, leave blank if not using a proxy or service emulator.""" # noqa: E501 openai_organization: Optional[str] = Field(default=None, alias="organization") """Automatically inferred from env var ``OPENAI_ORG_ID`` if not provided.""" # to support explicit proxy for OpenAI @@ -507,8 +506,9 @@ class BaseChatOpenAI(BaseChatModel): Reasoning models only, like OpenAI o1, o3, and o4-mini. - Currently supported values are low, medium, and high. Reducing reasoning effort - can result in faster responses and fewer tokens used on reasoning in a response. + Currently supported values are ``'minimal'``, ``'low'``, ``'medium'``, and + ``'high'``. Reducing reasoning effort can result in faster responses and fewer + tokens used on reasoning in a response. .. versionadded:: 0.2.14 """ @@ -527,6 +527,17 @@ class BaseChatOpenAI(BaseChatModel): .. versionadded:: 0.3.24 + """ + verbosity: Optional[str] = None + """Controls the verbosity level of responses for reasoning models. For use with the + Responses API. + + Currently supported values are ``'low'``, ``'medium'``, and ``'high'``. + + Controls how detailed the model's responses are. + + .. versionadded:: 0.3.28 + """ tiktoken_model_name: Optional[str] = None """The model name to pass to tiktoken when using this class. @@ -654,6 +665,7 @@ class BaseChatOpenAI(BaseChatModel): llm = ChatOpenAI( model="o4-mini", use_responses_api=True, + output_version="responses/v1", ) llm.invoke([HumanMessage("How are you?")], previous_response_id="resp_123") @@ -701,10 +713,24 @@ class BaseChatOpenAI(BaseChatModel): @model_validator(mode="before") @classmethod def validate_temperature(cls, values: dict[str, Any]) -> Any: - """Currently o1 models only allow temperature=1.""" + """Validate temperature parameter for different models. 
+ + - o1 models only allow temperature=1 + - gpt-5 models only allow temperature=1 or unset (defaults to 1) + """ model = values.get("model_name") or values.get("model") or "" + + # For o1 models, set temperature=1 if not provided if model.startswith("o1") and "temperature" not in values: values["temperature"] = 1 + + # For gpt-5 models, handle temperature restrictions + if model.startswith("gpt-5"): + temperature = values.get("temperature") + if temperature is not None and temperature != 1: + # For gpt-5, only temperature=1 is supported, so remove non-defaults + values.pop("temperature", None) + return values @model_validator(mode="after") @@ -805,6 +831,7 @@ class BaseChatOpenAI(BaseChatModel): "temperature": self.temperature, "reasoning_effort": self.reasoning_effort, "reasoning": self.reasoning, + "verbosity": self.verbosity, "include": self.include, "service_tier": self.service_tier, "truncation": self.truncation, @@ -1178,6 +1205,7 @@ class BaseChatOpenAI(BaseChatModel): kwargs["stop"] = stop payload = {**self._default_params, **kwargs} + if self._use_responses_api(payload): if self.use_previous_response_id: last_messages, previous_response_id = _get_last_messages(messages) @@ -2366,7 +2394,11 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override] from langchain_openai import ChatOpenAI - llm = ChatOpenAI(model="gpt-4.1-mini", use_responses_api=True) + llm = ChatOpenAI( + model="gpt-4.1-mini", + use_responses_api=True, + output_version="responses/v1", + ) response = llm.invoke("Hi, I'm Bob.") response.text() @@ -3486,6 +3518,11 @@ def _construct_responses_api_payload( if "reasoning_effort" in payload and "reasoning" not in payload: payload["reasoning"] = {"effort": payload.pop("reasoning_effort")} + # Remove temperature parameter for models that don't support it in responses API + model = payload.get("model", "") + if model.startswith("gpt-5"): + payload.pop("temperature", None) + payload["input"] = _construct_responses_api_input(messages) if tools := payload.pop("tools", None): new_tools: list = [] diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py index 1bc191d418f..b18bd5f97e9 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py @@ -32,7 +32,7 @@ from pydantic import BaseModel, Field from langchain_openai import ChatOpenAI from tests.unit_tests.fake.callbacks import FakeCallbackHandler -MAX_TOKEN_COUNT = 16 +MAX_TOKEN_COUNT = 100 @pytest.mark.scheduled @@ -219,7 +219,7 @@ async def test_openai_abatch_tags(use_responses_api: bool) -> None: def test_openai_invoke() -> None: """Test invoke tokens from ChatOpenAI.""" llm = ChatOpenAI( - model="o4-mini", + model="gpt-5-nano", service_tier="flex", # Also test service_tier max_retries=3, # Add retries for 503 capacity errors ) @@ -418,7 +418,7 @@ class MakeASandwich(BaseModel): def test_tool_use() -> None: - llm = ChatOpenAI(model="gpt-4-turbo", temperature=0) + llm = ChatOpenAI(model="gpt-5-nano", temperature=0) llm_with_tool = llm.bind_tools(tools=[GenerateUsername], tool_choice=True) msgs: list = [HumanMessage("Sally has green hair, what would her username be?")] ai_msg = llm_with_tool.invoke(msgs) @@ -462,7 +462,7 @@ def test_tool_use() -> None: def test_manual_tool_call_msg(use_responses_api: bool) -> None: """Test passing in manually construct tool call message.""" llm = ChatOpenAI( - model="gpt-3.5-turbo-0125", 
temperature=0, use_responses_api=use_responses_api + model="gpt-5-nano", temperature=0, use_responses_api=use_responses_api ) llm_with_tool = llm.bind_tools(tools=[GenerateUsername]) msgs: list = [ @@ -508,7 +508,7 @@ def test_manual_tool_call_msg(use_responses_api: bool) -> None: def test_bind_tools_tool_choice(use_responses_api: bool) -> None: """Test passing in manually construct tool call message.""" llm = ChatOpenAI( - model="gpt-3.5-turbo-0125", temperature=0, use_responses_api=use_responses_api + model="gpt-5-nano", temperature=0, use_responses_api=use_responses_api ) for tool_choice in ("any", "required"): llm_with_tools = llm.bind_tools( @@ -523,7 +523,7 @@ def test_bind_tools_tool_choice(use_responses_api: bool) -> None: def test_disable_parallel_tool_calling() -> None: - llm = ChatOpenAI(model="gpt-4o-mini") + llm = ChatOpenAI(model="gpt-5-nano") llm_with_tools = llm.bind_tools([GenerateUsername], parallel_tool_calls=False) result = llm_with_tools.invoke( "Use the GenerateUsername tool to generate user names for:\n\n" @@ -534,7 +534,7 @@ def test_disable_parallel_tool_calling() -> None: assert len(result.tool_calls) == 1 -@pytest.mark.parametrize("model", ["gpt-4o-mini", "o1", "gpt-4"]) +@pytest.mark.parametrize("model", ["gpt-4o-mini", "o1", "gpt-4", "gpt-5-nano"]) def test_openai_structured_output(model: str) -> None: class MyModel(BaseModel): """A Person""" @@ -694,7 +694,7 @@ def test_tool_calling_strict(use_responses_api: bool) -> None: input: Optional[int] = Field(default=None) model = ChatOpenAI( - model="gpt-4.1", temperature=0, use_responses_api=use_responses_api + model="gpt-5-nano", temperature=0, use_responses_api=use_responses_api ) # N.B. magic_function adds metadata to schema (min/max for number fields) model_with_tools = model.bind_tools([magic_function], strict=True) @@ -818,7 +818,7 @@ def test_json_schema_openai_format( strict: bool, method: Literal["json_schema", "function_calling"] ) -> None: """Test we can pass in OpenAI schema format specifying strict.""" - llm = ChatOpenAI(model="gpt-4o-mini") + llm = ChatOpenAI(model="gpt-5-nano") schema = { "name": "get_weather", "description": "Fetches the weather in the given location", @@ -939,7 +939,7 @@ def test_prediction_tokens() -> None: """ ) - llm = ChatOpenAI(model="gpt-4o") + llm = ChatOpenAI(model="gpt-4.1-nano") query = ( "Replace the Username property with an Email property. " "Respond only with code, and with no markdown formatting." 
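
For illustration, a minimal sketch of the gpt-5 temperature handling these tests rely on (the validate_temperature change in this patch); the model name and value are just examples, and an OPENAI_API_KEY is assumed to be set:

    from langchain_openai import ChatOpenAI

    # With a gpt-5 model, a non-default temperature is dropped by the "before"
    # validator instead of being sent to the API, so temperature=0 does not error.
    llm = ChatOpenAI(model="gpt-5-nano", temperature=0)
    # llm.temperature is now None and is omitted from request payloads.
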
@@ -981,7 +981,7 @@ class Foo(BaseModel): def test_stream_response_format() -> None: full: Optional[BaseMessageChunk] = None chunks = [] - for chunk in ChatOpenAI(model="gpt-4o-mini").stream( + for chunk in ChatOpenAI(model="gpt-5-nano").stream( "how are ya", response_format=Foo ): chunks.append(chunk) @@ -998,7 +998,7 @@ def test_stream_response_format() -> None: async def test_astream_response_format() -> None: full: Optional[BaseMessageChunk] = None chunks = [] - async for chunk in ChatOpenAI(model="gpt-4o-mini").astream( + async for chunk in ChatOpenAI(model="gpt-5-nano").astream( "how are ya", response_format=Foo ): chunks.append(chunk) @@ -1042,7 +1042,7 @@ def test_o1_stream_default_works() -> None: def test_multi_party_conversation() -> None: - llm = ChatOpenAI(model="gpt-4o") + llm = ChatOpenAI(model="gpt-5-nano") messages = [ HumanMessage("Hi, I have black hair.", name="Alice"), HumanMessage("Hi, I have brown hair.", name="Bob"), @@ -1057,7 +1057,7 @@ def test_structured_output_and_tools() -> None: response: str explanation: str - llm = ChatOpenAI(model="gpt-4o-mini").bind_tools( + llm = ChatOpenAI(model="gpt-5-nano").bind_tools( [GenerateUsername], strict=True, response_format=ResponseFormat ) @@ -1082,7 +1082,7 @@ def test_tools_and_structured_output() -> None: response: str explanation: str - llm = ChatOpenAI(model="gpt-4o-mini").with_structured_output( + llm = ChatOpenAI(model="gpt-5-nano").with_structured_output( ResponseFormat, strict=True, include_raw=True, tools=[GenerateUsername] ) @@ -1114,8 +1114,8 @@ def test_tools_and_structured_output() -> None: @pytest.mark.scheduled def test_prompt_cache_key_invoke() -> None: - """Test that prompt_cache_key works with invoke calls.""" - chat = ChatOpenAI(model="gpt-4o-mini", max_completion_tokens=20) + """Test that `prompt_cache_key` works with invoke calls.""" + chat = ChatOpenAI(model="gpt-5-nano", max_completion_tokens=500) messages = [HumanMessage("Say hello")] # Test that invoke works with prompt_cache_key parameter @@ -1135,18 +1135,18 @@ def test_prompt_cache_key_invoke() -> None: @pytest.mark.scheduled def test_prompt_cache_key_usage_methods_integration() -> None: - """Integration test for prompt_cache_key usage methods.""" + """Integration test for `prompt_cache_key` usage methods.""" messages = [HumanMessage("Say hi")] # Test keyword argument method - chat = ChatOpenAI(model="gpt-4o-mini", max_completion_tokens=10) + chat = ChatOpenAI(model="gpt-5-nano", max_completion_tokens=10) response = chat.invoke(messages, prompt_cache_key="integration-test-v1") assert isinstance(response, AIMessage) assert isinstance(response.content, str) # Test model-level via model_kwargs chat_model_level = ChatOpenAI( - model="gpt-4o-mini", + model="gpt-5-nano", max_completion_tokens=10, model_kwargs={"prompt_cache_key": "integration-model-level-v1"}, ) diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index 4d051c5601e..3b1a3b41e5d 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -175,7 +175,9 @@ class FooDict(TypedDict): def test_parsed_pydantic_schema() -> None: - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" + ) response = llm.invoke("how are ya", response_format=Foo) parsed = 
Foo(**json.loads(response.text())) assert parsed == response.additional_kwargs["parsed"] @@ -193,7 +195,9 @@ def test_parsed_pydantic_schema() -> None: async def test_parsed_pydantic_schema_async() -> None: - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" + ) response = await llm.ainvoke("how are ya", response_format=Foo) parsed = Foo(**json.loads(response.text())) assert parsed == response.additional_kwargs["parsed"] @@ -213,7 +217,9 @@ async def test_parsed_pydantic_schema_async() -> None: @pytest.mark.flaky(retries=3, delay=1) @pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict]) def test_parsed_dict_schema(schema: Any) -> None: - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" + ) response = llm.invoke("how are ya", response_format=schema) parsed = json.loads(response.text()) assert parsed == response.additional_kwargs["parsed"] @@ -231,7 +237,9 @@ def test_parsed_dict_schema(schema: Any) -> None: def test_parsed_strict() -> None: - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" + ) class InvalidJoke(TypedDict): setup: Annotated[str, ..., "The setup of the joke"] @@ -258,7 +266,9 @@ def test_parsed_strict() -> None: @pytest.mark.flaky(retries=3, delay=1) @pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict]) async def test_parsed_dict_schema_async(schema: Any) -> None: - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" + ) response = await llm.ainvoke("how are ya", response_format=schema) parsed = json.loads(response.text()) assert parsed == response.additional_kwargs["parsed"] @@ -280,7 +290,9 @@ def test_function_calling_and_structured_output() -> None: """return x * y""" return x * y - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" + ) bound_llm = llm.bind_tools([multiply], response_format=Foo, strict=True) # Test structured output response = llm.invoke("how are ya", response_format=Foo) @@ -324,7 +336,9 @@ def test_reasoning(output_version: Literal["v0", "responses/v1"]) -> None: def test_stateful_api() -> None: - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" + ) response = llm.invoke("how are you, my name is Bobo") assert "id" in response.response_metadata @@ -421,7 +435,9 @@ def test_stream_reasoning_summary( @pytest.mark.vcr def test_code_interpreter() -> None: - llm = ChatOpenAI(model="o4-mini", use_responses_api=True) + llm = ChatOpenAI( + model="o4-mini", use_responses_api=True, output_version="responses/v1" + ) llm_with_tools = llm.bind_tools( [{"type": "code_interpreter", "container": {"type": "auto"}}] ) @@ -431,13 +447,16 @@ def test_code_interpreter() -> None: } response = llm_with_tools.invoke([input_message]) _check_response(response) - tool_outputs = response.additional_kwargs["tool_outputs"] + tool_outputs = [ + block + for block in response.content + if isinstance(block, dict) and block.get("type") == "code_interpreter_call" + ] assert tool_outputs assert any(output["type"] == "code_interpreter_call" for output 
in tool_outputs) # Test streaming # Use same container - tool_outputs = response.additional_kwargs["tool_outputs"] assert len(tool_outputs) == 1 container_id = tool_outputs[0]["container_id"] llm_with_tools = llm.bind_tools( @@ -449,7 +468,11 @@ def test_code_interpreter() -> None: assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk assert isinstance(full, AIMessageChunk) - tool_outputs = full.additional_kwargs["tool_outputs"] + tool_outputs = [ + block + for block in full.content + if isinstance(block, dict) and block.get("type") == "code_interpreter_call" + ] assert tool_outputs assert any(output["type"] == "code_interpreter_call" for output in tool_outputs) @@ -460,7 +483,9 @@ def test_code_interpreter() -> None: @pytest.mark.vcr def test_mcp_builtin() -> None: - llm = ChatOpenAI(model="o4-mini", use_responses_api=True) + llm = ChatOpenAI( + model="o4-mini", use_responses_api=True, output_version="responses/v1" + ) llm_with_tools = llm.bind_tools( [ @@ -489,8 +514,8 @@ def test_mcp_builtin() -> None: "approve": True, "approval_request_id": output["id"], } - for output in response.additional_kwargs["tool_outputs"] - if output["type"] == "mcp_approval_request" + for output in response.content + if isinstance(output, dict) and output.get("type") == "mcp_approval_request" ] ) _ = llm_with_tools.invoke( @@ -549,7 +574,9 @@ def test_mcp_builtin_zdr() -> None: @pytest.mark.vcr() def test_image_generation_streaming() -> None: """Test image generation streaming.""" - llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True) + llm = ChatOpenAI( + model="gpt-4.1", use_responses_api=True, output_version="responses/v1" + ) tool = { "type": "image_generation", # For testing purposes let's keep the quality low, so the test runs faster. @@ -596,7 +623,13 @@ def test_image_generation_streaming() -> None: # At the moment, the streaming API does not pick up annotations fully. # So the following check is commented out. 
# _check_response(complete_ai_message) - tool_output = complete_ai_message.additional_kwargs["tool_outputs"][0] + tool_outputs = [ + block + for block in complete_ai_message.content + if isinstance(block, dict) and block.get("type") == "image_generation_call" + ] + assert len(tool_outputs) == 1 + tool_output = tool_outputs[0] assert set(tool_output.keys()).issubset(expected_keys) @@ -604,7 +637,9 @@ def test_image_generation_streaming() -> None: def test_image_generation_multi_turn() -> None: """Test multi-turn editing of image generation by passing in history.""" # Test multi-turn - llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True) + llm = ChatOpenAI( + model="gpt-4.1", use_responses_api=True, output_version="responses/v1" + ) # Test invocation tool = { "type": "image_generation", @@ -621,7 +656,13 @@ def test_image_generation_multi_turn() -> None: ] ai_message = llm_with_tools.invoke(chat_history) _check_response(ai_message) - tool_output = ai_message.additional_kwargs["tool_outputs"][0] + tool_outputs = [ + block + for block in ai_message.content + if isinstance(block, dict) and block.get("type") == "image_generation_call" + ] + assert len(tool_outputs) == 1 + tool_output = tool_outputs[0] # Example tool output for an image # { @@ -670,10 +711,40 @@ def test_image_generation_multi_turn() -> None: ai_message2 = llm_with_tools.invoke(chat_history) _check_response(ai_message2) - tool_output2 = ai_message2.additional_kwargs["tool_outputs"][0] + tool_outputs2 = [ + block + for block in ai_message2.content + if isinstance(block, dict) and block.get("type") == "image_generation_call" + ] + assert len(tool_outputs2) == 1 + tool_output2 = tool_outputs2[0] assert set(tool_output2.keys()).issubset(expected_keys) +@pytest.mark.xfail( + reason="verbosity parameter not yet supported by OpenAI Responses API" +) +def test_verbosity_parameter() -> None: + """Test verbosity parameter with Responses API. + + TODO: This test is expected to fail until OpenAI enables verbosity support + in the Responses API for available models. The parameter is properly implemented + in the codebase but the API currently returns 'Unknown parameter: verbosity'. + Remove @pytest.mark.xfail when OpenAI adds support. 
+ """ + llm = ChatOpenAI( + model=MODEL_NAME, + verbosity="medium", + use_responses_api=True, + output_version="responses/v1", + ) + response = llm.invoke([HumanMessage(content="Hello, explain quantum computing.")]) + + assert isinstance(response, AIMessage) + assert response.content + # When verbosity works, we expect the response to respect the verbosity level + + @pytest.mark.vcr() def test_custom_tool() -> None: @custom_tool diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py index c4176711482..73185790602 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py @@ -874,8 +874,13 @@ def test_get_num_tokens_from_messages() -> None: ), ToolMessage("foobar", tool_call_id="foo"), ] - expected = 176 - actual = llm.get_num_tokens_from_messages(messages) + expected = 431 # Updated to match token count with mocked 100x100 image + + # Mock _url_to_size to avoid PIL dependency in unit tests + with patch("langchain_openai.chat_models.base._url_to_size") as mock_url_to_size: + mock_url_to_size.return_value = (100, 100) # 100x100 pixel image + actual = llm.get_num_tokens_from_messages(messages) + assert expected == actual # Test file inputs @@ -1131,6 +1136,73 @@ def test_init_o1() -> None: assert len(record) == 0 +def test_init_minimal_reasoning_effort() -> None: + with pytest.warns(None) as record: # type: ignore[call-overload] + ChatOpenAI(model="gpt-5", reasoning_effort="minimal") + assert len(record) == 0 + + +@pytest.mark.parametrize("use_responses_api", [False, True]) +@pytest.mark.parametrize("use_max_completion_tokens", [True, False]) +def test_minimal_reasoning_effort_payload( + use_max_completion_tokens: bool, use_responses_api: bool +) -> None: + """Test that minimal reasoning effort is included in request payload.""" + if use_max_completion_tokens: + kwargs = {"max_completion_tokens": 100} + else: + kwargs = {"max_tokens": 100} + + init_kwargs: dict[str, Any] = { + "model": "gpt-5", + "reasoning_effort": "minimal", + "use_responses_api": use_responses_api, + **kwargs, + } + + if use_responses_api: + init_kwargs["output_version"] = "responses/v1" + + llm = ChatOpenAI(**init_kwargs) + + messages = [ + {"role": "developer", "content": "respond with just 'test'"}, + {"role": "user", "content": "hello"}, + ] + + payload = llm._get_request_payload(messages, stop=None) + + # When using responses API, reasoning_effort becomes reasoning.effort + if use_responses_api: + assert "reasoning" in payload + assert payload["reasoning"]["effort"] == "minimal" + # For responses API, tokens param becomes max_output_tokens + assert payload["max_output_tokens"] == 100 + else: + # For non-responses API, reasoning_effort remains as is + assert payload["reasoning_effort"] == "minimal" + if use_max_completion_tokens: + assert payload["max_completion_tokens"] == 100 + else: + # max_tokens gets converted to max_completion_tokens in non-responses API + assert payload["max_completion_tokens"] == 100 + + +def test_verbosity_parameter_payload() -> None: + """Test verbosity parameter is included in request payload for Responses API.""" + llm = ChatOpenAI( + model="gpt-5", + verbosity="high", + use_responses_api=True, + output_version="responses/v1", + ) + + messages = [{"role": "user", "content": "hello"}] + payload = llm._get_request_payload(messages, stop=None) + + assert payload["verbosity"] == "high" + + def 
test_structured_output_old_model() -> None: class Output(TypedDict): """output.""" @@ -2198,7 +2270,9 @@ def test__construct_responses_api_input_multiple_message_types() -> None: assert messages_copy == messages # Test dict messages - llm = ChatOpenAI(model="o4-mini", use_responses_api=True) + llm = ChatOpenAI( + model="o4-mini", use_responses_api=True, output_version="responses/v1" + ) message_dicts: list = [ {"role": "developer", "content": "This is a developer message."}, { @@ -2239,7 +2313,9 @@ class FakeTracer(BaseTracer): def test_mcp_tracing() -> None: # Test we exclude sensitive information from traces - llm = ChatOpenAI(model="o4-mini", use_responses_api=True) + llm = ChatOpenAI( + model="o4-mini", use_responses_api=True, output_version="responses/v1" + ) tracer = FakeTracer() mock_client = MagicMock() @@ -2430,7 +2506,9 @@ def test_get_last_messages() -> None: def test_get_request_payload_use_previous_response_id() -> None: # Default - don't use previous_response ID - llm = ChatOpenAI(model="o4-mini", use_responses_api=True) + llm = ChatOpenAI( + model="o4-mini", use_responses_api=True, output_version="responses/v1" + ) messages = [ HumanMessage("Hello"), AIMessage("Hi there!", response_metadata={"id": "resp_123"}), diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_prompt_cache_key.py b/libs/partners/openai/tests/unit_tests/chat_models/test_prompt_cache_key.py index 1f6c8c5d583..1aad6baff79 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_prompt_cache_key.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_prompt_cache_key.py @@ -71,7 +71,10 @@ def test_prompt_cache_key_model_kwargs() -> None: def test_prompt_cache_key_responses_api() -> None: """Test that prompt_cache_key works with Responses API.""" chat = ChatOpenAI( - model="gpt-4o-mini", use_responses_api=True, max_completion_tokens=10 + model="gpt-4o-mini", + use_responses_api=True, + output_version="responses/v1", + max_completion_tokens=10, ) messages = [HumanMessage("Hello")] diff --git a/libs/partners/openai/tests/unit_tests/test_tools.py b/libs/partners/openai/tests/unit_tests/test_tools.py index 106aa0aa080..63b097e6248 100644 --- a/libs/partners/openai/tests/unit_tests/test_tools.py +++ b/libs/partners/openai/tests/unit_tests/test_tools.py @@ -37,7 +37,9 @@ def test_custom_tool() -> None: """Do thing.""" pass - llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True).bind_tools([another_tool]) + llm = ChatOpenAI( + model="gpt-4.1", use_responses_api=True, output_version="responses/v1" + ).bind_tools([another_tool]) assert llm.kwargs == { # type: ignore[attr-defined] "tools": [ { @@ -49,7 +51,9 @@ def test_custom_tool() -> None: ] } - llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True).bind_tools([my_tool]) + llm = ChatOpenAI( + model="gpt-4.1", use_responses_api=True, output_version="responses/v1" + ).bind_tools([my_tool]) assert llm.kwargs == { # type: ignore[attr-defined] "tools": [{"type": "custom", "name": "my_tool", "description": "Do thing."}] } diff --git a/libs/partners/openai/uv.lock b/libs/partners/openai/uv.lock index 38b8da21426..58a1807abe3 100644 --- a/libs/partners/openai/uv.lock +++ b/libs/partners/openai/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.9" resolution-markers = [ "python_full_version >= '3.13' and platform_python_implementation == 'PyPy'", @@ -480,7 +480,7 @@ wheels = [ [[package]] name = "langchain-core" -version = "0.3.73" +version = "0.3.74" source = { editable = "../../core" } dependencies 
= [ { name = "jsonpatch" }, From 02001212b0a2b37d90451d8493089389ea220cab Mon Sep 17 00:00:00 2001 From: ccurme Date: Fri, 8 Aug 2025 09:51:18 -0300 Subject: [PATCH 5/5] fix(openai): revert some changes (#32462) Keep coverage on `output_version="v0"` (increasing coverage is being managed in v0.4 branch). --- .../chat_models/test_responses_api.py | 85 +++++-------------- 1 file changed, 19 insertions(+), 66 deletions(-) diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index 3b1a3b41e5d..32d3f199c85 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -175,9 +175,7 @@ class FooDict(TypedDict): def test_parsed_pydantic_schema() -> None: - llm = ChatOpenAI( - model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" - ) + llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) response = llm.invoke("how are ya", response_format=Foo) parsed = Foo(**json.loads(response.text())) assert parsed == response.additional_kwargs["parsed"] @@ -195,9 +193,7 @@ def test_parsed_pydantic_schema() -> None: async def test_parsed_pydantic_schema_async() -> None: - llm = ChatOpenAI( - model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" - ) + llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) response = await llm.ainvoke("how are ya", response_format=Foo) parsed = Foo(**json.loads(response.text())) assert parsed == response.additional_kwargs["parsed"] @@ -217,9 +213,7 @@ async def test_parsed_pydantic_schema_async() -> None: @pytest.mark.flaky(retries=3, delay=1) @pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict]) def test_parsed_dict_schema(schema: Any) -> None: - llm = ChatOpenAI( - model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" - ) + llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) response = llm.invoke("how are ya", response_format=schema) parsed = json.loads(response.text()) assert parsed == response.additional_kwargs["parsed"] @@ -237,9 +231,7 @@ def test_parsed_dict_schema(schema: Any) -> None: def test_parsed_strict() -> None: - llm = ChatOpenAI( - model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" - ) + llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) class InvalidJoke(TypedDict): setup: Annotated[str, ..., "The setup of the joke"] @@ -266,9 +258,7 @@ def test_parsed_strict() -> None: @pytest.mark.flaky(retries=3, delay=1) @pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict]) async def test_parsed_dict_schema_async(schema: Any) -> None: - llm = ChatOpenAI( - model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" - ) + llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) response = await llm.ainvoke("how are ya", response_format=schema) parsed = json.loads(response.text()) assert parsed == response.additional_kwargs["parsed"] @@ -290,9 +280,7 @@ def test_function_calling_and_structured_output() -> None: """return x * y""" return x * y - llm = ChatOpenAI( - model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" - ) + llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) bound_llm = llm.bind_tools([multiply], response_format=Foo, strict=True) # Test structured output response = llm.invoke("how are ya", response_format=Foo) @@ -336,9 +324,7 @@ def 
test_reasoning(output_version: Literal["v0", "responses/v1"]) -> None: def test_stateful_api() -> None: - llm = ChatOpenAI( - model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" - ) + llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) response = llm.invoke("how are you, my name is Bobo") assert "id" in response.response_metadata @@ -435,9 +421,7 @@ def test_stream_reasoning_summary( @pytest.mark.vcr def test_code_interpreter() -> None: - llm = ChatOpenAI( - model="o4-mini", use_responses_api=True, output_version="responses/v1" - ) + llm = ChatOpenAI(model="o4-mini", use_responses_api=True) llm_with_tools = llm.bind_tools( [{"type": "code_interpreter", "container": {"type": "auto"}}] ) @@ -447,16 +431,13 @@ def test_code_interpreter() -> None: } response = llm_with_tools.invoke([input_message]) _check_response(response) - tool_outputs = [ - block - for block in response.content - if isinstance(block, dict) and block.get("type") == "code_interpreter_call" - ] + tool_outputs = response.additional_kwargs["tool_outputs"] assert tool_outputs assert any(output["type"] == "code_interpreter_call" for output in tool_outputs) # Test streaming # Use same container + tool_outputs = response.additional_kwargs["tool_outputs"] assert len(tool_outputs) == 1 container_id = tool_outputs[0]["container_id"] llm_with_tools = llm.bind_tools( @@ -468,11 +449,7 @@ def test_code_interpreter() -> None: assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk assert isinstance(full, AIMessageChunk) - tool_outputs = [ - block - for block in full.content - if isinstance(block, dict) and block.get("type") == "code_interpreter_call" - ] + tool_outputs = full.additional_kwargs["tool_outputs"] assert tool_outputs assert any(output["type"] == "code_interpreter_call" for output in tool_outputs) @@ -483,9 +460,7 @@ def test_code_interpreter() -> None: @pytest.mark.vcr def test_mcp_builtin() -> None: - llm = ChatOpenAI( - model="o4-mini", use_responses_api=True, output_version="responses/v1" - ) + llm = ChatOpenAI(model="o4-mini", use_responses_api=True) llm_with_tools = llm.bind_tools( [ @@ -514,8 +489,8 @@ def test_mcp_builtin() -> None: "approve": True, "approval_request_id": output["id"], } - for output in response.content - if isinstance(output, dict) and output.get("type") == "mcp_approval_request" + for output in response.additional_kwargs["tool_outputs"] + if output["type"] == "mcp_approval_request" ] ) _ = llm_with_tools.invoke( @@ -574,9 +549,7 @@ def test_mcp_builtin_zdr() -> None: @pytest.mark.vcr() def test_image_generation_streaming() -> None: """Test image generation streaming.""" - llm = ChatOpenAI( - model="gpt-4.1", use_responses_api=True, output_version="responses/v1" - ) + llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True) tool = { "type": "image_generation", # For testing purposes let's keep the quality low, so the test runs faster. @@ -623,13 +596,7 @@ def test_image_generation_streaming() -> None: # At the moment, the streaming API does not pick up annotations fully. # So the following check is commented out. 
     # _check_response(complete_ai_message)
-    tool_outputs = [
-        block
-        for block in complete_ai_message.content
-        if isinstance(block, dict) and block.get("type") == "image_generation_call"
-    ]
-    assert len(tool_outputs) == 1
-    tool_output = tool_outputs[0]
+    tool_output = complete_ai_message.additional_kwargs["tool_outputs"][0]
 
     assert set(tool_output.keys()).issubset(expected_keys)
 
@@ -637,9 +604,7 @@ def test_image_generation_streaming() -> None:
 def test_image_generation_multi_turn() -> None:
     """Test multi-turn editing of image generation by passing in history."""
     # Test multi-turn
-    llm = ChatOpenAI(
-        model="gpt-4.1", use_responses_api=True, output_version="responses/v1"
-    )
+    llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True)
     # Test invocation
     tool = {
         "type": "image_generation",
@@ -656,13 +621,7 @@ def test_image_generation_multi_turn() -> None:
     ]
     ai_message = llm_with_tools.invoke(chat_history)
     _check_response(ai_message)
-    tool_outputs = [
-        block
-        for block in ai_message.content
-        if isinstance(block, dict) and block.get("type") == "image_generation_call"
-    ]
-    assert len(tool_outputs) == 1
-    tool_output = tool_outputs[0]
+    tool_output = ai_message.additional_kwargs["tool_outputs"][0]
 
     # Example tool output for an image
     # {
@@ -711,13 +670,7 @@ def test_image_generation_multi_turn() -> None:
     ai_message2 = llm_with_tools.invoke(chat_history)
     _check_response(ai_message2)
-    tool_outputs2 = [
-        block
-        for block in ai_message2.content
-        if isinstance(block, dict) and block.get("type") == "image_generation_call"
-    ]
-    assert len(tool_outputs2) == 1
-    tool_output2 = tool_outputs2[0]
+    tool_output2 = ai_message2.additional_kwargs["tool_outputs"][0]
 
     assert set(tool_output2.keys()).issubset(expected_keys)
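
Note on the two access patterns exercised above: under the default `output_version` ("v0"), results from built-in tools such as `image_generation` and `code_interpreter` surface in `additional_kwargs["tool_outputs"]`, while `output_version="responses/v1"` exposes the same results as typed blocks in `message.content`. A minimal sketch of both patterns follows; the model name, tool spec, and prompt below are illustrative assumptions rather than values taken from the tests.

    from langchain_openai import ChatOpenAI

    tool = {"type": "image_generation"}  # built-in tool spec (illustrative)
    prompt = "Draw a short word in green font."  # assumed prompt

    # Default ("v0"): built-in tool results live in additional_kwargs.
    llm_v0 = ChatOpenAI(model="gpt-4.1", use_responses_api=True)
    msg_v0 = llm_v0.bind_tools([tool]).invoke(prompt)
    v0_outputs = msg_v0.additional_kwargs["tool_outputs"]

    # "responses/v1": the same results arrive as typed content blocks.
    llm_v1 = ChatOpenAI(
        model="gpt-4.1", use_responses_api=True, output_version="responses/v1"
    )
    msg_v1 = llm_v1.bind_tools([tool]).invoke(prompt)
    v1_outputs = [
        block
        for block in msg_v1.content
        if isinstance(block, dict) and block.get("type") == "image_generation_call"
    ]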