From ec2b34a02ddf3b6ac7a9ce4b632d3c7b2d61657c Mon Sep 17 00:00:00 2001 From: ccurme Date: Thu, 7 Aug 2025 17:30:01 -0300 Subject: [PATCH 1/5] feat(openai): custom tools (#32449) --- docs/docs/integrations/chat/openai.ipynb | 157 ++++++++++++++++++ libs/core/langchain_core/tools/base.py | 9 +- .../langchain_core/utils/function_calling.py | 11 ++ .../openai/langchain_openai/__init__.py | 2 + .../langchain_openai/chat_models/base.py | 49 +++++- .../openai/langchain_openai/tools/__init__.py | 3 + .../langchain_openai/tools/custom_tool.py | 109 ++++++++++++ .../tests/cassettes/test_custom_tool.yaml.gz | Bin 0 -> 4367 bytes .../chat_models/test_responses_api.py | 31 +++- .../openai/tests/unit_tests/test_imports.py | 1 + .../openai/tests/unit_tests/test_tools.py | 120 +++++++++++++ 11 files changed, 488 insertions(+), 4 deletions(-) create mode 100644 libs/partners/openai/langchain_openai/tools/__init__.py create mode 100644 libs/partners/openai/langchain_openai/tools/custom_tool.py create mode 100644 libs/partners/openai/tests/cassettes/test_custom_tool.yaml.gz create mode 100644 libs/partners/openai/tests/unit_tests/test_tools.py diff --git a/docs/docs/integrations/chat/openai.ipynb b/docs/docs/integrations/chat/openai.ipynb index 81b0684712a..54db230ba95 100644 --- a/docs/docs/integrations/chat/openai.ipynb +++ b/docs/docs/integrations/chat/openai.ipynb @@ -447,6 +447,163 @@ ")" ] }, + { + "cell_type": "markdown", + "id": "c5d9d19d-8ab1-4d9d-b3a0-56ee4e89c528", + "metadata": {}, + "source": [ + "### Custom tools\n", + "\n", + ":::info Requires ``langchain-openai>=0.3.29``\n", + "\n", + ":::\n", + "\n", + "[Custom tools](https://platform.openai.com/docs/guides/function-calling#custom-tools) support tools with arbitrary string inputs. They can be particularly useful when you expect your string arguments to be long or complex." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "a47c809b-852f-46bd-8b9e-d9534c17213d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================================\u001b[1m Human Message \u001b[0m=================================\n", + "\n", + "Use the tool to calculate 3^3.\n", + "==================================\u001b[1m Ai Message \u001b[0m==================================\n", + "\n", + "[{'id': 'rs_6894ff5747c0819d9b02fc5645b0be9c000169fd9fb68d99', 'summary': [], 'type': 'reasoning'}, {'call_id': 'call_7SYwMSQPbbEqFcKlKOpXeEux', 'input': 'print(3**3)', 'name': 'execute_code', 'type': 'custom_tool_call', 'id': 'ctc_6894ff5b9f54819d8155a63638d34103000169fd9fb68d99', 'status': 'completed'}]\n", + "Tool Calls:\n", + " execute_code (call_7SYwMSQPbbEqFcKlKOpXeEux)\n", + " Call ID: call_7SYwMSQPbbEqFcKlKOpXeEux\n", + " Args:\n", + " __arg1: print(3**3)\n", + "=================================\u001b[1m Tool Message \u001b[0m=================================\n", + "Name: execute_code\n", + "\n", + "[{'type': 'custom_tool_call_output', 'output': '27'}]\n", + "==================================\u001b[1m Ai Message \u001b[0m==================================\n", + "\n", + "[{'type': 'text', 'text': '27', 'annotations': [], 'id': 'msg_6894ff5db3b8819d9159b3a370a25843000169fd9fb68d99'}]\n" + ] + } + ], + "source": [ + "from langchain_openai import ChatOpenAI, custom_tool\n", + "from langgraph.prebuilt import create_react_agent\n", + "\n", + "\n", + "@custom_tool\n", + "def execute_code(code: str) -> str:\n", + " \"\"\"Execute python code.\"\"\"\n", + " return \"27\"\n", + "\n", + "\n", + "llm = ChatOpenAI(model=\"gpt-5\", output_version=\"responses/v1\")\n", + "\n", + "agent = create_react_agent(llm, [execute_code])\n", + "\n", + "input_message = {\"role\": \"user\", \"content\": \"Use the tool to calculate 3^3.\"}\n", + "for step in agent.stream(\n", + " {\"messages\": [input_message]},\n", + " stream_mode=\"values\",\n", + "):\n", + " step[\"messages\"][-1].pretty_print()" + ] + }, + { + "cell_type": "markdown", + "id": "5ef93be6-6d4c-4eea-acfd-248774074082", + "metadata": {}, + "source": [ + "
<details>\n",
+    "<summary>Context-free grammars</summary>\n",
+    "\n",
+    "OpenAI supports the specification of a [context-free grammar](https://platform.openai.com/docs/guides/function-calling#context-free-grammars) for custom tool inputs in `lark` or `regex` format. See [OpenAI docs](https://platform.openai.com/docs/guides/function-calling#context-free-grammars) for details. The `format` parameter can be passed into `@custom_tool` as shown below:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "2ae04586-be33-49c6-8947-7867801d868f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "================================\u001b[1m Human Message \u001b[0m=================================\n",
+      "\n",
+      "Use the tool to calculate 3^3.\n",
+      "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
+      "\n",
+      "[{'id': 'rs_689500828a8481a297ff0f98e328689c0681550c89797f43', 'summary': [], 'type': 'reasoning'}, {'call_id': 'call_jzH01RVhu6EFz7yUrOFXX55s', 'input': '3 * 3 * 3', 'name': 'do_math', 'type': 'custom_tool_call', 'id': 'ctc_6895008d57bc81a2b84d0993517a66b90681550c89797f43', 'status': 'completed'}]\n",
+      "Tool Calls:\n",
+      "  do_math (call_jzH01RVhu6EFz7yUrOFXX55s)\n",
+      " Call ID: call_jzH01RVhu6EFz7yUrOFXX55s\n",
+      "  Args:\n",
+      "    __arg1: 3 * 3 * 3\n",
+      "=================================\u001b[1m Tool Message \u001b[0m=================================\n",
+      "Name: do_math\n",
+      "\n",
+      "[{'type': 'custom_tool_call_output', 'output': '27'}]\n",
+      "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
+      "\n",
+      "[{'type': 'text', 'text': '27', 'annotations': [], 'id': 'msg_6895009776b881a2a25f0be8507d08f20681550c89797f43'}]\n"
+     ]
+    }
+   ],
+   "source": [
+    "from langchain_openai import ChatOpenAI, custom_tool\n",
+    "from langgraph.prebuilt import create_react_agent\n",
+    "\n",
+    "grammar = \"\"\"\n",
+    "start: expr\n",
+    "expr: term (SP ADD SP term)* -> add\n",
+    "| term\n",
+    "term: factor (SP MUL SP factor)* -> mul\n",
+    "| factor\n",
+    "factor: INT\n",
+    "SP: \" \"\n",
+    "ADD: \"+\"\n",
+    "MUL: \"*\"\n",
+    "%import common.INT\n",
+    "\"\"\"\n",
+    "\n",
+    "format_ = {\"type\": \"grammar\", \"syntax\": \"lark\", \"definition\": grammar}\n",
+    "\n",
+    "\n",
+    "# highlight-next-line\n",
+    "@custom_tool(format=format_)\n",
+    "def do_math(input_string: str) -> str:\n",
+    "    \"\"\"Do a mathematical operation.\"\"\"\n",
+    "    return \"27\"\n",
+    "\n",
+    "\n",
+    "llm = ChatOpenAI(model=\"gpt-5\", output_version=\"responses/v1\")\n",
+    "\n",
+    "agent = create_react_agent(llm, [do_math])\n",
+    "\n",
+    "input_message = {\"role\": \"user\", \"content\": \"Use the tool to calculate 3^3.\"}\n",
+    "for step in agent.stream(\n",
+    "    {\"messages\": [input_message]},\n",
+    "    stream_mode=\"values\",\n",
+    "):\n",
+    "    step[\"messages\"][-1].pretty_print()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c63430c9-c7b0-4e92-a491-3f165dddeb8f",
+   "metadata": {},
+   "source": [
+    "</details>
" + ] + }, { "cell_type": "markdown", "id": "84833dd0-17e9-4269-82ed-550639d65751", diff --git a/libs/core/langchain_core/tools/base.py b/libs/core/langchain_core/tools/base.py index e54a09709d6..34ca4b4da30 100644 --- a/libs/core/langchain_core/tools/base.py +++ b/libs/core/langchain_core/tools/base.py @@ -74,7 +74,14 @@ if TYPE_CHECKING: from collections.abc import Sequence FILTERED_ARGS = ("run_manager", "callbacks") -TOOL_MESSAGE_BLOCK_TYPES = ("text", "image_url", "image", "json", "search_result") +TOOL_MESSAGE_BLOCK_TYPES = ( + "text", + "image_url", + "image", + "json", + "search_result", + "custom_tool_call_output", +) class SchemaAnnotationError(TypeError): diff --git a/libs/core/langchain_core/utils/function_calling.py b/libs/core/langchain_core/utils/function_calling.py index d7059fded47..609129ac58b 100644 --- a/libs/core/langchain_core/utils/function_calling.py +++ b/libs/core/langchain_core/utils/function_calling.py @@ -575,12 +575,23 @@ def convert_to_openai_tool( Added support for OpenAI's image generation built-in tool. """ + from langchain_core.tools import Tool + if isinstance(tool, dict): if tool.get("type") in _WellKnownOpenAITools: return tool # As of 03.12.25 can be "web_search_preview" or "web_search_preview_2025_03_11" if (tool.get("type") or "").startswith("web_search_preview"): return tool + if isinstance(tool, Tool) and (tool.metadata or {}).get("type") == "custom_tool": + oai_tool = { + "type": "custom", + "name": tool.name, + "description": tool.description, + } + if tool.metadata is not None and "format" in tool.metadata: + oai_tool["format"] = tool.metadata["format"] + return oai_tool oai_function = convert_to_openai_function(tool, strict=strict) return {"type": "function", "function": oai_function} diff --git a/libs/partners/openai/langchain_openai/__init__.py b/libs/partners/openai/langchain_openai/__init__.py index a1756f0526d..40a94c25ce1 100644 --- a/libs/partners/openai/langchain_openai/__init__.py +++ b/libs/partners/openai/langchain_openai/__init__.py @@ -1,6 +1,7 @@ from langchain_openai.chat_models import AzureChatOpenAI, ChatOpenAI from langchain_openai.embeddings import AzureOpenAIEmbeddings, OpenAIEmbeddings from langchain_openai.llms import AzureOpenAI, OpenAI +from langchain_openai.tools import custom_tool __all__ = [ "OpenAI", @@ -9,4 +10,5 @@ __all__ = [ "AzureOpenAI", "AzureChatOpenAI", "AzureOpenAIEmbeddings", + "custom_tool", ] diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 56220d9f531..5d0b480ce18 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -3582,6 +3582,20 @@ def _make_computer_call_output_from_message(message: ToolMessage) -> dict: return computer_call_output +def _make_custom_tool_output_from_message(message: ToolMessage) -> Optional[dict]: + custom_tool_output = None + for block in message.content: + if isinstance(block, dict) and block.get("type") == "custom_tool_call_output": + custom_tool_output = { + "type": "custom_tool_call_output", + "call_id": message.tool_call_id, + "output": block.get("output") or "", + } + break + + return custom_tool_output + + def _pop_index_and_sub_index(block: dict) -> dict: """When streaming, langchain-core uses the ``index`` key to aggregate text blocks. OpenAI API does not support this key, so we need to remove it. 
@@ -3608,7 +3622,10 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list: msg.pop("name") if msg["role"] == "tool": tool_output = msg["content"] - if lc_msg.additional_kwargs.get("type") == "computer_call_output": + custom_tool_output = _make_custom_tool_output_from_message(lc_msg) # type: ignore[arg-type] + if custom_tool_output: + input_.append(custom_tool_output) + elif lc_msg.additional_kwargs.get("type") == "computer_call_output": computer_call_output = _make_computer_call_output_from_message( cast(ToolMessage, lc_msg) ) @@ -3663,6 +3680,7 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list: "file_search_call", "function_call", "computer_call", + "custom_tool_call", "code_interpreter_call", "mcp_call", "mcp_list_tools", @@ -3690,7 +3708,8 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list: content_call_ids = { block["call_id"] for block in input_ - if block.get("type") == "function_call" and "call_id" in block + if block.get("type") in ("function_call", "custom_tool_call") + and "call_id" in block } for tool_call in tool_calls: if tool_call["id"] not in content_call_ids: @@ -3841,6 +3860,15 @@ def _construct_lc_result_from_responses_api( "error": error, } invalid_tool_calls.append(tool_call) + elif output.type == "custom_tool_call": + content_blocks.append(output.model_dump(exclude_none=True, mode="json")) + tool_call = { + "type": "tool_call", + "name": output.name, + "args": {"__arg1": output.input}, + "id": output.call_id, + } + tool_calls.append(tool_call) elif output.type in ( "reasoning", "web_search_call", @@ -4044,6 +4072,23 @@ def _convert_responses_chunk_to_generation_chunk( tool_output = chunk.item.model_dump(exclude_none=True, mode="json") tool_output["index"] = current_index content.append(tool_output) + elif ( + chunk.type == "response.output_item.done" + and chunk.item.type == "custom_tool_call" + ): + _advance(chunk.output_index) + tool_output = chunk.item.model_dump(exclude_none=True, mode="json") + tool_output["index"] = current_index + content.append(tool_output) + tool_call_chunks.append( + { + "type": "tool_call_chunk", + "name": chunk.item.name, + "args": json.dumps({"__arg1": chunk.item.input}), + "id": chunk.item.call_id, + "index": current_index, + } + ) elif chunk.type == "response.function_call_arguments.delta": _advance(chunk.output_index) tool_call_chunks.append( diff --git a/libs/partners/openai/langchain_openai/tools/__init__.py b/libs/partners/openai/langchain_openai/tools/__init__.py new file mode 100644 index 00000000000..11e5dd9c95a --- /dev/null +++ b/libs/partners/openai/langchain_openai/tools/__init__.py @@ -0,0 +1,3 @@ +from langchain_openai.tools.custom_tool import custom_tool + +__all__ = ["custom_tool"] diff --git a/libs/partners/openai/langchain_openai/tools/custom_tool.py b/libs/partners/openai/langchain_openai/tools/custom_tool.py new file mode 100644 index 00000000000..eb527083476 --- /dev/null +++ b/libs/partners/openai/langchain_openai/tools/custom_tool.py @@ -0,0 +1,109 @@ +import inspect +from collections.abc import Awaitable +from typing import Any, Callable + +from langchain_core.tools import tool + + +def _make_wrapped_func(func: Callable[..., str]) -> Callable[..., list[dict[str, Any]]]: + def wrapped(x: str) -> list[dict[str, Any]]: + return [{"type": "custom_tool_call_output", "output": func(x)}] + + return wrapped + + +def _make_wrapped_coroutine( + coroutine: Callable[..., Awaitable[str]], +) -> Callable[..., Awaitable[list[dict[str, Any]]]]: + 
async def wrapped(*args: Any, **kwargs: Any) -> list[dict[str, Any]]: + result = await coroutine(*args, **kwargs) + return [{"type": "custom_tool_call_output", "output": result}] + + return wrapped + + +def custom_tool(*args: Any, **kwargs: Any) -> Any: + """Decorator to create an OpenAI custom tool. + + Custom tools allow for tools with (potentially long) freeform string inputs. + + See below for an example using LangGraph: + + .. code-block:: python + + @custom_tool + def execute_code(code: str) -> str: + \"\"\"Execute python code.\"\"\" + return "27" + + + llm = ChatOpenAI(model="gpt-5", output_version="responses/v1") + + agent = create_react_agent(llm, [execute_code]) + + input_message = {"role": "user", "content": "Use the tool to calculate 3^3."} + for step in agent.stream( + {"messages": [input_message]}, + stream_mode="values", + ): + step["messages"][-1].pretty_print() + + You can also specify a format for a corresponding context-free grammar using the + ``format`` kwarg: + + .. code-block:: python + + from langchain_openai import ChatOpenAI, custom_tool + from langgraph.prebuilt import create_react_agent + + grammar = \"\"\" + start: expr + expr: term (SP ADD SP term)* -> add + | term + term: factor (SP MUL SP factor)* -> mul + | factor + factor: INT + SP: " " + ADD: "+" + MUL: "*" + %import common.INT + \"\"\" + + format = {"type": "grammar", "syntax": "lark", "definition": grammar} + + # highlight-next-line + @custom_tool(format=format) + def do_math(input_string: str) -> str: + \"\"\"Do a mathematical operation.\"\"\" + return "27" + + + llm = ChatOpenAI(model="gpt-5", output_version="responses/v1") + + agent = create_react_agent(llm, [do_math]) + + input_message = {"role": "user", "content": "Use the tool to calculate 3^3."} + for step in agent.stream( + {"messages": [input_message]}, + stream_mode="values", + ): + step["messages"][-1].pretty_print() + """ + + def decorator(func: Callable[..., Any]) -> Any: + metadata = {"type": "custom_tool"} + if "format" in kwargs: + metadata["format"] = kwargs.pop("format") + tool_obj = tool(infer_schema=False, **kwargs)(func) + tool_obj.metadata = metadata + tool_obj.description = func.__doc__ + if inspect.iscoroutinefunction(func): + tool_obj.coroutine = _make_wrapped_coroutine(func) + else: + tool_obj.func = _make_wrapped_func(func) + return tool_obj + + if args and callable(args[0]) and not kwargs: + return decorator(args[0]) + + return decorator diff --git a/libs/partners/openai/tests/cassettes/test_custom_tool.yaml.gz b/libs/partners/openai/tests/cassettes/test_custom_tool.yaml.gz new file mode 100644 index 0000000000000000000000000000000000000000..3a0ea3d888f512c4d6d40cb0bfbd7273a8ad045d GIT binary patch literal 4367 zcmV+q5%BIGiwFQo2$g68|Lt5`kLt(~e$THsd5W|Wl5JoUj+6)PV`gj?FuuLv3-}HP zn~Sad_*A#uzJOui%p}>(&Pua81MPBkb=6m2mCL&~e;mv(3cvrQ@lD$r@%L|k{i|jN zt9|_KUw`=q|CsS)rv*LQ=(E2{&Cphu&cPUq2lO}ViHW-CQLFKCV~jgTt>2@xxaS%E zC^V!Ir7pUO>~IIxKH0k839&$R0T%{vq0HKAkqTL#KB{9eWSy<8Hha{Yu(aR$lf-zL zc#1^XQ(v{T7v6)AhVeY|N zXx&QJ&0UoK-uU*nUw)Yn2i?9f9Q^Xj-%1M_W}t5kJJ{S?d5Em--cDALy$$ZH)3-rj z>hCX#%phubX0VB@2Np*0&b+hC+J^Ubc?ivYW3>T6cNU!+VYIRX&kV!HYOh<5E)C;A ze{ik0x@+R{(rx(Q!re-DVQY;e9Qu8um#^BJ(LQeI+rz^q*~3!XH=kSmu2#MYUS)j$ z_SH_WTO2cNDbBJr9gHl_*_=6__ou?J z$q)1sFX%0WVNa?*Y^MSx-d3D0!#O=~DzGhlx270r(kM3JW)cn163pBB7$ZTfN~1($ z+B?nHGX;1{8=u!JX=Y=Au*gxQb0|}7UK|>S1!%Bvy`JESKa^%S;YZukV(D>eA3=(DN0+^Au)c za|0#j#>+O?Gey<$h%#iZ*s{jZCv6PcfgP}23Y!6P@}L#H#-}{$q~>8dwDE#HBmAHZ zb3wnd4%hdC1+uRx_i)u8d@L`a1 zNZqF9Fd+LRN^`Q)U*)Kz*;wxGF 
zt|($Kj}2*w@cyVV=L8T!1EnZa?s+MUz=aU?fPS3_-AVIDQ3}6=`iX1M+Ep=qoO?RM z$154MmVgDqwR3_Ri(bk(O?kcrsMw?zwLnX{^92cCknpF2guNN=wn1nX1tb<0MtkJZ zmq-DDbJ}iLLT>s%%Vho}CrWE7EKkz`*H)yF103h92rO{Ixx)!cqA{>ecjmFB7Q; zlcSER&2gIshL>sL9ZDkCfN_)}dAP*AT!+|?A@LZJh{&sDv}KMuN8l|d_(8O7uS#uz z!|g^3#8B&}8Usac$xRHEfO?}45vWY$)b1S&AaI%TIT?fB1N~(d@e3;E1}yysbRB-0 z#0ao?DkH=j2_JEHQ#tW2`mqsFM0Wfc){h8(Ch~&!zh>e$1p|az_@bplB3eo9pt^>q zHHo(Lz=d@^EfXovQZ3huZ0So_T$0u*8$!n4;?TQkuQtJCp_wqh|BqTCh|R+jvWh&Dv~_>e$v4wjs7NiwYSM@7Fk5gbU7j3o0=?} z*_eX=_KeKgPKrE1<%9>?=EYDZ5lwR(<4HK7tja~+?`*;X(#%)bI=)aojO^? zk3n0AvX?n>cXB){&}PMm?aSz>I1YT5`>!whs^5t-14IBHI|IlEx1LPCJ@W+jh~+h~&j5maUKRUf6GjH1|0W#f6|^d}5x4UoX^ zj29PctVyD~lEnyJ0;f)Hogt8QL_179WkhEpjqyJ+H!j+K>?phr&Qi>mC|)Ue=Q#vd zS>2}G_`B-)RYa&q9k3Oje0J-KtJm8m{jS$32V+h=``5k6Ok{cty$<@XoS$UpBs1qD zkl$EiCO8zhe(Aer*SfiubVT+u{OB87m7BDfP!6yT(Rc_ZT)^Q+4o)+Nr zNAdCLwHo6W%lK*vt6(T^!AW~+RphAx8zozj=0J6M%n2Mpq&(-KR0e^3&}o1cd2X$w zGl`x{7cR$kei_nqFl=+1iwuAquQ|80Oa+2OnI^o`eJwObHWcIQM2n0uw~X-+N7#C{ zyQ&iUqsF|ee6OQy{esQk_!`r50nzii1%p*o|HZ=G1NFP{Rn+LGgud=;VZRE(HSqNz zDO&1Q$&IpZjp+{UA*@}x@}fp~)N8-f*A0E8TP9+z(V_nOi(;m3$#!Q3E4#7XZ&m>* zq3*2P?YBCRWY}B758qkn!13H$LI`%<5=4=8D6Wu2ciTOf!PJ3aNAdT1`fditZQMBQ z;5u{V!|MIf*3AZ{{qI!78hvgTSJbaJP zjGmE?mfx41%4*m~&4SF|G6yrjz6?d-zkKivsOP?ao4$TVy)!$jV;1-%s&^*9)L4cW zMwaT&5`N4!6`|+-x9XkQ`K#Xf&(%Ac%m<}YN5|o1Rc?!O;3x;~c`9reZr~=WuQ24) zN&bezzdH3SMWG&7N}Xs=Z}OJPEGzUM>}VM;Vv-dwN0P?mUI@w3yP)_`MYJhIOUoO( zqFfiZCdsyxvoQxMQ>gS3OY_@DvkoQckO$n8r@fn2)sz^w2@=1kBnW}zLVS*|W zqU9$QCJ2!-k)KtVls5iZg$b%ih?bvJn4pS;*z%JKlWKADDTT=eNB@k%1XUyii9TMK z0DdknVjQ5iT}cWuAUnkP4nHj{oNAM{MEEVrj^zZxb#FOQZNA~}e1CcCN}-pcQSo%z zsMX?DQ9=wr(wXpoS(JQL9bZ+)SJm-E)uGBNK*J1ha{{EWchmyl=Xn`0dsTsgDFS5C zIA?Ko!Bmz91(LRsvBQEsc)Bclnh!aj0TM2OXyEsN_T@|MX%J?#_o~=|qGs+Z$%f>5 zV7L}9h?WYhSMe1}^|?0Bh!!_c?Wr(qO$Ua{C%nr!p3C7Uv!y~OD0YUP=n|$(6d!eP zR^$Yo<_F6*pUfyOK9$)p$`EUyYLL5-C=NYQwjmSj;qN2lC^u(!#kdL32dI01?D%OF za<8DtFc;khJ*cLYW+~+%bl;`43K*LrTkETCR!Kx%0Yg;C$@kSG@`!V~F%&STX`ea) zJFZOJq~lRNOZPz>TvrxPv_yBwpRD)bG}RqwnaIt;^oT5$c5+2$Qy#k`62!Ko>T~3(^{j9ABhJxg~aXDjGHzd;Us7=`PnMxo)0MEJ)19Y?|nZ zxo0su;1#uz2X=uf7*khFUUEK)w9R?wt~_*<@~Z%8sKWJ7{xV*!B_1}QJs>%>wwQFt zFe^~3t;TpxI*gEV%(_9>@~jxq@_|`}%P|EIjqDuZHb78_&SN~?B(RM;BNCI2oK9X{ zGQShnyxCjp{ahrsXyK3bK!ivcMBc-{XMv9tg?I4pS>Pkba|i#P1wI17cku67 z;3Hbz!M|sLFVB#-@$XsSBQbsl|DFXtGBj`p|DFZD{PEIl{Cg1i^7=FHEWpVw+Lk&7 zc<9#SVBO`F>-s4Wk`u4PLo`=MVwv#{;G30WZ-Y5JykP?1}f;HYaRaJTq%@%q*TtvvXydIzS*;^f&>e)D*#s&X*S%kJv8T zybGUueSe-u&v=NM5tTa@1Uv74;tjv~;H!Q*si?{$8gA#PVh4hhwoy=$MB#8@sdWm_ z0KV8yiFK+4`zxBTR8!PX$_WMak<)~-Y;%d{<`B$#kW*qUMs&a#32b1s$~(v*INJ#FNj&(|h9fys%IsYTxf)^0{Sv)DkiI$r z(cx(?a|`DpMm!r5!bx!rfEOt~Zd=iBrDhsz)>Y*Vocs{650WTUiO%}!6UP`AsKO82 z6EBT&H~~tKD9Ie$pF0DfxxAxa!AV}0rl^8gMcO1I(@tp#ss>_GLG*#KUkxk?7~RYq zDE4}`eI*4riJod5Uh~QadWixxEN6%8wl{0^Jn3Ga+|46f+@`U7!8U5p`%KU~fQ_w;@#KdYOz35Q^2dUW*6#&2*dMQUy-Uv-ajesjq9su= zJXIFl%^wUlrif?qaO_opM7As1|=DBTtpb@uhxdu4OTJ zcJ&a3sToTy8j)xkt$skLXnE!nX4#v~Zvwc#vy6b~!4Iyzv%s9CZ#7@EObDuD)cKs2 zNj@bGHoDa^DZR4)NG>Nx^nsYZ3}zCLABgG8U@qRO`RSO(ucwy!$a#tX^X=rf_uf}| zzMXv8N6w${{d7AS2%>7i_Y~d&zn9e5|Eh z?j;vV$XH%%AsMKD*(ZVeCZd2^zqr8e*^S4 JxW`RP006VymE!;a literal 0 HcmV?d00001 diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index 0e23d0e3f06..4d051c5601e 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -17,7 +17,7 @@ from langchain_core.messages import ( 
from pydantic import BaseModel from typing_extensions import TypedDict -from langchain_openai import ChatOpenAI +from langchain_openai import ChatOpenAI, custom_tool MODEL_NAME = "gpt-4o-mini" @@ -672,3 +672,32 @@ def test_image_generation_multi_turn() -> None: _check_response(ai_message2) tool_output2 = ai_message2.additional_kwargs["tool_outputs"][0] assert set(tool_output2.keys()).issubset(expected_keys) + + +@pytest.mark.vcr() +def test_custom_tool() -> None: + @custom_tool + def execute_code(code: str) -> str: + """Execute python code.""" + return "27" + + llm = ChatOpenAI(model="gpt-5", output_version="responses/v1").bind_tools( + [execute_code] + ) + + input_message = {"role": "user", "content": "Use the tool to evaluate 3^3."} + tool_call_message = llm.invoke([input_message]) + assert isinstance(tool_call_message, AIMessage) + assert len(tool_call_message.tool_calls) == 1 + tool_call = tool_call_message.tool_calls[0] + tool_message = execute_code.invoke(tool_call) + response = llm.invoke([input_message, tool_call_message, tool_message]) + assert isinstance(response, AIMessage) + + # Test streaming + full: Optional[BaseMessageChunk] = None + for chunk in llm.stream([input_message]): + assert isinstance(chunk, AIMessageChunk) + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessageChunk) + assert len(full.tool_calls) == 1 diff --git a/libs/partners/openai/tests/unit_tests/test_imports.py b/libs/partners/openai/tests/unit_tests/test_imports.py index 324e71bda9b..144a394c2ff 100644 --- a/libs/partners/openai/tests/unit_tests/test_imports.py +++ b/libs/partners/openai/tests/unit_tests/test_imports.py @@ -7,6 +7,7 @@ EXPECTED_ALL = [ "AzureOpenAI", "AzureChatOpenAI", "AzureOpenAIEmbeddings", + "custom_tool", ] diff --git a/libs/partners/openai/tests/unit_tests/test_tools.py b/libs/partners/openai/tests/unit_tests/test_tools.py new file mode 100644 index 00000000000..106aa0aa080 --- /dev/null +++ b/libs/partners/openai/tests/unit_tests/test_tools.py @@ -0,0 +1,120 @@ +from langchain_core.messages import AIMessage, HumanMessage, ToolMessage +from langchain_core.tools import Tool + +from langchain_openai import ChatOpenAI, custom_tool + + +def test_custom_tool() -> None: + @custom_tool + def my_tool(x: str) -> str: + """Do thing.""" + return "a" + x + + # Test decorator + assert isinstance(my_tool, Tool) + assert my_tool.metadata == {"type": "custom_tool"} + assert my_tool.description == "Do thing." 
+ + result = my_tool.invoke( + { + "type": "tool_call", + "name": "my_tool", + "args": {"whatever": "b"}, + "id": "abc", + "extras": {"type": "custom_tool_call"}, + } + ) + assert result == ToolMessage( + [{"type": "custom_tool_call_output", "output": "ab"}], + name="my_tool", + tool_call_id="abc", + ) + + # Test tool schema + ## Test with format + @custom_tool(format={"type": "grammar", "syntax": "lark", "definition": "..."}) + def another_tool(x: str) -> None: + """Do thing.""" + pass + + llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True).bind_tools([another_tool]) + assert llm.kwargs == { # type: ignore[attr-defined] + "tools": [ + { + "type": "custom", + "name": "another_tool", + "description": "Do thing.", + "format": {"type": "grammar", "syntax": "lark", "definition": "..."}, + } + ] + } + + llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True).bind_tools([my_tool]) + assert llm.kwargs == { # type: ignore[attr-defined] + "tools": [{"type": "custom", "name": "my_tool", "description": "Do thing."}] + } + + # Test passing messages back + message_history = [ + HumanMessage("Use the tool"), + AIMessage( + [ + { + "type": "custom_tool_call", + "id": "ctc_abc123", + "call_id": "abc", + "name": "my_tool", + "input": "a", + } + ], + tool_calls=[ + { + "type": "tool_call", + "name": "my_tool", + "args": {"__arg1": "a"}, + "id": "abc", + } + ], + ), + result, + ] + payload = llm._get_request_payload(message_history) # type: ignore[attr-defined] + expected_input = [ + {"content": "Use the tool", "role": "user"}, + { + "type": "custom_tool_call", + "id": "ctc_abc123", + "call_id": "abc", + "name": "my_tool", + "input": "a", + }, + {"type": "custom_tool_call_output", "call_id": "abc", "output": "ab"}, + ] + assert payload["input"] == expected_input + + +async def test_async_custom_tool() -> None: + @custom_tool + async def my_async_tool(x: str) -> str: + """Do async thing.""" + return "a" + x + + # Test decorator + assert isinstance(my_async_tool, Tool) + assert my_async_tool.metadata == {"type": "custom_tool"} + assert my_async_tool.description == "Do async thing." 
+ + result = await my_async_tool.ainvoke( + { + "type": "tool_call", + "name": "my_async_tool", + "args": {"whatever": "b"}, + "id": "abc", + "extras": {"type": "custom_tool_call"}, + } + ) + assert result == ToolMessage( + [{"type": "custom_tool_call_output", "output": "ab"}], + name="my_async_tool", + tool_call_id="abc", + ) From 5036bd7adb6b169169f928c5c199559e13f7f6e6 Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Thu, 7 Aug 2025 22:33:19 +0200 Subject: [PATCH 2/5] fix(openai): don't crash get_num_tokens_from_messages on gpt-5 (#32451) --- .../openai/langchain_openai/chat_models/base.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 5d0b480ce18..eb1fd4506e3 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -1447,8 +1447,10 @@ class BaseChatOpenAI(BaseChatModel): encoding = tiktoken.encoding_for_model(model) except KeyError: encoder = "cl100k_base" - if self.model_name.startswith("gpt-4o") or self.model_name.startswith( - "gpt-4.1" + if ( + self.model_name.startswith("gpt-4o") + or self.model_name.startswith("gpt-4.1") + or self.model_name.startswith("gpt-5") ): encoder = "o200k_base" encoding = tiktoken.get_encoding(encoder) @@ -1499,7 +1501,11 @@ class BaseChatOpenAI(BaseChatModel): tokens_per_message = 4 # if there's a name, the role is omitted tokens_per_name = -1 - elif model.startswith("gpt-3.5-turbo") or model.startswith("gpt-4"): + elif ( + model.startswith("gpt-3.5-turbo") + or model.startswith("gpt-4") + or model.startswith("gpt-5") + ): tokens_per_message = 3 tokens_per_name = 1 else: From 6727d6e8c8a7e9c11258987716dc86f8cd2c0c0a Mon Sep 17 00:00:00 2001 From: ccurme Date: Thu, 7 Aug 2025 17:39:01 -0300 Subject: [PATCH 3/5] release(core): 0.3.74 (#32454) --- libs/core/langchain_core/version.py | 2 +- libs/core/pyproject.toml | 2 +- libs/core/uv.lock | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/core/langchain_core/version.py b/libs/core/langchain_core/version.py index 2111f6c6fec..f65e4f6a645 100644 --- a/libs/core/langchain_core/version.py +++ b/libs/core/langchain_core/version.py @@ -1,3 +1,3 @@ """langchain-core version information and utilities.""" -VERSION = "0.3.73" +VERSION = "0.3.74" diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index c08199b6f0a..e329e0d3c5c 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -16,7 +16,7 @@ dependencies = [ "pydantic>=2.7.4", ] name = "langchain-core" -version = "0.3.73" +version = "0.3.74" description = "Building applications with LLMs through composability" readme = "README.md" diff --git a/libs/core/uv.lock b/libs/core/uv.lock index 9e200b80789..7ae9f48264b 100644 --- a/libs/core/uv.lock +++ b/libs/core/uv.lock @@ -987,7 +987,7 @@ wheels = [ [[package]] name = "langchain-core" -version = "0.3.73" +version = "0.3.74" source = { editable = "." 
} dependencies = [ { name = "jsonpatch" }, From 00244122bd12cca1bf05584c2abfcf8b9c481fe4 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Thu, 7 Aug 2025 22:24:21 -0400 Subject: [PATCH 4/5] feat(openai): `minimal` and `verbosity` (#32455) --- .../tests/unit_tests/chat_models/test_base.py | 53 +++++++++ .../chat_models/test_chat_models.py | 53 +++++++++ .../langchain_openai/chat_models/base.py | 49 +++++++- .../chat_models/test_base.py | 40 +++---- .../chat_models/test_responses_api.py | 109 +++++++++++++++--- .../tests/unit_tests/chat_models/test_base.py | 88 +++++++++++++- .../chat_models/test_prompt_cache_key.py | 5 +- .../openai/tests/unit_tests/test_tools.py | 8 +- libs/partners/openai/uv.lock | 4 +- 9 files changed, 354 insertions(+), 55 deletions(-) diff --git a/libs/langchain/tests/unit_tests/chat_models/test_base.py b/libs/langchain/tests/unit_tests/chat_models/test_base.py index 8cd5e0631b8..65be8a429f2 100644 --- a/libs/langchain/tests/unit_tests/chat_models/test_base.py +++ b/libs/langchain/tests/unit_tests/chat_models/test_base.py @@ -68,6 +68,32 @@ def test_init_unknown_provider() -> None: clear=True, ) def test_configurable() -> None: + """Test configurable chat model behavior without default parameters. + + Verifies that a configurable chat model initialized without default parameters: + - Has access to all standard runnable methods (``invoke``, ``stream``, etc.) + - Blocks access to non-configurable methods until configuration is provided + - Supports declarative operations (``bind_tools``) without mutating original model + - Can chain declarative operations and configuration to access full functionality + - Properly resolves to the configured model type when parameters are provided + + Example: + + .. python:: + + # This creates a configurable model without specifying which model + model = init_chat_model() + + # This will FAIL - no model specified yet + model.get_num_tokens("hello") # AttributeError! + + # This works - provides model at runtime + response = model.invoke( + "Hello", + config={"configurable": {"model": "gpt-4o"}} + ) + + """ model = init_chat_model() for method in ( @@ -125,6 +151,7 @@ def test_configurable() -> None: "presence_penalty": None, "reasoning": None, "reasoning_effort": None, + "verbosity": None, "frequency_penalty": None, "include": None, "seed": None, @@ -170,6 +197,32 @@ def test_configurable() -> None: clear=True, ) def test_configurable_with_default() -> None: + """Test configurable chat model behavior with default parameters. + + Verifies that a configurable chat model initialized with default parameters: + - Has access to all standard runnable methods (``invoke``, ``stream``, etc.) + - Provides immediate access to non-configurable methods (e.g. ``get_num_tokens``) + - Supports model switching through runtime configuration using ``config_prefix`` + - Maintains proper model identity and attributes when reconfigured + - Can be used in chains with different model providers via configuration + + Example: + + .. 
python:: + + # This creates a configurable model with default parameters (model) + model = init_chat_model("gpt-4o", configurable_fields="any", config_prefix="bar") + + # This works immediately - uses default gpt-4o + tokens = model.get_num_tokens("hello") + + # This also works - switches to Claude at runtime + response = model.invoke( + "Hello", + config={"configurable": {"my_model_model": "claude-3-sonnet-20240229"}} + ) + + """ # noqa: E501 model = init_chat_model("gpt-4o", configurable_fields="any", config_prefix="bar") for method in ( "invoke", diff --git a/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py b/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py index 147d7813f89..0f991195f7d 100644 --- a/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py +++ b/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py @@ -68,6 +68,32 @@ def test_init_unknown_provider() -> None: clear=True, ) def test_configurable() -> None: + """Test configurable chat model behavior without default parameters. + + Verifies that a configurable chat model initialized without default parameters: + - Has access to all standard runnable methods (``invoke``, ``stream``, etc.) + - Blocks access to non-configurable methods until configuration is provided + - Supports declarative operations (``bind_tools``) without mutating original model + - Can chain declarative operations and configuration to access full functionality + - Properly resolves to the configured model type when parameters are provided + + Example: + + .. python:: + + # This creates a configurable model without specifying which model + model = init_chat_model() + + # This will FAIL - no model specified yet + model.get_num_tokens("hello") # AttributeError! + + # This works - provides model at runtime + response = model.invoke( + "Hello", + config={"configurable": {"model": "gpt-4o"}} + ) + + """ model = init_chat_model() for method in ( @@ -125,6 +151,7 @@ def test_configurable() -> None: "presence_penalty": None, "reasoning": None, "reasoning_effort": None, + "verbosity": None, "frequency_penalty": None, "include": None, "seed": None, @@ -170,6 +197,32 @@ def test_configurable() -> None: clear=True, ) def test_configurable_with_default() -> None: + """Test configurable chat model behavior with default parameters. + + Verifies that a configurable chat model initialized with default parameters: + - Has access to all standard runnable methods (``invoke``, ``stream``, etc.) + - Provides immediate access to non-configurable methods (e.g. ``get_num_tokens``) + - Supports model switching through runtime configuration using ``config_prefix`` + - Maintains proper model identity and attributes when reconfigured + - Can be used in chains with different model providers via configuration + + Example: + + .. 
python:: + + # This creates a configurable model with default parameters (model) + model = init_chat_model("gpt-4o", configurable_fields="any", config_prefix="bar") + + # This works immediately - uses default gpt-4o + tokens = model.get_num_tokens("hello") + + # This also works - switches to Claude at runtime + response = model.invoke( + "Hello", + config={"configurable": {"my_model_model": "claude-3-sonnet-20240229"}} + ) + + """ # noqa: E501 model = init_chat_model("gpt-4o", configurable_fields="any", config_prefix="bar") for method in ( "invoke", diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index eb1fd4506e3..a8702359b36 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -458,8 +458,7 @@ class BaseChatOpenAI(BaseChatModel): alias="api_key", default_factory=secret_from_env("OPENAI_API_KEY", default=None) ) openai_api_base: Optional[str] = Field(default=None, alias="base_url") - """Base URL path for API requests, leave blank if not using a proxy or service - emulator.""" + """Base URL path for API requests, leave blank if not using a proxy or service emulator.""" # noqa: E501 openai_organization: Optional[str] = Field(default=None, alias="organization") """Automatically inferred from env var ``OPENAI_ORG_ID`` if not provided.""" # to support explicit proxy for OpenAI @@ -507,8 +506,9 @@ class BaseChatOpenAI(BaseChatModel): Reasoning models only, like OpenAI o1, o3, and o4-mini. - Currently supported values are low, medium, and high. Reducing reasoning effort - can result in faster responses and fewer tokens used on reasoning in a response. + Currently supported values are ``'minimal'``, ``'low'``, ``'medium'``, and + ``'high'``. Reducing reasoning effort can result in faster responses and fewer + tokens used on reasoning in a response. .. versionadded:: 0.2.14 """ @@ -527,6 +527,17 @@ class BaseChatOpenAI(BaseChatModel): .. versionadded:: 0.3.24 + """ + verbosity: Optional[str] = None + """Controls the verbosity level of responses for reasoning models. For use with the + Responses API. + + Currently supported values are ``'low'``, ``'medium'``, and ``'high'``. + + Controls how detailed the model's responses are. + + .. versionadded:: 0.3.28 + """ tiktoken_model_name: Optional[str] = None """The model name to pass to tiktoken when using this class. @@ -654,6 +665,7 @@ class BaseChatOpenAI(BaseChatModel): llm = ChatOpenAI( model="o4-mini", use_responses_api=True, + output_version="responses/v1", ) llm.invoke([HumanMessage("How are you?")], previous_response_id="resp_123") @@ -701,10 +713,24 @@ class BaseChatOpenAI(BaseChatModel): @model_validator(mode="before") @classmethod def validate_temperature(cls, values: dict[str, Any]) -> Any: - """Currently o1 models only allow temperature=1.""" + """Validate temperature parameter for different models. 
+ + - o1 models only allow temperature=1 + - gpt-5 models only allow temperature=1 or unset (defaults to 1) + """ model = values.get("model_name") or values.get("model") or "" + + # For o1 models, set temperature=1 if not provided if model.startswith("o1") and "temperature" not in values: values["temperature"] = 1 + + # For gpt-5 models, handle temperature restrictions + if model.startswith("gpt-5"): + temperature = values.get("temperature") + if temperature is not None and temperature != 1: + # For gpt-5, only temperature=1 is supported, so remove non-defaults + values.pop("temperature", None) + return values @model_validator(mode="after") @@ -805,6 +831,7 @@ class BaseChatOpenAI(BaseChatModel): "temperature": self.temperature, "reasoning_effort": self.reasoning_effort, "reasoning": self.reasoning, + "verbosity": self.verbosity, "include": self.include, "service_tier": self.service_tier, "truncation": self.truncation, @@ -1178,6 +1205,7 @@ class BaseChatOpenAI(BaseChatModel): kwargs["stop"] = stop payload = {**self._default_params, **kwargs} + if self._use_responses_api(payload): if self.use_previous_response_id: last_messages, previous_response_id = _get_last_messages(messages) @@ -2366,7 +2394,11 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override] from langchain_openai import ChatOpenAI - llm = ChatOpenAI(model="gpt-4.1-mini", use_responses_api=True) + llm = ChatOpenAI( + model="gpt-4.1-mini", + use_responses_api=True, + output_version="responses/v1", + ) response = llm.invoke("Hi, I'm Bob.") response.text() @@ -3486,6 +3518,11 @@ def _construct_responses_api_payload( if "reasoning_effort" in payload and "reasoning" not in payload: payload["reasoning"] = {"effort": payload.pop("reasoning_effort")} + # Remove temperature parameter for models that don't support it in responses API + model = payload.get("model", "") + if model.startswith("gpt-5"): + payload.pop("temperature", None) + payload["input"] = _construct_responses_api_input(messages) if tools := payload.pop("tools", None): new_tools: list = [] diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py index 1bc191d418f..b18bd5f97e9 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py @@ -32,7 +32,7 @@ from pydantic import BaseModel, Field from langchain_openai import ChatOpenAI from tests.unit_tests.fake.callbacks import FakeCallbackHandler -MAX_TOKEN_COUNT = 16 +MAX_TOKEN_COUNT = 100 @pytest.mark.scheduled @@ -219,7 +219,7 @@ async def test_openai_abatch_tags(use_responses_api: bool) -> None: def test_openai_invoke() -> None: """Test invoke tokens from ChatOpenAI.""" llm = ChatOpenAI( - model="o4-mini", + model="gpt-5-nano", service_tier="flex", # Also test service_tier max_retries=3, # Add retries for 503 capacity errors ) @@ -418,7 +418,7 @@ class MakeASandwich(BaseModel): def test_tool_use() -> None: - llm = ChatOpenAI(model="gpt-4-turbo", temperature=0) + llm = ChatOpenAI(model="gpt-5-nano", temperature=0) llm_with_tool = llm.bind_tools(tools=[GenerateUsername], tool_choice=True) msgs: list = [HumanMessage("Sally has green hair, what would her username be?")] ai_msg = llm_with_tool.invoke(msgs) @@ -462,7 +462,7 @@ def test_tool_use() -> None: def test_manual_tool_call_msg(use_responses_api: bool) -> None: """Test passing in manually construct tool call message.""" llm = ChatOpenAI( - model="gpt-3.5-turbo-0125", 
temperature=0, use_responses_api=use_responses_api + model="gpt-5-nano", temperature=0, use_responses_api=use_responses_api ) llm_with_tool = llm.bind_tools(tools=[GenerateUsername]) msgs: list = [ @@ -508,7 +508,7 @@ def test_manual_tool_call_msg(use_responses_api: bool) -> None: def test_bind_tools_tool_choice(use_responses_api: bool) -> None: """Test passing in manually construct tool call message.""" llm = ChatOpenAI( - model="gpt-3.5-turbo-0125", temperature=0, use_responses_api=use_responses_api + model="gpt-5-nano", temperature=0, use_responses_api=use_responses_api ) for tool_choice in ("any", "required"): llm_with_tools = llm.bind_tools( @@ -523,7 +523,7 @@ def test_bind_tools_tool_choice(use_responses_api: bool) -> None: def test_disable_parallel_tool_calling() -> None: - llm = ChatOpenAI(model="gpt-4o-mini") + llm = ChatOpenAI(model="gpt-5-nano") llm_with_tools = llm.bind_tools([GenerateUsername], parallel_tool_calls=False) result = llm_with_tools.invoke( "Use the GenerateUsername tool to generate user names for:\n\n" @@ -534,7 +534,7 @@ def test_disable_parallel_tool_calling() -> None: assert len(result.tool_calls) == 1 -@pytest.mark.parametrize("model", ["gpt-4o-mini", "o1", "gpt-4"]) +@pytest.mark.parametrize("model", ["gpt-4o-mini", "o1", "gpt-4", "gpt-5-nano"]) def test_openai_structured_output(model: str) -> None: class MyModel(BaseModel): """A Person""" @@ -694,7 +694,7 @@ def test_tool_calling_strict(use_responses_api: bool) -> None: input: Optional[int] = Field(default=None) model = ChatOpenAI( - model="gpt-4.1", temperature=0, use_responses_api=use_responses_api + model="gpt-5-nano", temperature=0, use_responses_api=use_responses_api ) # N.B. magic_function adds metadata to schema (min/max for number fields) model_with_tools = model.bind_tools([magic_function], strict=True) @@ -818,7 +818,7 @@ def test_json_schema_openai_format( strict: bool, method: Literal["json_schema", "function_calling"] ) -> None: """Test we can pass in OpenAI schema format specifying strict.""" - llm = ChatOpenAI(model="gpt-4o-mini") + llm = ChatOpenAI(model="gpt-5-nano") schema = { "name": "get_weather", "description": "Fetches the weather in the given location", @@ -939,7 +939,7 @@ def test_prediction_tokens() -> None: """ ) - llm = ChatOpenAI(model="gpt-4o") + llm = ChatOpenAI(model="gpt-4.1-nano") query = ( "Replace the Username property with an Email property. " "Respond only with code, and with no markdown formatting." 
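
For illustration, a minimal sketch of the gpt-5 temperature handling these tests rely on (the validate_temperature change in this patch); the model name and value are just examples, and an OPENAI_API_KEY is assumed to be set:

    from langchain_openai import ChatOpenAI

    # With a gpt-5 model, a non-default temperature is dropped by the "before"
    # validator instead of being sent to the API, so temperature=0 does not error.
    llm = ChatOpenAI(model="gpt-5-nano", temperature=0)
    # llm.temperature is now None and is omitted from request payloads.
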
@@ -981,7 +981,7 @@ class Foo(BaseModel): def test_stream_response_format() -> None: full: Optional[BaseMessageChunk] = None chunks = [] - for chunk in ChatOpenAI(model="gpt-4o-mini").stream( + for chunk in ChatOpenAI(model="gpt-5-nano").stream( "how are ya", response_format=Foo ): chunks.append(chunk) @@ -998,7 +998,7 @@ def test_stream_response_format() -> None: async def test_astream_response_format() -> None: full: Optional[BaseMessageChunk] = None chunks = [] - async for chunk in ChatOpenAI(model="gpt-4o-mini").astream( + async for chunk in ChatOpenAI(model="gpt-5-nano").astream( "how are ya", response_format=Foo ): chunks.append(chunk) @@ -1042,7 +1042,7 @@ def test_o1_stream_default_works() -> None: def test_multi_party_conversation() -> None: - llm = ChatOpenAI(model="gpt-4o") + llm = ChatOpenAI(model="gpt-5-nano") messages = [ HumanMessage("Hi, I have black hair.", name="Alice"), HumanMessage("Hi, I have brown hair.", name="Bob"), @@ -1057,7 +1057,7 @@ def test_structured_output_and_tools() -> None: response: str explanation: str - llm = ChatOpenAI(model="gpt-4o-mini").bind_tools( + llm = ChatOpenAI(model="gpt-5-nano").bind_tools( [GenerateUsername], strict=True, response_format=ResponseFormat ) @@ -1082,7 +1082,7 @@ def test_tools_and_structured_output() -> None: response: str explanation: str - llm = ChatOpenAI(model="gpt-4o-mini").with_structured_output( + llm = ChatOpenAI(model="gpt-5-nano").with_structured_output( ResponseFormat, strict=True, include_raw=True, tools=[GenerateUsername] ) @@ -1114,8 +1114,8 @@ def test_tools_and_structured_output() -> None: @pytest.mark.scheduled def test_prompt_cache_key_invoke() -> None: - """Test that prompt_cache_key works with invoke calls.""" - chat = ChatOpenAI(model="gpt-4o-mini", max_completion_tokens=20) + """Test that `prompt_cache_key` works with invoke calls.""" + chat = ChatOpenAI(model="gpt-5-nano", max_completion_tokens=500) messages = [HumanMessage("Say hello")] # Test that invoke works with prompt_cache_key parameter @@ -1135,18 +1135,18 @@ def test_prompt_cache_key_invoke() -> None: @pytest.mark.scheduled def test_prompt_cache_key_usage_methods_integration() -> None: - """Integration test for prompt_cache_key usage methods.""" + """Integration test for `prompt_cache_key` usage methods.""" messages = [HumanMessage("Say hi")] # Test keyword argument method - chat = ChatOpenAI(model="gpt-4o-mini", max_completion_tokens=10) + chat = ChatOpenAI(model="gpt-5-nano", max_completion_tokens=10) response = chat.invoke(messages, prompt_cache_key="integration-test-v1") assert isinstance(response, AIMessage) assert isinstance(response.content, str) # Test model-level via model_kwargs chat_model_level = ChatOpenAI( - model="gpt-4o-mini", + model="gpt-5-nano", max_completion_tokens=10, model_kwargs={"prompt_cache_key": "integration-model-level-v1"}, ) diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index 4d051c5601e..3b1a3b41e5d 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -175,7 +175,9 @@ class FooDict(TypedDict): def test_parsed_pydantic_schema() -> None: - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" + ) response = llm.invoke("how are ya", response_format=Foo) parsed = 
Foo(**json.loads(response.text())) assert parsed == response.additional_kwargs["parsed"] @@ -193,7 +195,9 @@ def test_parsed_pydantic_schema() -> None: async def test_parsed_pydantic_schema_async() -> None: - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" + ) response = await llm.ainvoke("how are ya", response_format=Foo) parsed = Foo(**json.loads(response.text())) assert parsed == response.additional_kwargs["parsed"] @@ -213,7 +217,9 @@ async def test_parsed_pydantic_schema_async() -> None: @pytest.mark.flaky(retries=3, delay=1) @pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict]) def test_parsed_dict_schema(schema: Any) -> None: - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" + ) response = llm.invoke("how are ya", response_format=schema) parsed = json.loads(response.text()) assert parsed == response.additional_kwargs["parsed"] @@ -231,7 +237,9 @@ def test_parsed_dict_schema(schema: Any) -> None: def test_parsed_strict() -> None: - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" + ) class InvalidJoke(TypedDict): setup: Annotated[str, ..., "The setup of the joke"] @@ -258,7 +266,9 @@ def test_parsed_strict() -> None: @pytest.mark.flaky(retries=3, delay=1) @pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict]) async def test_parsed_dict_schema_async(schema: Any) -> None: - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" + ) response = await llm.ainvoke("how are ya", response_format=schema) parsed = json.loads(response.text()) assert parsed == response.additional_kwargs["parsed"] @@ -280,7 +290,9 @@ def test_function_calling_and_structured_output() -> None: """return x * y""" return x * y - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" + ) bound_llm = llm.bind_tools([multiply], response_format=Foo, strict=True) # Test structured output response = llm.invoke("how are ya", response_format=Foo) @@ -324,7 +336,9 @@ def test_reasoning(output_version: Literal["v0", "responses/v1"]) -> None: def test_stateful_api() -> None: - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" + ) response = llm.invoke("how are you, my name is Bobo") assert "id" in response.response_metadata @@ -421,7 +435,9 @@ def test_stream_reasoning_summary( @pytest.mark.vcr def test_code_interpreter() -> None: - llm = ChatOpenAI(model="o4-mini", use_responses_api=True) + llm = ChatOpenAI( + model="o4-mini", use_responses_api=True, output_version="responses/v1" + ) llm_with_tools = llm.bind_tools( [{"type": "code_interpreter", "container": {"type": "auto"}}] ) @@ -431,13 +447,16 @@ def test_code_interpreter() -> None: } response = llm_with_tools.invoke([input_message]) _check_response(response) - tool_outputs = response.additional_kwargs["tool_outputs"] + tool_outputs = [ + block + for block in response.content + if isinstance(block, dict) and block.get("type") == "code_interpreter_call" + ] assert tool_outputs assert any(output["type"] == "code_interpreter_call" for output 
in tool_outputs) # Test streaming # Use same container - tool_outputs = response.additional_kwargs["tool_outputs"] assert len(tool_outputs) == 1 container_id = tool_outputs[0]["container_id"] llm_with_tools = llm.bind_tools( @@ -449,7 +468,11 @@ def test_code_interpreter() -> None: assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk assert isinstance(full, AIMessageChunk) - tool_outputs = full.additional_kwargs["tool_outputs"] + tool_outputs = [ + block + for block in full.content + if isinstance(block, dict) and block.get("type") == "code_interpreter_call" + ] assert tool_outputs assert any(output["type"] == "code_interpreter_call" for output in tool_outputs) @@ -460,7 +483,9 @@ def test_code_interpreter() -> None: @pytest.mark.vcr def test_mcp_builtin() -> None: - llm = ChatOpenAI(model="o4-mini", use_responses_api=True) + llm = ChatOpenAI( + model="o4-mini", use_responses_api=True, output_version="responses/v1" + ) llm_with_tools = llm.bind_tools( [ @@ -489,8 +514,8 @@ def test_mcp_builtin() -> None: "approve": True, "approval_request_id": output["id"], } - for output in response.additional_kwargs["tool_outputs"] - if output["type"] == "mcp_approval_request" + for output in response.content + if isinstance(output, dict) and output.get("type") == "mcp_approval_request" ] ) _ = llm_with_tools.invoke( @@ -549,7 +574,9 @@ def test_mcp_builtin_zdr() -> None: @pytest.mark.vcr() def test_image_generation_streaming() -> None: """Test image generation streaming.""" - llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True) + llm = ChatOpenAI( + model="gpt-4.1", use_responses_api=True, output_version="responses/v1" + ) tool = { "type": "image_generation", # For testing purposes let's keep the quality low, so the test runs faster. @@ -596,7 +623,13 @@ def test_image_generation_streaming() -> None: # At the moment, the streaming API does not pick up annotations fully. # So the following check is commented out. 
# _check_response(complete_ai_message) - tool_output = complete_ai_message.additional_kwargs["tool_outputs"][0] + tool_outputs = [ + block + for block in complete_ai_message.content + if isinstance(block, dict) and block.get("type") == "image_generation_call" + ] + assert len(tool_outputs) == 1 + tool_output = tool_outputs[0] assert set(tool_output.keys()).issubset(expected_keys) @@ -604,7 +637,9 @@ def test_image_generation_streaming() -> None: def test_image_generation_multi_turn() -> None: """Test multi-turn editing of image generation by passing in history.""" # Test multi-turn - llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True) + llm = ChatOpenAI( + model="gpt-4.1", use_responses_api=True, output_version="responses/v1" + ) # Test invocation tool = { "type": "image_generation", @@ -621,7 +656,13 @@ def test_image_generation_multi_turn() -> None: ] ai_message = llm_with_tools.invoke(chat_history) _check_response(ai_message) - tool_output = ai_message.additional_kwargs["tool_outputs"][0] + tool_outputs = [ + block + for block in ai_message.content + if isinstance(block, dict) and block.get("type") == "image_generation_call" + ] + assert len(tool_outputs) == 1 + tool_output = tool_outputs[0] # Example tool output for an image # { @@ -670,10 +711,40 @@ def test_image_generation_multi_turn() -> None: ai_message2 = llm_with_tools.invoke(chat_history) _check_response(ai_message2) - tool_output2 = ai_message2.additional_kwargs["tool_outputs"][0] + tool_outputs2 = [ + block + for block in ai_message2.content + if isinstance(block, dict) and block.get("type") == "image_generation_call" + ] + assert len(tool_outputs2) == 1 + tool_output2 = tool_outputs2[0] assert set(tool_output2.keys()).issubset(expected_keys) +@pytest.mark.xfail( + reason="verbosity parameter not yet supported by OpenAI Responses API" +) +def test_verbosity_parameter() -> None: + """Test verbosity parameter with Responses API. + + TODO: This test is expected to fail until OpenAI enables verbosity support + in the Responses API for available models. The parameter is properly implemented + in the codebase but the API currently returns 'Unknown parameter: verbosity'. + Remove @pytest.mark.xfail when OpenAI adds support. 
+ """ + llm = ChatOpenAI( + model=MODEL_NAME, + verbosity="medium", + use_responses_api=True, + output_version="responses/v1", + ) + response = llm.invoke([HumanMessage(content="Hello, explain quantum computing.")]) + + assert isinstance(response, AIMessage) + assert response.content + # When verbosity works, we expect the response to respect the verbosity level + + @pytest.mark.vcr() def test_custom_tool() -> None: @custom_tool diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py index c4176711482..73185790602 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py @@ -874,8 +874,13 @@ def test_get_num_tokens_from_messages() -> None: ), ToolMessage("foobar", tool_call_id="foo"), ] - expected = 176 - actual = llm.get_num_tokens_from_messages(messages) + expected = 431 # Updated to match token count with mocked 100x100 image + + # Mock _url_to_size to avoid PIL dependency in unit tests + with patch("langchain_openai.chat_models.base._url_to_size") as mock_url_to_size: + mock_url_to_size.return_value = (100, 100) # 100x100 pixel image + actual = llm.get_num_tokens_from_messages(messages) + assert expected == actual # Test file inputs @@ -1131,6 +1136,73 @@ def test_init_o1() -> None: assert len(record) == 0 +def test_init_minimal_reasoning_effort() -> None: + with pytest.warns(None) as record: # type: ignore[call-overload] + ChatOpenAI(model="gpt-5", reasoning_effort="minimal") + assert len(record) == 0 + + +@pytest.mark.parametrize("use_responses_api", [False, True]) +@pytest.mark.parametrize("use_max_completion_tokens", [True, False]) +def test_minimal_reasoning_effort_payload( + use_max_completion_tokens: bool, use_responses_api: bool +) -> None: + """Test that minimal reasoning effort is included in request payload.""" + if use_max_completion_tokens: + kwargs = {"max_completion_tokens": 100} + else: + kwargs = {"max_tokens": 100} + + init_kwargs: dict[str, Any] = { + "model": "gpt-5", + "reasoning_effort": "minimal", + "use_responses_api": use_responses_api, + **kwargs, + } + + if use_responses_api: + init_kwargs["output_version"] = "responses/v1" + + llm = ChatOpenAI(**init_kwargs) + + messages = [ + {"role": "developer", "content": "respond with just 'test'"}, + {"role": "user", "content": "hello"}, + ] + + payload = llm._get_request_payload(messages, stop=None) + + # When using responses API, reasoning_effort becomes reasoning.effort + if use_responses_api: + assert "reasoning" in payload + assert payload["reasoning"]["effort"] == "minimal" + # For responses API, tokens param becomes max_output_tokens + assert payload["max_output_tokens"] == 100 + else: + # For non-responses API, reasoning_effort remains as is + assert payload["reasoning_effort"] == "minimal" + if use_max_completion_tokens: + assert payload["max_completion_tokens"] == 100 + else: + # max_tokens gets converted to max_completion_tokens in non-responses API + assert payload["max_completion_tokens"] == 100 + + +def test_verbosity_parameter_payload() -> None: + """Test verbosity parameter is included in request payload for Responses API.""" + llm = ChatOpenAI( + model="gpt-5", + verbosity="high", + use_responses_api=True, + output_version="responses/v1", + ) + + messages = [{"role": "user", "content": "hello"}] + payload = llm._get_request_payload(messages, stop=None) + + assert payload["verbosity"] == "high" + + def 
test_structured_output_old_model() -> None: class Output(TypedDict): """output.""" @@ -2198,7 +2270,9 @@ def test__construct_responses_api_input_multiple_message_types() -> None: assert messages_copy == messages # Test dict messages - llm = ChatOpenAI(model="o4-mini", use_responses_api=True) + llm = ChatOpenAI( + model="o4-mini", use_responses_api=True, output_version="responses/v1" + ) message_dicts: list = [ {"role": "developer", "content": "This is a developer message."}, { @@ -2239,7 +2313,9 @@ class FakeTracer(BaseTracer): def test_mcp_tracing() -> None: # Test we exclude sensitive information from traces - llm = ChatOpenAI(model="o4-mini", use_responses_api=True) + llm = ChatOpenAI( + model="o4-mini", use_responses_api=True, output_version="responses/v1" + ) tracer = FakeTracer() mock_client = MagicMock() @@ -2430,7 +2506,9 @@ def test_get_last_messages() -> None: def test_get_request_payload_use_previous_response_id() -> None: # Default - don't use previous_response ID - llm = ChatOpenAI(model="o4-mini", use_responses_api=True) + llm = ChatOpenAI( + model="o4-mini", use_responses_api=True, output_version="responses/v1" + ) messages = [ HumanMessage("Hello"), AIMessage("Hi there!", response_metadata={"id": "resp_123"}), diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_prompt_cache_key.py b/libs/partners/openai/tests/unit_tests/chat_models/test_prompt_cache_key.py index 1f6c8c5d583..1aad6baff79 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_prompt_cache_key.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_prompt_cache_key.py @@ -71,7 +71,10 @@ def test_prompt_cache_key_model_kwargs() -> None: def test_prompt_cache_key_responses_api() -> None: """Test that prompt_cache_key works with Responses API.""" chat = ChatOpenAI( - model="gpt-4o-mini", use_responses_api=True, max_completion_tokens=10 + model="gpt-4o-mini", + use_responses_api=True, + output_version="responses/v1", + max_completion_tokens=10, ) messages = [HumanMessage("Hello")] diff --git a/libs/partners/openai/tests/unit_tests/test_tools.py b/libs/partners/openai/tests/unit_tests/test_tools.py index 106aa0aa080..63b097e6248 100644 --- a/libs/partners/openai/tests/unit_tests/test_tools.py +++ b/libs/partners/openai/tests/unit_tests/test_tools.py @@ -37,7 +37,9 @@ def test_custom_tool() -> None: """Do thing.""" pass - llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True).bind_tools([another_tool]) + llm = ChatOpenAI( + model="gpt-4.1", use_responses_api=True, output_version="responses/v1" + ).bind_tools([another_tool]) assert llm.kwargs == { # type: ignore[attr-defined] "tools": [ { @@ -49,7 +51,9 @@ def test_custom_tool() -> None: ] } - llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True).bind_tools([my_tool]) + llm = ChatOpenAI( + model="gpt-4.1", use_responses_api=True, output_version="responses/v1" + ).bind_tools([my_tool]) assert llm.kwargs == { # type: ignore[attr-defined] "tools": [{"type": "custom", "name": "my_tool", "description": "Do thing."}] } diff --git a/libs/partners/openai/uv.lock b/libs/partners/openai/uv.lock index 38b8da21426..58a1807abe3 100644 --- a/libs/partners/openai/uv.lock +++ b/libs/partners/openai/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.9" resolution-markers = [ "python_full_version >= '3.13' and platform_python_implementation == 'PyPy'", @@ -480,7 +480,7 @@ wheels = [ [[package]] name = "langchain-core" -version = "0.3.73" +version = "0.3.74" source = { editable = "../../core" } dependencies 
= [ { name = "jsonpatch" }, From 02001212b0a2b37d90451d8493089389ea220cab Mon Sep 17 00:00:00 2001 From: ccurme Date: Fri, 8 Aug 2025 09:51:18 -0300 Subject: [PATCH 5/5] fix(openai): revert some changes (#32462) Keep coverage on `output_version="v0"` (increasing coverage is being managed in v0.4 branch). --- .../chat_models/test_responses_api.py | 85 +++++-------------- 1 file changed, 19 insertions(+), 66 deletions(-) diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index 3b1a3b41e5d..32d3f199c85 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -175,9 +175,7 @@ class FooDict(TypedDict): def test_parsed_pydantic_schema() -> None: - llm = ChatOpenAI( - model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" - ) + llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) response = llm.invoke("how are ya", response_format=Foo) parsed = Foo(**json.loads(response.text())) assert parsed == response.additional_kwargs["parsed"] @@ -195,9 +193,7 @@ def test_parsed_pydantic_schema() -> None: async def test_parsed_pydantic_schema_async() -> None: - llm = ChatOpenAI( - model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" - ) + llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) response = await llm.ainvoke("how are ya", response_format=Foo) parsed = Foo(**json.loads(response.text())) assert parsed == response.additional_kwargs["parsed"] @@ -217,9 +213,7 @@ async def test_parsed_pydantic_schema_async() -> None: @pytest.mark.flaky(retries=3, delay=1) @pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict]) def test_parsed_dict_schema(schema: Any) -> None: - llm = ChatOpenAI( - model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" - ) + llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) response = llm.invoke("how are ya", response_format=schema) parsed = json.loads(response.text()) assert parsed == response.additional_kwargs["parsed"] @@ -237,9 +231,7 @@ def test_parsed_dict_schema(schema: Any) -> None: def test_parsed_strict() -> None: - llm = ChatOpenAI( - model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" - ) + llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) class InvalidJoke(TypedDict): setup: Annotated[str, ..., "The setup of the joke"] @@ -266,9 +258,7 @@ def test_parsed_strict() -> None: @pytest.mark.flaky(retries=3, delay=1) @pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict]) async def test_parsed_dict_schema_async(schema: Any) -> None: - llm = ChatOpenAI( - model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" - ) + llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) response = await llm.ainvoke("how are ya", response_format=schema) parsed = json.loads(response.text()) assert parsed == response.additional_kwargs["parsed"] @@ -290,9 +280,7 @@ def test_function_calling_and_structured_output() -> None: """return x * y""" return x * y - llm = ChatOpenAI( - model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" - ) + llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) bound_llm = llm.bind_tools([multiply], response_format=Foo, strict=True) # Test structured output response = llm.invoke("how are ya", response_format=Foo) @@ -336,9 +324,7 @@ def 
test_reasoning(output_version: Literal["v0", "responses/v1"]) -> None: def test_stateful_api() -> None: - llm = ChatOpenAI( - model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" - ) + llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) response = llm.invoke("how are you, my name is Bobo") assert "id" in response.response_metadata @@ -435,9 +421,7 @@ def test_stream_reasoning_summary( @pytest.mark.vcr def test_code_interpreter() -> None: - llm = ChatOpenAI( - model="o4-mini", use_responses_api=True, output_version="responses/v1" - ) + llm = ChatOpenAI(model="o4-mini", use_responses_api=True) llm_with_tools = llm.bind_tools( [{"type": "code_interpreter", "container": {"type": "auto"}}] ) @@ -447,16 +431,13 @@ def test_code_interpreter() -> None: } response = llm_with_tools.invoke([input_message]) _check_response(response) - tool_outputs = [ - block - for block in response.content - if isinstance(block, dict) and block.get("type") == "code_interpreter_call" - ] + tool_outputs = response.additional_kwargs["tool_outputs"] assert tool_outputs assert any(output["type"] == "code_interpreter_call" for output in tool_outputs) # Test streaming # Use same container + tool_outputs = response.additional_kwargs["tool_outputs"] assert len(tool_outputs) == 1 container_id = tool_outputs[0]["container_id"] llm_with_tools = llm.bind_tools( @@ -468,11 +449,7 @@ def test_code_interpreter() -> None: assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk assert isinstance(full, AIMessageChunk) - tool_outputs = [ - block - for block in full.content - if isinstance(block, dict) and block.get("type") == "code_interpreter_call" - ] + tool_outputs = full.additional_kwargs["tool_outputs"] assert tool_outputs assert any(output["type"] == "code_interpreter_call" for output in tool_outputs) @@ -483,9 +460,7 @@ def test_code_interpreter() -> None: @pytest.mark.vcr def test_mcp_builtin() -> None: - llm = ChatOpenAI( - model="o4-mini", use_responses_api=True, output_version="responses/v1" - ) + llm = ChatOpenAI(model="o4-mini", use_responses_api=True) llm_with_tools = llm.bind_tools( [ @@ -514,8 +489,8 @@ def test_mcp_builtin() -> None: "approve": True, "approval_request_id": output["id"], } - for output in response.content - if isinstance(output, dict) and output.get("type") == "mcp_approval_request" + for output in response.additional_kwargs["tool_outputs"] + if output["type"] == "mcp_approval_request" ] ) _ = llm_with_tools.invoke( @@ -574,9 +549,7 @@ def test_mcp_builtin_zdr() -> None: @pytest.mark.vcr() def test_image_generation_streaming() -> None: """Test image generation streaming.""" - llm = ChatOpenAI( - model="gpt-4.1", use_responses_api=True, output_version="responses/v1" - ) + llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True) tool = { "type": "image_generation", # For testing purposes let's keep the quality low, so the test runs faster. @@ -623,13 +596,7 @@ def test_image_generation_streaming() -> None: # At the moment, the streaming API does not pick up annotations fully. # So the following check is commented out. 
     # _check_response(complete_ai_message)
-    tool_outputs = [
-        block
-        for block in complete_ai_message.content
-        if isinstance(block, dict) and block.get("type") == "image_generation_call"
-    ]
-    assert len(tool_outputs) == 1
-    tool_output = tool_outputs[0]
+    tool_output = complete_ai_message.additional_kwargs["tool_outputs"][0]
 
     assert set(tool_output.keys()).issubset(expected_keys)
 
@@ -637,9 +604,7 @@ def test_image_generation_streaming() -> None:
 def test_image_generation_multi_turn() -> None:
     """Test multi-turn editing of image generation by passing in history."""
     # Test multi-turn
-    llm = ChatOpenAI(
-        model="gpt-4.1", use_responses_api=True, output_version="responses/v1"
-    )
+    llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True)
     # Test invocation
     tool = {
         "type": "image_generation",
@@ -656,13 +621,7 @@ def test_image_generation_multi_turn() -> None:
     ]
     ai_message = llm_with_tools.invoke(chat_history)
     _check_response(ai_message)
-    tool_outputs = [
-        block
-        for block in ai_message.content
-        if isinstance(block, dict) and block.get("type") == "image_generation_call"
-    ]
-    assert len(tool_outputs) == 1
-    tool_output = tool_outputs[0]
+    tool_output = ai_message.additional_kwargs["tool_outputs"][0]
 
     # Example tool output for an image
     # {
@@ -711,13 +670,7 @@ def test_image_generation_multi_turn() -> None:
     ai_message2 = llm_with_tools.invoke(chat_history)
     _check_response(ai_message2)
-    tool_outputs2 = [
-        block
-        for block in ai_message2.content
-        if isinstance(block, dict) and block.get("type") == "image_generation_call"
-    ]
-    assert len(tool_outputs2) == 1
-    tool_output2 = tool_outputs2[0]
+    tool_output2 = ai_message2.additional_kwargs["tool_outputs"][0]
 
     assert set(tool_output2.keys()).issubset(expected_keys)
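
Note on the two access patterns exercised above: under the default `output_version` ("v0"), results from built-in tools such as `image_generation` and `code_interpreter` surface in `additional_kwargs["tool_outputs"]`, while `output_version="responses/v1"` exposes the same results as typed blocks in `message.content`. A minimal sketch of both patterns follows; the model name, tool spec, and prompt below are illustrative assumptions rather than values taken from the tests.

    from langchain_openai import ChatOpenAI

    tool = {"type": "image_generation"}  # built-in tool spec (illustrative)
    prompt = "Draw a short word in green font."  # assumed prompt

    # Default ("v0"): built-in tool results live in additional_kwargs.
    llm_v0 = ChatOpenAI(model="gpt-4.1", use_responses_api=True)
    msg_v0 = llm_v0.bind_tools([tool]).invoke(prompt)
    v0_outputs = msg_v0.additional_kwargs["tool_outputs"]

    # "responses/v1": the same results arrive as typed content blocks.
    llm_v1 = ChatOpenAI(
        model="gpt-4.1", use_responses_api=True, output_version="responses/v1"
    )
    msg_v1 = llm_v1.bind_tools([tool]).invoke(prompt)
    v1_outputs = [
        block
        for block in msg_v1.content
        if isinstance(block, dict) and block.get("type") == "image_generation_call"
    ]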