fix(langchain, openai): fix create_agent / response_format for Responses API (#33939)

2026-06-09 10:17:00 +00:00 · 2025-11-13 10:18:15 -05:00
parent 2bfbc29ccc
commit 74385e0ebd
12 changed files with 234 additions and 99 deletions
--- a/libs/partners/openai/langchain_openai/chat_models/base.py
+++ b/libs/partners/openai/langchain_openai/chat_models/base.py
@@ -1771,6 +1771,7 @@ class BaseChatOpenAI(BaseChatModel):
        tool_choice: dict | str | bool | None = None,
        strict: bool | None = None,
        parallel_tool_calls: bool | None = None,
+        response_format: _DictOrPydanticClass | None = None,
        **kwargs: Any,
    ) -> Runnable[LanguageModelInput, AIMessage]:
        """Bind tool-like objects to this chat model.
@@ -1796,6 +1797,9 @@ class BaseChatOpenAI(BaseChatModel):
                be validated. If `None`, `strict` argument will not be passed to the model.
            parallel_tool_calls: Set to `False` to disable parallel tool use.
                Defaults to `None` (no specification, which allows parallel tool use).
+            response_format: Optional schema used to format the model's response.
+                If provided and the model does not call a tool, the model will
+                generate a [structured response](https://platform.openai.com/docs/guides/structured-outputs).
            kwargs: Any additional parameters are passed directly to `bind`.
        """  # noqa: E501
        if parallel_tool_calls is not None:
@@ -1838,6 +1842,11 @@ class BaseChatOpenAI(BaseChatModel):
                )
                raise ValueError(msg)
            kwargs["tool_choice"] = tool_choice
+
+        if response_format:
+            kwargs["response_format"] = _convert_to_openai_response_format(
+                response_format
+            )
        return super().bind(tools=formatted_tools, **kwargs)

    def with_structured_output(
@@ -3479,6 +3488,7 @@ def _convert_to_openai_response_format(
        strict is not None
        and strict is not response_format["json_schema"].get("strict")
        and isinstance(schema, dict)
+        and "strict" in schema.get("json_schema", {})
    ):
        msg = (
            f"Output schema already has 'strict' value set to "
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py
@@ -28,6 +28,7 @@ from langchain_tests.integration_tests.chat_models import (
    magic_function,
 )
 from pydantic import BaseModel, Field, field_validator
+from typing_extensions import TypedDict

 from langchain_openai import ChatOpenAI
 from tests.unit_tests.fake.callbacks import FakeCallbackHandler
@@ -1146,17 +1147,33 @@ def test_multi_party_conversation() -> None:
    assert "Bob" in response.content


-def test_structured_output_and_tools() -> None:
-    class ResponseFormat(BaseModel):
-        response: str
-        explanation: str
+class ResponseFormat(BaseModel):
+    response: str
+    explanation: str

-    llm = ChatOpenAI(model="gpt-5-nano").bind_tools(
-        [GenerateUsername], strict=True, response_format=ResponseFormat
+
+class ResponseFormatDict(TypedDict):
+    response: str
+    explanation: str
+
+
+@pytest.mark.parametrize(
+    "schema", [ResponseFormat, ResponseFormat.model_json_schema(), ResponseFormatDict]
+)
+def test_structured_output_and_tools(schema: Any) -> None:
+    llm = ChatOpenAI(model="gpt-5-nano", verbosity="low").bind_tools(
+        [GenerateUsername], strict=True, response_format=schema
    )

    response = llm.invoke("What weighs more, a pound of feathers or a pound of gold?")
-    assert isinstance(response.additional_kwargs["parsed"], ResponseFormat)
+    if schema == ResponseFormat:
+        parsed = response.additional_kwargs["parsed"]
+        assert isinstance(parsed, ResponseFormat)
+    else:
+        parsed = json.loads(response.text)
+        assert isinstance(parsed, dict)
+        assert parsed["response"]
+        assert parsed["explanation"]

    # Test streaming tool calls
    full: BaseMessageChunk | None = None
@@ -1172,10 +1189,6 @@ def test_structured_output_and_tools() -> None:


 def test_tools_and_structured_output() -> None:
-    class ResponseFormat(BaseModel):
-        response: str
-        explanation: str
-
    llm = ChatOpenAI(model="gpt-5-nano").with_structured_output(
        ResponseFormat, strict=True, include_raw=True, tools=[GenerateUsername]
    )
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py
@@ -318,18 +318,23 @@ async def test_parsed_dict_schema_async(schema: Any) -> None:
    assert isinstance(parsed["response"], str)


-def test_function_calling_and_structured_output() -> None:
+@pytest.mark.parametrize("schema", [Foo, Foo.model_json_schema(), FooDict])
+def test_function_calling_and_structured_output(schema: Any) -> None:
    def multiply(x: int, y: int) -> int:
        """return x * y"""
        return x * y

    llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
-    bound_llm = llm.bind_tools([multiply], response_format=Foo, strict=True)
+    bound_llm = llm.bind_tools([multiply], response_format=schema, strict=True)
    # Test structured output
-    response = llm.invoke("how are ya", response_format=Foo)
-    parsed = Foo(**json.loads(response.text))
+    response = llm.invoke("how are ya", response_format=schema)
+    if schema == Foo:
+        parsed = schema(**json.loads(response.text))
+        assert parsed.response
+    else:
+        parsed = json.loads(response.text)
+        assert parsed["response"]
    assert parsed == response.additional_kwargs["parsed"]
-    assert parsed.response

    # Test function calling
    ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4"))