fix(langchain, openai): fix create_agent / response_format for Responses API (#33939)

2026-06-09 18:50:33 +00:00 · 2025-11-13 10:18:15 -05:00
parent 2bfbc29ccc
commit 74385e0ebd
12 changed files with 234 additions and 99 deletions
--- a/libs/langchain_v1/langchain/agents/factory.py
+++ b/libs/langchain_v1/langchain/agents/factory.py
@@ -1009,8 +1009,9 @@ def create_agent(  # noqa: PLR0915

        # Bind model based on effective response format
        if isinstance(effective_response_format, ProviderStrategy):
-            # Use provider-specific structured output
-            kwargs = effective_response_format.to_model_kwargs()
+            kwargs: dict[str, Any] = {
+                "response_format": effective_response_format.schema_spec.json_schema
+            }
            return (
                request.model.bind_tools(
                    final_tools, strict=True, **kwargs, **request.model_settings
--- a/libs/langchain_v1/tests/cassettes/test_inference_to_native_output[False].yaml.gz
+++ b/libs/langchain_v1/tests/cassettes/test_inference_to_native_output[False].yaml.gz
--- a/libs/langchain_v1/tests/cassettes/test_inference_to_native_output[True].yaml.gz
+++ b/libs/langchain_v1/tests/cassettes/test_inference_to_native_output[True].yaml.gz
--- a/libs/langchain_v1/tests/cassettes/test_inference_to_tool_output[False].yaml.gz
+++ b/libs/langchain_v1/tests/cassettes/test_inference_to_tool_output[False].yaml.gz
--- a/libs/langchain_v1/tests/cassettes/test_inference_to_tool_output[True].yaml.gz
+++ b/libs/langchain_v1/tests/cassettes/test_inference_to_tool_output[True].yaml.gz
--- a/libs/langchain_v1/tests/integration_tests/agents/test_response_format.py
+++ b/libs/langchain_v1/tests/integration_tests/agents/test_response_format.py
@@ -1,79 +0,0 @@
-import pytest
-from langchain_core.messages import HumanMessage
-from pydantic import BaseModel, Field
-
-from langchain.agents import create_agent
-from langchain.agents.structured_output import ToolStrategy
-
-
-class WeatherBaseModel(BaseModel):
-    """Weather response."""
-
-    temperature: float = Field(description="The temperature in fahrenheit")
-    condition: str = Field(description="Weather condition")
-
-
-def get_weather(city: str) -> str:  # noqa: ARG001
-    """Get the weather for a city."""
-    return "The weather is sunny and 75°F."
-
-
-@pytest.mark.requires("langchain_openai")
-def test_inference_to_native_output() -> None:
-    """Test that native output is inferred when a model supports it."""
-    from langchain_openai import ChatOpenAI
-
-    model = ChatOpenAI(model="gpt-5")
-    agent = create_agent(
-        model,
-        system_prompt=(
-            "You are a helpful weather assistant. Please call the get_weather tool, "
-            "then use the WeatherReport tool to generate the final response."
-        ),
-        tools=[get_weather],
-        response_format=WeatherBaseModel,
-    )
-    response = agent.invoke({"messages": [HumanMessage("What's the weather?")]})
-
-    assert isinstance(response["structured_response"], WeatherBaseModel)
-    assert response["structured_response"].temperature == 75.0
-    assert response["structured_response"].condition.lower() == "sunny"
-    assert len(response["messages"]) == 4
-
-    assert [m.type for m in response["messages"]] == [
-        "human",  # "What's the weather?"
-        "ai",  # "What's the weather?"
-        "tool",  # "The weather is sunny and 75°F."
-        "ai",  # structured response
-    ]
-
-
-@pytest.mark.requires("langchain_openai")
-def test_inference_to_tool_output() -> None:
-    """Test that tool output is inferred when a model supports it."""
-    from langchain_openai import ChatOpenAI
-
-    model = ChatOpenAI(model="gpt-4")
-    agent = create_agent(
-        model,
-        system_prompt=(
-            "You are a helpful weather assistant. Please call the get_weather tool, "
-            "then use the WeatherReport tool to generate the final response."
-        ),
-        tools=[get_weather],
-        response_format=ToolStrategy(WeatherBaseModel),
-    )
-    response = agent.invoke({"messages": [HumanMessage("What's the weather?")]})
-
-    assert isinstance(response["structured_response"], WeatherBaseModel)
-    assert response["structured_response"].temperature == 75.0
-    assert response["structured_response"].condition.lower() == "sunny"
-    assert len(response["messages"]) == 5
-
-    assert [m.type for m in response["messages"]] == [
-        "human",  # "What's the weather?"
-        "ai",  # "What's the weather?"
-        "tool",  # "The weather is sunny and 75°F."
-        "ai",  # structured response
-        "tool",  # artificial tool message
-    ]
--- a/libs/langchain_v1/tests/unit_tests/agents/model.py
+++ b/libs/langchain_v1/tests/unit_tests/agents/model.py
@@ -38,8 +38,7 @@ class FakeToolCallingModel(BaseChatModel, Generic[StructuredResponseT]):
        **kwargs: Any,
    ) -> ChatResult:
        """Top Level call"""
-        rf = kwargs.get("response_format")
-        is_native = isinstance(rf, dict) and rf.get("type") == "json_schema"
+        is_native = kwargs.get("response_format")

        if self.tool_calls:
            if is_native:
--- a/libs/langchain_v1/tests/unit_tests/agents/test_response_format_integration.py
+++ b/libs/langchain_v1/tests/unit_tests/agents/test_response_format_integration.py
@@ -0,0 +1,142 @@
+r"""Test response_format for langchain-openai.
+
+If tests fail, cassettes may need to be re-recorded.
+
+To re-record cassettes:
+
+1. Delete existing cassettes (`rm tests/cassettes/test_inference_to_*.yaml.gz`)
+2. Re-run the tests with a valid OPENAI_API_KEY in your environment:
+```bash
+OPENAI_API_KEY=... uv run python -m pytest tests/unit_tests/agents/test_response_format_integration.py
+```
+
+The cassettes are compressed. To read them:
+```bash
+gunzip -c "tests/cassettes/test_inference_to_native_output[True].yaml.gz" | \
+    yq -o json . | \
+    jq '.requests[].body |= (gsub("\n";"") | @base64d | fromjson) |
+        .responses[].body.string |= (gsub("\n";"") | @base64d | fromjson)'
+```
+
+Or, in Python:
+```python
+import json
+
+from langchain_tests.conftest import CustomPersister, CustomSerializer
+
+def bytes_encoder(obj):
+    return obj.decode("utf-8", errors="replace")
+
+path = "tests/cassettes/test_inference_to_native_output[True].yaml.gz"
+
+requests, responses = CustomPersister().load_cassette(path, CustomSerializer())
+assert len(requests) == len(responses)
+for request, response in list(zip(requests, responses)):
+    print("------ REQUEST ------")
+    req = request._to_dict()
+    req["body"] = json.loads(req["body"])
+    print(json.dumps(req, indent=2, default=bytes_encoder))
+    print("\n\n ------ RESPONSE ------")
+    resp = response
+    print(json.dumps(resp, indent=2, default=bytes_encoder))
+print("\n\n")
+```
+"""
+
+import os
+
+import pytest
+from langchain_core.messages import HumanMessage
+from pydantic import BaseModel, Field
+
+from langchain.agents import create_agent
+from langchain.agents.structured_output import ToolStrategy
+
+
+class WeatherBaseModel(BaseModel):
+    """Weather response."""
+
+    temperature: float = Field(description="The temperature in fahrenheit")
+    condition: str = Field(description="Weather condition")
+
+
+def get_weather(city: str) -> str:  # noqa: ARG001
+    """Get the weather for a city."""
+    return f"The weather in {city} is sunny and 75°F."
+
+
+@pytest.mark.requires("langchain_openai")
+@pytest.mark.vcr
+@pytest.mark.parametrize("use_responses_api", [False, True])
+def test_inference_to_native_output(use_responses_api: bool) -> None:
+    """Test that native output is inferred when a model supports it."""
+    from langchain_openai import ChatOpenAI
+
+    model_kwargs = {"model": "gpt-5", "use_responses_api": use_responses_api}
+
+    if "OPENAI_API_KEY" not in os.environ:
+        model_kwargs["api_key"] = "foo"
+
+    model = ChatOpenAI(**model_kwargs)
+
+    agent = create_agent(
+        model,
+        system_prompt=(
+            "You are a helpful weather assistant. Please call the get_weather tool "
+            "once, then use the WeatherReport tool to generate the final response."
+        ),
+        tools=[get_weather],
+        response_format=WeatherBaseModel,
+    )
+    response = agent.invoke({"messages": [HumanMessage("What's the weather in Boston?")]})
+
+    assert isinstance(response["structured_response"], WeatherBaseModel)
+    assert response["structured_response"].temperature == 75.0
+    assert response["structured_response"].condition.lower() == "sunny"
+    assert len(response["messages"]) == 4
+
+    assert [m.type for m in response["messages"]] == [
+        "human",  # "What's the weather in Boston?"
+        "ai",  # get_weather tool call
+        "tool",  # "The weather in <city> is sunny and 75°F."
+        "ai",  # structured response
+    ]
+
+
+@pytest.mark.requires("langchain_openai")
+@pytest.mark.vcr
+@pytest.mark.parametrize("use_responses_api", [False, True])
+def test_inference_to_tool_output(use_responses_api: bool) -> None:
+    """Test that structured output is produced via an explicit ToolStrategy."""
+    from langchain_openai import ChatOpenAI
+
+    model_kwargs = {"model": "gpt-5", "use_responses_api": use_responses_api}
+
+    if "OPENAI_API_KEY" not in os.environ:
+        model_kwargs["api_key"] = "foo"
+
+    model = ChatOpenAI(**model_kwargs)
+
+    agent = create_agent(
+        model,
+        system_prompt=(
+            "You are a helpful weather assistant. Please call the get_weather tool "
+            "once, then use the WeatherReport tool to generate the final response."
+        ),
+        tools=[get_weather],
+        response_format=ToolStrategy(WeatherBaseModel),
+    )
+    response = agent.invoke({"messages": [HumanMessage("What's the weather?")]})
+
+    assert isinstance(response["structured_response"], WeatherBaseModel)
+    assert response["structured_response"].temperature == 75.0
+    assert response["structured_response"].condition.lower() == "sunny"
+    assert len(response["messages"]) == 5
+
+    assert [m.type for m in response["messages"]] == [
+        "human",  # "What's the weather?"
+        "ai",  # get_weather tool call
+        "tool",  # "The weather in <city> is sunny and 75°F."
+        "ai",  # structured response
+        "tool",  # artificial tool message
+    ]
--- a/libs/langchain_v1/tests/unit_tests/conftest.py
+++ b/libs/langchain_v1/tests/unit_tests/conftest.py
@@ -2,8 +2,52 @@

 from collections.abc import Sequence
 from importlib import util
+from typing import Any

 import pytest
+from langchain_tests.conftest import CustomPersister, CustomSerializer
+from langchain_tests.conftest import (
+    _base_vcr_config as _base_vcr_config,
+)
+from vcr import VCR
+
+_EXTRA_HEADERS = [
+    ("openai-organization", "PLACEHOLDER"),
+    ("user-agent", "PLACEHOLDER"),
+    ("x-openai-client-user-agent", "PLACEHOLDER"),
+]
+
+
+def remove_request_headers(request: Any) -> Any:
+    """Redact every header value and the URI of the request before recording."""
+    for k in request.headers:
+        request.headers[k] = "**REDACTED**"
+    request.uri = "**REDACTED**"
+    return request
+
+
+def remove_response_headers(response: dict) -> dict:
+    """Redact every header value of the response before recording."""
+    for k in response["headers"]:
+        response["headers"][k] = "**REDACTED**"
+    return response
+
+
+@pytest.fixture(scope="session")
+def vcr_config(_base_vcr_config: dict) -> dict:  # noqa: F811
+    """Extend the default configuration coming from langchain_tests."""
+    config = _base_vcr_config.copy()
+    config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS)
+    config["before_record_request"] = remove_request_headers
+    config["before_record_response"] = remove_response_headers
+    config["serializer"] = "yaml.gz"
+    config["path_transformer"] = VCR.ensure_suffix(".yaml.gz")
+    return config
+
+
+def pytest_recording_configure(config: dict, vcr: VCR) -> None:  # noqa: ARG001
+    vcr.register_persister(CustomPersister())
+    vcr.register_serializer("yaml.gz", CustomSerializer())


 def pytest_addoption(parser: pytest.Parser) -> None: