fix(langchain, openai): fix create_agent / response_format for Responses API (#33939)

This commit is contained in:
ccurme
2025-11-13 10:18:15 -05:00
committed by GitHub
parent 2bfbc29ccc
commit 74385e0ebd
12 changed files with 234 additions and 99 deletions

View File

@@ -1009,8 +1009,9 @@ def create_agent( # noqa: PLR0915
# Bind model based on effective response format
if isinstance(effective_response_format, ProviderStrategy):
# Use provider-specific structured output
kwargs = effective_response_format.to_model_kwargs()
kwargs: dict[str, Any] = {
"response_format": effective_response_format.schema_spec.json_schema
}
return (
request.model.bind_tools(
final_tools, strict=True, **kwargs, **request.model_settings

View File

@@ -1,79 +0,0 @@
import pytest
from langchain_core.messages import HumanMessage
from pydantic import BaseModel, Field
from langchain.agents import create_agent
from langchain.agents.structured_output import ToolStrategy
class WeatherBaseModel(BaseModel):
    """Weather response."""

    # Structured-output schema the tests in this file expect the agent to return.
    # NOTE(review): the class docstring and the Field descriptions are presumably
    # exported with the schema handed to the model -- confirm before rewording.
    temperature: float = Field(description="The temperature in fahrenheit")
    condition: str = Field(description="Weather condition")
def get_weather(city: str) -> str:  # noqa: ARG001
    """Get the weather for a city."""
    # Canned reply: ``city`` is accepted but never read, so every call returns
    # the same sentence (the tests below assert on 75.0 / "sunny").
    # NOTE(review): the docstring is presumably used as the tool description
    # shown to the model -- left untouched on purpose.
    canned_forecast = "The weather is sunny and 75°F."
    return canned_forecast
@pytest.mark.requires("langchain_openai")
def test_inference_to_native_output() -> None:
    """Check structured output when a bare schema is given as ``response_format``.

    The agent is built on ``gpt-5`` with the schema class passed directly (no
    explicit strategy). It must finish with a ``WeatherBaseModel`` and exactly
    four messages, i.e. without a trailing tool message.
    """
    from langchain_openai import ChatOpenAI

    chat_model = ChatOpenAI(model="gpt-5")
    instructions = (
        "You are a helpful weather assistant. Please call the get_weather tool, "
        "then use the WeatherReport tool to generate the final response."
    )
    agent = create_agent(
        chat_model,
        system_prompt=instructions,
        tools=[get_weather],
        response_format=WeatherBaseModel,
    )

    question = HumanMessage("What's the weather?")
    result = agent.invoke({"messages": [question]})

    report = result["structured_response"]
    assert isinstance(report, WeatherBaseModel)
    assert report.temperature == 75.0
    assert report.condition.lower() == "sunny"

    history = result["messages"]
    assert len(history) == 4
    assert [msg.type for msg in history] == [
        "human",  # the user's question
        "ai",  # presumably the get_weather tool call
        "tool",  # get_weather result
        "ai",  # structured response
    ]
@pytest.mark.requires("langchain_openai")
def test_inference_to_tool_output() -> None:
    """Check structured output when ``ToolStrategy`` is requested explicitly.

    The agent is built on ``gpt-4`` with ``ToolStrategy(WeatherBaseModel)``. It
    must finish with a ``WeatherBaseModel`` and exactly five messages, the last
    one being an extra tool message.
    """
    from langchain_openai import ChatOpenAI

    chat_model = ChatOpenAI(model="gpt-4")
    instructions = (
        "You are a helpful weather assistant. Please call the get_weather tool, "
        "then use the WeatherReport tool to generate the final response."
    )
    agent = create_agent(
        chat_model,
        system_prompt=instructions,
        tools=[get_weather],
        response_format=ToolStrategy(WeatherBaseModel),
    )

    question = HumanMessage("What's the weather?")
    result = agent.invoke({"messages": [question]})

    report = result["structured_response"]
    assert isinstance(report, WeatherBaseModel)
    assert report.temperature == 75.0
    assert report.condition.lower() == "sunny"

    history = result["messages"]
    assert len(history) == 5
    assert [msg.type for msg in history] == [
        "human",  # the user's question
        "ai",  # presumably the get_weather tool call
        "tool",  # get_weather result
        "ai",  # structured response
        "tool",  # artificial tool message
    ]

View File

@@ -38,8 +38,7 @@ class FakeToolCallingModel(BaseChatModel, Generic[StructuredResponseT]):
**kwargs: Any,
) -> ChatResult:
"""Top Level call"""
rf = kwargs.get("response_format")
is_native = isinstance(rf, dict) and rf.get("type") == "json_schema"
is_native = kwargs.get("response_format")
if self.tool_calls:
if is_native:

View File

@@ -0,0 +1,142 @@
"""Test response_format for langchain-openai.
If tests fail, cassettes may need to be re-recorded.
To re-record cassettes:
1. Delete existing cassettes (`rm tests/cassettes/test_inference_to_*.yaml.gz`)
2. Re-run the tests with a valid OPENAI_API_KEY in your environment:
```bash
OPENAI_API_KEY=... uv run python -m pytest tests/unit_tests/agents/test_response_format_integration.py
```
The cassettes are compressed. To read them:
```bash
gunzip -c "tests/cassettes/test_inference_to_native_output[True].yaml.gz" | \
yq -o json . | \
jq '.requests[].body |= (gsub("\n";"") | @base64d | fromjson) |
.responses[].body.string |= (gsub("\n";"") | @base64d | fromjson)'
```
Or, in Python:
```python
import json
from langchain_tests.conftest import CustomPersister, CustomSerializer
def bytes_encoder(obj):
return obj.decode("utf-8", errors="replace")
path = "tests/cassettes/test_inference_to_native_output[True].yaml.gz"
requests, responses = CustomPersister().load_cassette(path, CustomSerializer())
assert len(requests) == len(responses)
for request, response in list(zip(requests, responses)):
print("------ REQUEST ------")
req = request._to_dict()
req["body"] = json.loads(req["body"])
print(json.dumps(req, indent=2, default=bytes_encoder))
print("\n\n ------ RESPONSE ------")
resp = response
print(json.dumps(resp, indent=2, default=bytes_encoder))
print("\n\n")
```
"""
import os
import pytest
from langchain_core.messages import HumanMessage
from pydantic import BaseModel, Field
from langchain.agents import create_agent
from langchain.agents.structured_output import ToolStrategy
class WeatherBaseModel(BaseModel):
    """Weather response."""

    # Structured-output schema the tests in this file expect the agent to return.
    # NOTE(review): the class docstring and the Field descriptions are presumably
    # part of the request recorded in the cassettes -- confirm before rewording,
    # since a change may require re-recording.
    temperature: float = Field(description="The temperature in fahrenheit")
    condition: str = Field(description="Weather condition")
def get_weather(city: str) -> str:
    """Get the weather for a city."""
    # ``city`` is interpolated into the reply, so the former ARG001 suppression
    # on the signature was stale and has been dropped.
    # NOTE(review): the docstring is presumably used as the tool description
    # sent to the model (and therefore recorded in cassettes) -- left untouched.
    return f"The weather in {city} is sunny and 75°F."
@pytest.mark.requires("langchain_openai")
@pytest.mark.vcr
@pytest.mark.parametrize("use_responses_api", [False, True])
def test_inference_to_native_output(use_responses_api: bool) -> None:
    """Check structured output when a bare schema is given as ``response_format``.

    Runs once per ``use_responses_api`` value. The schema class is passed
    directly (no explicit strategy); the agent must finish with a
    ``WeatherBaseModel`` and exactly four messages, i.e. without a trailing
    tool message.
    """
    from langchain_openai import ChatOpenAI

    init_args = {"model": "gpt-5", "use_responses_api": use_responses_api}
    # A dummy key is supplied when none is present in the environment.
    # NOTE(review): presumably so the client can be constructed during cassette
    # replay -- confirm.
    if "OPENAI_API_KEY" not in os.environ:
        init_args["api_key"] = "foo"
    chat_model = ChatOpenAI(**init_args)

    instructions = (
        "You are a helpful weather assistant. Please call the get_weather tool "
        "once, then use the WeatherReport tool to generate the final response."
    )
    agent = create_agent(
        chat_model,
        system_prompt=instructions,
        tools=[get_weather],
        response_format=WeatherBaseModel,
    )

    question = HumanMessage("What's the weather in Boston?")
    result = agent.invoke({"messages": [question]})

    report = result["structured_response"]
    assert isinstance(report, WeatherBaseModel)
    assert report.temperature == 75.0
    assert report.condition.lower() == "sunny"

    history = result["messages"]
    assert len(history) == 4
    assert [msg.type for msg in history] == [
        "human",  # the user's question
        "ai",  # presumably the get_weather tool call
        "tool",  # get_weather result
        "ai",  # structured response
    ]
@pytest.mark.requires("langchain_openai")
@pytest.mark.vcr
@pytest.mark.parametrize("use_responses_api", [False, True])
def test_inference_to_tool_output(use_responses_api: bool) -> None:
    """Check structured output when ``ToolStrategy`` is requested explicitly.

    Runs once per ``use_responses_api`` value. The agent must finish with a
    ``WeatherBaseModel`` and exactly five messages, the last one being an
    extra tool message.
    """
    from langchain_openai import ChatOpenAI

    init_args = {"model": "gpt-5", "use_responses_api": use_responses_api}
    # A dummy key is supplied when none is present in the environment.
    # NOTE(review): presumably so the client can be constructed during cassette
    # replay -- confirm.
    if "OPENAI_API_KEY" not in os.environ:
        init_args["api_key"] = "foo"
    chat_model = ChatOpenAI(**init_args)

    instructions = (
        "You are a helpful weather assistant. Please call the get_weather tool "
        "once, then use the WeatherReport tool to generate the final response."
    )
    agent = create_agent(
        chat_model,
        system_prompt=instructions,
        tools=[get_weather],
        response_format=ToolStrategy(WeatherBaseModel),
    )

    # NOTE(review): unlike the native-output test, this prompt names no city;
    # the text is kept as-is because recorded cassettes presumably depend on it.
    question = HumanMessage("What's the weather?")
    result = agent.invoke({"messages": [question]})

    report = result["structured_response"]
    assert isinstance(report, WeatherBaseModel)
    assert report.temperature == 75.0
    assert report.condition.lower() == "sunny"

    history = result["messages"]
    assert len(history) == 5
    assert [msg.type for msg in history] == [
        "human",  # the user's question
        "ai",  # presumably the get_weather tool call
        "tool",  # get_weather result
        "ai",  # structured response
        "tool",  # artificial tool message
    ]

View File

@@ -2,8 +2,52 @@
from collections.abc import Sequence
from importlib import util
from typing import Any
import pytest
from langchain_tests.conftest import CustomPersister, CustomSerializer
from langchain_tests.conftest import (
_base_vcr_config as _base_vcr_config,
)
from vcr import VCR
# Extra entries appended to the VCR ``filter_headers`` setting by ``vcr_config``.
# NOTE(review): each pair is presumably (header name, replacement value) as
# understood by vcrpy's header filtering -- confirm against the vcrpy docs.
_EXTRA_HEADERS = [
    ("openai-organization", "PLACEHOLDER"),
    ("user-agent", "PLACEHOLDER"),
    ("x-openai-client-user-agent", "PLACEHOLDER"),
]
def remove_request_headers(request: Any) -> Any:
    """Redact every header value and the URI of a request before recording.

    Header names are kept; only their values are overwritten, so nothing is
    actually removed from the mapping.

    Args:
        request: Object exposing a mutable ``headers`` mapping and a ``uri``
            attribute. It is modified in place.

    Returns:
        The same ``request`` object, after redaction.
    """
    # Iterate over a snapshot of the keys so that assigning into the mapping
    # cannot disturb the iteration, whatever mapping type ``headers`` is.
    for name in list(request.headers):
        request.headers[name] = "**REDACTED**"
    request.uri = "**REDACTED**"
    return request
def remove_response_headers(response: dict) -> dict:
    """Redact every header value of a response before recording.

    Header names are kept; only their values are overwritten. Other keys of
    ``response`` are left untouched.

    Args:
        response: Dict with a ``"headers"`` mapping. It is modified in place.

    Returns:
        The same ``response`` dict, after redaction.
    """
    headers = response["headers"]
    # Iterate over a snapshot of the keys so that assigning into the mapping
    # cannot disturb the iteration, whatever mapping type ``headers`` is.
    for name in list(headers):
        headers[name] = "**REDACTED**"
    return response
@pytest.fixture(scope="session")
def vcr_config(_base_vcr_config: dict) -> dict:  # noqa: F811
    """Extend the default configuration coming from langchain_tests.

    Args:
        _base_vcr_config: Base VCR settings supplied by the fixture imported
            from ``langchain_tests.conftest``.

    Returns:
        A new dict with the extra filtered headers, the request/response
        redaction hooks, and the ``yaml.gz`` serializer and path suffix set.
    """
    config = _base_vcr_config.copy()
    # ``copy()`` is shallow: build a fresh list instead of extending the base
    # fixture's ``filter_headers`` list in place, so the base config is never
    # mutated by this fixture.
    config["filter_headers"] = [
        *config.get("filter_headers", []),
        *_EXTRA_HEADERS,
    ]
    config["before_record_request"] = remove_request_headers
    config["before_record_response"] = remove_response_headers
    config["serializer"] = "yaml.gz"
    config["path_transformer"] = VCR.ensure_suffix(".yaml.gz")
    return config
def pytest_recording_configure(config: dict, vcr: VCR) -> None:  # noqa: ARG001
    """Register the custom persister and the ``yaml.gz`` serializer on ``vcr``.

    ``config`` is accepted but not used.
    """
    persister = CustomPersister()
    vcr.register_persister(persister)
    serializer = CustomSerializer()
    vcr.register_serializer("yaml.gz", serializer)
def pytest_addoption(parser: pytest.Parser) -> None: