mirror of
https://github.com/hwchase17/langchain.git
synced 2026-04-25 01:16:55 +00:00
fix(langchain, openai): fix create_agent / response_format for Responses API (#33939)
This commit is contained in:
@@ -1009,8 +1009,9 @@ def create_agent( # noqa: PLR0915
|
||||
|
||||
# Bind model based on effective response format
|
||||
if isinstance(effective_response_format, ProviderStrategy):
|
||||
# Use provider-specific structured output
|
||||
kwargs = effective_response_format.to_model_kwargs()
|
||||
kwargs: dict[str, Any] = {
|
||||
"response_format": effective_response_format.schema_spec.json_schema
|
||||
}
|
||||
return (
|
||||
request.model.bind_tools(
|
||||
final_tools, strict=True, **kwargs, **request.model_settings
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,79 +0,0 @@
|
||||
import pytest
|
||||
from langchain_core.messages import HumanMessage
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from langchain.agents import create_agent
|
||||
from langchain.agents.structured_output import ToolStrategy
|
||||
|
||||
|
||||
class WeatherBaseModel(BaseModel):
|
||||
"""Weather response."""
|
||||
|
||||
temperature: float = Field(description="The temperature in fahrenheit")
|
||||
condition: str = Field(description="Weather condition")
|
||||
|
||||
|
||||
def get_weather(city: str) -> str: # noqa: ARG001
|
||||
"""Get the weather for a city."""
|
||||
return "The weather is sunny and 75°F."
|
||||
|
||||
|
||||
@pytest.mark.requires("langchain_openai")
|
||||
def test_inference_to_native_output() -> None:
|
||||
"""Test that native output is inferred when a model supports it."""
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
model = ChatOpenAI(model="gpt-5")
|
||||
agent = create_agent(
|
||||
model,
|
||||
system_prompt=(
|
||||
"You are a helpful weather assistant. Please call the get_weather tool, "
|
||||
"then use the WeatherReport tool to generate the final response."
|
||||
),
|
||||
tools=[get_weather],
|
||||
response_format=WeatherBaseModel,
|
||||
)
|
||||
response = agent.invoke({"messages": [HumanMessage("What's the weather?")]})
|
||||
|
||||
assert isinstance(response["structured_response"], WeatherBaseModel)
|
||||
assert response["structured_response"].temperature == 75.0
|
||||
assert response["structured_response"].condition.lower() == "sunny"
|
||||
assert len(response["messages"]) == 4
|
||||
|
||||
assert [m.type for m in response["messages"]] == [
|
||||
"human", # "What's the weather?"
|
||||
"ai", # "What's the weather?"
|
||||
"tool", # "The weather is sunny and 75°F."
|
||||
"ai", # structured response
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.requires("langchain_openai")
|
||||
def test_inference_to_tool_output() -> None:
|
||||
"""Test that tool output is inferred when a model supports it."""
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
model = ChatOpenAI(model="gpt-4")
|
||||
agent = create_agent(
|
||||
model,
|
||||
system_prompt=(
|
||||
"You are a helpful weather assistant. Please call the get_weather tool, "
|
||||
"then use the WeatherReport tool to generate the final response."
|
||||
),
|
||||
tools=[get_weather],
|
||||
response_format=ToolStrategy(WeatherBaseModel),
|
||||
)
|
||||
response = agent.invoke({"messages": [HumanMessage("What's the weather?")]})
|
||||
|
||||
assert isinstance(response["structured_response"], WeatherBaseModel)
|
||||
assert response["structured_response"].temperature == 75.0
|
||||
assert response["structured_response"].condition.lower() == "sunny"
|
||||
assert len(response["messages"]) == 5
|
||||
|
||||
assert [m.type for m in response["messages"]] == [
|
||||
"human", # "What's the weather?"
|
||||
"ai", # "What's the weather?"
|
||||
"tool", # "The weather is sunny and 75°F."
|
||||
"ai", # structured response
|
||||
"tool", # artificial tool message
|
||||
]
|
||||
@@ -38,8 +38,7 @@ class FakeToolCallingModel(BaseChatModel, Generic[StructuredResponseT]):
|
||||
**kwargs: Any,
|
||||
) -> ChatResult:
|
||||
"""Top Level call"""
|
||||
rf = kwargs.get("response_format")
|
||||
is_native = isinstance(rf, dict) and rf.get("type") == "json_schema"
|
||||
is_native = kwargs.get("response_format")
|
||||
|
||||
if self.tool_calls:
|
||||
if is_native:
|
||||
|
||||
@@ -0,0 +1,142 @@
|
||||
"""Test response_format for langchain-openai.
|
||||
|
||||
If tests fail, cassettes may need to be re-recorded.
|
||||
|
||||
To re-record cassettes:
|
||||
|
||||
1. Delete existing cassettes (`rm tests/cassettes/test_inference_to_*.yaml.gz`)
|
||||
2. Re-run the tests with a valid OPENAI_API_KEY in your environment:
|
||||
```bash
|
||||
OPENAI_API_KEY=... uv run python -m pytest tests/unit_tests/agents/test_response_format_integration.py
|
||||
```
|
||||
|
||||
The cassettes are compressed. To read them:
|
||||
```bash
|
||||
gunzip -c "tests/cassettes/test_inference_to_native_output[True].yaml.gz" | \
|
||||
yq -o json . | \
|
||||
jq '.requests[].body |= (gsub("\n";"") | @base64d | fromjson) |
|
||||
.responses[].body.string |= (gsub("\n";"") | @base64d | fromjson)'
|
||||
```
|
||||
|
||||
Or, in Python:
|
||||
```python
|
||||
import json
|
||||
|
||||
from langchain_tests.conftest import CustomPersister, CustomSerializer
|
||||
|
||||
def bytes_encoder(obj):
|
||||
return obj.decode("utf-8", errors="replace")
|
||||
|
||||
path = "tests/cassettes/test_inference_to_native_output[True].yaml.gz"
|
||||
|
||||
requests, responses = CustomPersister().load_cassette(path, CustomSerializer())
|
||||
assert len(requests) == len(responses)
|
||||
for request, response in list(zip(requests, responses)):
|
||||
print("------ REQUEST ------")
|
||||
req = request._to_dict()
|
||||
req["body"] = json.loads(req["body"])
|
||||
print(json.dumps(req, indent=2, default=bytes_encoder))
|
||||
print("\n\n ------ RESPONSE ------")
|
||||
resp = response
|
||||
print(json.dumps(resp, indent=2, default=bytes_encoder))
|
||||
print("\n\n")
|
||||
```
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from langchain_core.messages import HumanMessage
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from langchain.agents import create_agent
|
||||
from langchain.agents.structured_output import ToolStrategy
|
||||
|
||||
|
||||
# NOTE(review): the class docstring and the Field descriptions are presumably
# serialized into the schema sent to the model, so they are kept verbatim to
# stay consistent with the recorded cassettes — confirm before rewording.
class WeatherBaseModel(BaseModel):
    """Weather response."""

    temperature: float = Field(description="The temperature in fahrenheit")
    condition: str = Field(description="Weather condition")
|
||||
|
||||
|
||||
# The former `# noqa: ARG001` was dropped: `city` is interpolated into the
# result, so the argument is no longer unused.
# NOTE(review): the docstring is presumably forwarded to the model as the tool
# description, so it is kept verbatim to match the recorded cassettes — confirm.
def get_weather(city: str) -> str:
    """Get the weather for a city."""
    return f"The weather in {city} is sunny and 75°F."
|
||||
|
||||
|
||||
@pytest.mark.requires("langchain_openai")
@pytest.mark.vcr
@pytest.mark.parametrize("use_responses_api", [False, True])
def test_inference_to_native_output(use_responses_api: bool) -> None:
    """Test that native output is inferred when a model supports it.

    The schema class is passed directly as `response_format` (no explicit
    strategy), and the test runs once with `use_responses_api=False` and once
    with `use_responses_api=True`. The run is expected to end after four
    messages, the last one being the AI message behind `structured_response`.
    """
    from langchain_openai import ChatOpenAI

    model_kwargs = {"model": "gpt-5", "use_responses_api": use_responses_api}

    # Supply a dummy key when none is configured.
    # NOTE(review): presumably so the client can be built for cassette replay
    # without real credentials — confirm.
    if "OPENAI_API_KEY" not in os.environ:
        model_kwargs["api_key"] = "foo"

    model = ChatOpenAI(**model_kwargs)

    # The prompt text is kept verbatim; it is part of the recorded request.
    agent = create_agent(
        model,
        system_prompt=(
            "You are a helpful weather assistant. Please call the get_weather tool "
            "once, then use the WeatherReport tool to generate the final response."
        ),
        tools=[get_weather],
        response_format=WeatherBaseModel,
    )
    response = agent.invoke({"messages": [HumanMessage("What's the weather in Boston?")]})

    assert isinstance(response["structured_response"], WeatherBaseModel)
    assert response["structured_response"].temperature == 75.0
    assert response["structured_response"].condition.lower() == "sunny"
    assert len(response["messages"]) == 4

    assert [m.type for m in response["messages"]] == [
        "human",  # "What's the weather in Boston?"
        "ai",  # presumably the get_weather tool call
        "tool",  # get_weather result: "The weather in <city> is sunny and 75°F."
        "ai",  # structured response
    ]
|
||||
|
||||
|
||||
@pytest.mark.requires("langchain_openai")
@pytest.mark.vcr
@pytest.mark.parametrize("use_responses_api", [False, True])
def test_inference_to_tool_output(use_responses_api: bool) -> None:
    """Test structured output requested through an explicit `ToolStrategy`.

    `response_format` is `ToolStrategy(WeatherBaseModel)`, and the test runs
    once with `use_responses_api=False` and once with `use_responses_api=True`.
    The run is expected to produce five messages, ending with an extra tool
    message after the AI message that carries the structured response.
    """
    from langchain_openai import ChatOpenAI

    model_kwargs = {"model": "gpt-5", "use_responses_api": use_responses_api}

    # Supply a dummy key when none is configured.
    # NOTE(review): presumably so the client can be built for cassette replay
    # without real credentials — confirm.
    if "OPENAI_API_KEY" not in os.environ:
        model_kwargs["api_key"] = "foo"

    model = ChatOpenAI(**model_kwargs)

    # The prompt text is kept verbatim; it is part of the recorded request.
    agent = create_agent(
        model,
        system_prompt=(
            "You are a helpful weather assistant. Please call the get_weather tool "
            "once, then use the WeatherReport tool to generate the final response."
        ),
        tools=[get_weather],
        response_format=ToolStrategy(WeatherBaseModel),
    )
    response = agent.invoke({"messages": [HumanMessage("What's the weather?")]})

    assert isinstance(response["structured_response"], WeatherBaseModel)
    assert response["structured_response"].temperature == 75.0
    assert response["structured_response"].condition.lower() == "sunny"
    assert len(response["messages"]) == 5

    assert [m.type for m in response["messages"]] == [
        "human",  # "What's the weather?"
        "ai",  # presumably the get_weather tool call
        "tool",  # get_weather result: "The weather in <city> is sunny and 75°F."
        "ai",  # structured response
        "tool",  # artificial tool message
    ]
|
||||
@@ -2,8 +2,52 @@
|
||||
|
||||
from collections.abc import Sequence
|
||||
from importlib import util
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
from langchain_tests.conftest import CustomPersister, CustomSerializer
|
||||
from langchain_tests.conftest import (
|
||||
_base_vcr_config as _base_vcr_config,
|
||||
)
|
||||
from vcr import VCR
|
||||
|
||||
_EXTRA_HEADERS = [
|
||||
("openai-organization", "PLACEHOLDER"),
|
||||
("user-agent", "PLACEHOLDER"),
|
||||
("x-openai-client-user-agent", "PLACEHOLDER"),
|
||||
]
|
||||
|
||||
|
||||
def remove_request_headers(request: Any) -> Any:
    """Redact every header value and the URI of a request.

    Header names are kept; each value is overwritten with `"**REDACTED**"`,
    and `request.uri` is replaced with the same marker. The request is
    modified in place.

    Args:
        request: Object exposing a mutable `headers` mapping and a `uri`
            attribute.

    Returns:
        The same `request` object, after redaction.
    """
    # Iterate over a snapshot of the names so that assigning into the mapping
    # can never interfere with the iteration.
    for name in list(request.headers):
        request.headers[name] = "**REDACTED**"
    request.uri = "**REDACTED**"
    return request
|
||||
|
||||
|
||||
def remove_response_headers(response: dict) -> dict:
    """Redact every header value of a response.

    Header names are kept; each value under `response["headers"]` is
    overwritten with `"**REDACTED**"`. The response is modified in place and
    its other keys are left untouched.

    Args:
        response: Dict with a `"headers"` entry holding a mutable mapping.

    Returns:
        The same `response` dict, after redaction.
    """
    headers = response["headers"]
    # Iterate over a snapshot of the names so that assigning into the mapping
    # can never interfere with the iteration.
    for name in list(headers):
        headers[name] = "**REDACTED**"
    return response
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def vcr_config(_base_vcr_config: dict) -> dict:  # noqa: F811
    """Extend the default configuration coming from langchain_tests.

    Starting from a copy of the base configuration, this appends
    `_EXTRA_HEADERS` to `filter_headers`, installs the request/response
    redaction hooks, and selects the `"yaml.gz"` serializer together with a
    path transformer that ensures a `.yaml.gz` suffix.

    Args:
        _base_vcr_config: Base configuration dict provided by the fixture of
            the same name.

    Returns:
        A new configuration dict; the base dict and its `filter_headers`
        list are left unmodified.
    """
    config = _base_vcr_config.copy()
    # `dict.copy()` is shallow, so extending the existing list in place would
    # also change the list held by the base configuration. Build a new list.
    config["filter_headers"] = [*config.get("filter_headers", []), *_EXTRA_HEADERS]
    config["before_record_request"] = remove_request_headers
    config["before_record_response"] = remove_response_headers
    config["serializer"] = "yaml.gz"
    config["path_transformer"] = VCR.ensure_suffix(".yaml.gz")
    return config
|
||||
|
||||
|
||||
def pytest_recording_configure(config: dict, vcr: VCR) -> None:  # noqa: ARG001
    """Register the custom persister and the `"yaml.gz"` serializer on `vcr`.

    NOTE(review): presumably invoked by the pytest-recording plugin as a
    configuration hook — confirm against the plugin documentation.

    Args:
        config: Unused here; kept in the signature.
        vcr: The VCR instance to register the persister and serializer on.
    """
    vcr.register_persister(CustomPersister())
    vcr.register_serializer("yaml.gz", CustomSerializer())
|
||||
|
||||
|
||||
def pytest_addoption(parser: pytest.Parser) -> None:
|
||||
|
||||
@@ -1771,6 +1771,7 @@ class BaseChatOpenAI(BaseChatModel):
|
||||
tool_choice: dict | str | bool | None = None,
|
||||
strict: bool | None = None,
|
||||
parallel_tool_calls: bool | None = None,
|
||||
response_format: _DictOrPydanticClass | None = None,
|
||||
**kwargs: Any,
|
||||
) -> Runnable[LanguageModelInput, AIMessage]:
|
||||
"""Bind tool-like objects to this chat model.
|
||||
@@ -1796,6 +1797,9 @@ class BaseChatOpenAI(BaseChatModel):
|
||||
be validated. If `None`, `strict` argument will not be passed to the model.
|
||||
parallel_tool_calls: Set to `False` to disable parallel tool use.
|
||||
Defaults to `None` (no specification, which allows parallel tool use).
|
||||
response_format: Optional schema to format model response. If provided
|
||||
and the model does not call a tool, the model will generate a
|
||||
[structured response](https://platform.openai.com/docs/guides/structured-outputs).
|
||||
kwargs: Any additional parameters are passed directly to `bind`.
|
||||
""" # noqa: E501
|
||||
if parallel_tool_calls is not None:
|
||||
@@ -1838,6 +1842,11 @@ class BaseChatOpenAI(BaseChatModel):
|
||||
)
|
||||
raise ValueError(msg)
|
||||
kwargs["tool_choice"] = tool_choice
|
||||
|
||||
if response_format:
|
||||
kwargs["response_format"] = _convert_to_openai_response_format(
|
||||
response_format
|
||||
)
|
||||
return super().bind(tools=formatted_tools, **kwargs)
|
||||
|
||||
def with_structured_output(
|
||||
@@ -3479,6 +3488,7 @@ def _convert_to_openai_response_format(
|
||||
strict is not None
|
||||
and strict is not response_format["json_schema"].get("strict")
|
||||
and isinstance(schema, dict)
|
||||
and "strict" in schema.get("json_schema", {})
|
||||
):
|
||||
msg = (
|
||||
f"Output schema already has 'strict' value set to "
|
||||
|
||||
@@ -28,6 +28,7 @@ from langchain_tests.integration_tests.chat_models import (
|
||||
magic_function,
|
||||
)
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
from tests.unit_tests.fake.callbacks import FakeCallbackHandler
|
||||
@@ -1146,17 +1147,33 @@ def test_multi_party_conversation() -> None:
|
||||
assert "Bob" in response.content
|
||||
|
||||
|
||||
def test_structured_output_and_tools() -> None:
|
||||
class ResponseFormat(BaseModel):
|
||||
response: str
|
||||
explanation: str
|
||||
class ResponseFormat(BaseModel):
|
||||
response: str
|
||||
explanation: str
|
||||
|
||||
llm = ChatOpenAI(model="gpt-5-nano").bind_tools(
|
||||
[GenerateUsername], strict=True, response_format=ResponseFormat
|
||||
|
||||
class ResponseFormatDict(TypedDict):
|
||||
response: str
|
||||
explanation: str
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"schema", [ResponseFormat, ResponseFormat.model_json_schema(), ResponseFormatDict]
|
||||
)
|
||||
def test_structured_output_and_tools(schema: Any) -> None:
|
||||
llm = ChatOpenAI(model="gpt-5-nano", verbosity="low").bind_tools(
|
||||
[GenerateUsername], strict=True, response_format=schema
|
||||
)
|
||||
|
||||
response = llm.invoke("What weighs more, a pound of feathers or a pound of gold?")
|
||||
assert isinstance(response.additional_kwargs["parsed"], ResponseFormat)
|
||||
if schema == ResponseFormat:
|
||||
parsed = response.additional_kwargs["parsed"]
|
||||
assert isinstance(parsed, ResponseFormat)
|
||||
else:
|
||||
parsed = json.loads(response.text)
|
||||
assert isinstance(parsed, dict)
|
||||
assert parsed["response"]
|
||||
assert parsed["explanation"]
|
||||
|
||||
# Test streaming tool calls
|
||||
full: BaseMessageChunk | None = None
|
||||
@@ -1172,10 +1189,6 @@ def test_structured_output_and_tools() -> None:
|
||||
|
||||
|
||||
def test_tools_and_structured_output() -> None:
|
||||
class ResponseFormat(BaseModel):
|
||||
response: str
|
||||
explanation: str
|
||||
|
||||
llm = ChatOpenAI(model="gpt-5-nano").with_structured_output(
|
||||
ResponseFormat, strict=True, include_raw=True, tools=[GenerateUsername]
|
||||
)
|
||||
|
||||
@@ -318,18 +318,23 @@ async def test_parsed_dict_schema_async(schema: Any) -> None:
|
||||
assert isinstance(parsed["response"], str)
|
||||
|
||||
|
||||
def test_function_calling_and_structured_output() -> None:
|
||||
@pytest.mark.parametrize("schema", [Foo, Foo.model_json_schema(), FooDict])
|
||||
def test_function_calling_and_structured_output(schema: Any) -> None:
|
||||
def multiply(x: int, y: int) -> int:
|
||||
"""return x * y"""
|
||||
return x * y
|
||||
|
||||
llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True)
|
||||
bound_llm = llm.bind_tools([multiply], response_format=Foo, strict=True)
|
||||
bound_llm = llm.bind_tools([multiply], response_format=schema, strict=True)
|
||||
# Test structured output
|
||||
response = llm.invoke("how are ya", response_format=Foo)
|
||||
parsed = Foo(**json.loads(response.text))
|
||||
response = llm.invoke("how are ya", response_format=schema)
|
||||
if schema == Foo:
|
||||
parsed = schema(**json.loads(response.text))
|
||||
assert parsed.response
|
||||
else:
|
||||
parsed = json.loads(response.text)
|
||||
assert parsed["response"]
|
||||
assert parsed == response.additional_kwargs["parsed"]
|
||||
assert parsed.response
|
||||
|
||||
# Test function calling
|
||||
ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4"))
|
||||
|
||||
Reference in New Issue
Block a user