diff --git a/libs/langchain_v1/langchain/agents/factory.py b/libs/langchain_v1/langchain/agents/factory.py index af02e587d67..87e02e60265 100644 --- a/libs/langchain_v1/langchain/agents/factory.py +++ b/libs/langchain_v1/langchain/agents/factory.py @@ -1009,8 +1009,9 @@ def create_agent( # noqa: PLR0915 # Bind model based on effective response format if isinstance(effective_response_format, ProviderStrategy): - # Use provider-specific structured output - kwargs = effective_response_format.to_model_kwargs() + kwargs: dict[str, Any] = { + "response_format": effective_response_format.schema_spec.json_schema + } return ( request.model.bind_tools( final_tools, strict=True, **kwargs, **request.model_settings diff --git a/libs/langchain_v1/tests/cassettes/test_inference_to_native_output[False].yaml.gz b/libs/langchain_v1/tests/cassettes/test_inference_to_native_output[False].yaml.gz new file mode 100644 index 00000000000..3a7b90f2633 Binary files /dev/null and b/libs/langchain_v1/tests/cassettes/test_inference_to_native_output[False].yaml.gz differ diff --git a/libs/langchain_v1/tests/cassettes/test_inference_to_native_output[True].yaml.gz b/libs/langchain_v1/tests/cassettes/test_inference_to_native_output[True].yaml.gz new file mode 100644 index 00000000000..cdfb2a66ad2 Binary files /dev/null and b/libs/langchain_v1/tests/cassettes/test_inference_to_native_output[True].yaml.gz differ diff --git a/libs/langchain_v1/tests/cassettes/test_inference_to_tool_output[False].yaml.gz b/libs/langchain_v1/tests/cassettes/test_inference_to_tool_output[False].yaml.gz new file mode 100644 index 00000000000..3f37da79af1 Binary files /dev/null and b/libs/langchain_v1/tests/cassettes/test_inference_to_tool_output[False].yaml.gz differ diff --git a/libs/langchain_v1/tests/cassettes/test_inference_to_tool_output[True].yaml.gz b/libs/langchain_v1/tests/cassettes/test_inference_to_tool_output[True].yaml.gz new file mode 100644 index 00000000000..95cc9005e84 Binary files /dev/null and 
b/libs/langchain_v1/tests/cassettes/test_inference_to_tool_output[True].yaml.gz differ diff --git a/libs/langchain_v1/tests/integration_tests/agents/test_response_format.py b/libs/langchain_v1/tests/integration_tests/agents/test_response_format.py deleted file mode 100644 index db3edf6dcf6..00000000000 --- a/libs/langchain_v1/tests/integration_tests/agents/test_response_format.py +++ /dev/null @@ -1,79 +0,0 @@ -import pytest -from langchain_core.messages import HumanMessage -from pydantic import BaseModel, Field - -from langchain.agents import create_agent -from langchain.agents.structured_output import ToolStrategy - - -class WeatherBaseModel(BaseModel): - """Weather response.""" - - temperature: float = Field(description="The temperature in fahrenheit") - condition: str = Field(description="Weather condition") - - -def get_weather(city: str) -> str: # noqa: ARG001 - """Get the weather for a city.""" - return "The weather is sunny and 75°F." - - -@pytest.mark.requires("langchain_openai") -def test_inference_to_native_output() -> None: - """Test that native output is inferred when a model supports it.""" - from langchain_openai import ChatOpenAI - - model = ChatOpenAI(model="gpt-5") - agent = create_agent( - model, - system_prompt=( - "You are a helpful weather assistant. Please call the get_weather tool, " - "then use the WeatherReport tool to generate the final response." - ), - tools=[get_weather], - response_format=WeatherBaseModel, - ) - response = agent.invoke({"messages": [HumanMessage("What's the weather?")]}) - - assert isinstance(response["structured_response"], WeatherBaseModel) - assert response["structured_response"].temperature == 75.0 - assert response["structured_response"].condition.lower() == "sunny" - assert len(response["messages"]) == 4 - - assert [m.type for m in response["messages"]] == [ - "human", # "What's the weather?" - "ai", # "What's the weather?" - "tool", # "The weather is sunny and 75°F." 
- "ai", # structured response - ] - - -@pytest.mark.requires("langchain_openai") -def test_inference_to_tool_output() -> None: - """Test that tool output is inferred when a model supports it.""" - from langchain_openai import ChatOpenAI - - model = ChatOpenAI(model="gpt-4") - agent = create_agent( - model, - system_prompt=( - "You are a helpful weather assistant. Please call the get_weather tool, " - "then use the WeatherReport tool to generate the final response." - ), - tools=[get_weather], - response_format=ToolStrategy(WeatherBaseModel), - ) - response = agent.invoke({"messages": [HumanMessage("What's the weather?")]}) - - assert isinstance(response["structured_response"], WeatherBaseModel) - assert response["structured_response"].temperature == 75.0 - assert response["structured_response"].condition.lower() == "sunny" - assert len(response["messages"]) == 5 - - assert [m.type for m in response["messages"]] == [ - "human", # "What's the weather?" - "ai", # "What's the weather?" - "tool", # "The weather is sunny and 75°F." 
- "ai", # structured response - "tool", # artificial tool message - ] diff --git a/libs/langchain_v1/tests/unit_tests/agents/model.py b/libs/langchain_v1/tests/unit_tests/agents/model.py index 07ed23995eb..8b948f658a9 100644 --- a/libs/langchain_v1/tests/unit_tests/agents/model.py +++ b/libs/langchain_v1/tests/unit_tests/agents/model.py @@ -38,8 +38,7 @@ class FakeToolCallingModel(BaseChatModel, Generic[StructuredResponseT]): **kwargs: Any, ) -> ChatResult: """Top Level call""" - rf = kwargs.get("response_format") - is_native = isinstance(rf, dict) and rf.get("type") == "json_schema" + is_native = kwargs.get("response_format") if self.tool_calls: if is_native: diff --git a/libs/langchain_v1/tests/unit_tests/agents/test_response_format_integration.py b/libs/langchain_v1/tests/unit_tests/agents/test_response_format_integration.py new file mode 100644 index 00000000000..c08497f8b4b --- /dev/null +++ b/libs/langchain_v1/tests/unit_tests/agents/test_response_format_integration.py @@ -0,0 +1,142 @@ +"""Test response_format for langchain-openai. + +If tests fail, cassettes may need to be re-recorded. + +To re-record cassettes: + +1. Delete existing cassettes (`rm tests/cassettes/test_inference_to_*.yaml.gz`) +2. Re run the tests with a valid OPENAI_API_KEY in your environment: +```bash +OPENAI_API_KEY=... uv run python -m pytest tests/unit_tests/agents/test_response_format_integration.py +``` + +The cassettes are compressed. To read them: +```bash +gunzip -c "tests/cassettes/test_inference_to_native_output[True].yaml.gz" | \ + yq -o json . 
| \ + jq '.requests[].body |= (gsub("\n";"") | @base64d | fromjson) | + .responses[].body.string |= (gsub("\n";"") | @base64d | fromjson)' +``` + +Or, in Python: +```python +import json + +from langchain_tests.conftest import CustomPersister, CustomSerializer + +def bytes_encoder(obj): + return obj.decode("utf-8", errors="replace") + +path = "tests/cassettes/test_inference_to_native_output[True].yaml.gz" + +requests, responses = CustomPersister().load_cassette(path, CustomSerializer()) +assert len(requests) == len(responses) +for request, response in list(zip(requests, responses)): + print("------ REQUEST ------") + req = request._to_dict() + req["body"] = json.loads(req["body"]) + print(json.dumps(req, indent=2, default=bytes_encoder)) + print("\n\n ------ RESPONSE ------") + resp = response + print(json.dumps(resp, indent=2, default=bytes_encoder)) +print("\n\n") +``` +""" + +import os + +import pytest +from langchain_core.messages import HumanMessage +from pydantic import BaseModel, Field + +from langchain.agents import create_agent +from langchain.agents.structured_output import ToolStrategy + + +class WeatherBaseModel(BaseModel): + """Weather response.""" + + temperature: float = Field(description="The temperature in fahrenheit") + condition: str = Field(description="Weather condition") + + +def get_weather(city: str) -> str: # noqa: ARG001 + """Get the weather for a city.""" + return f"The weather in {city} is sunny and 75°F." 
+ + +@pytest.mark.requires("langchain_openai") +@pytest.mark.vcr +@pytest.mark.parametrize("use_responses_api", [False, True]) +def test_inference_to_native_output(use_responses_api: bool) -> None: + """Test that native output is inferred when a model supports it.""" + from langchain_openai import ChatOpenAI + + model_kwargs = {"model": "gpt-5", "use_responses_api": use_responses_api} + + if "OPENAI_API_KEY" not in os.environ: + model_kwargs["api_key"] = "foo" + + model = ChatOpenAI(**model_kwargs) + + agent = create_agent( + model, + system_prompt=( + "You are a helpful weather assistant. Please call the get_weather tool " + "once, then use the WeatherReport tool to generate the final response." + ), + tools=[get_weather], + response_format=WeatherBaseModel, + ) + response = agent.invoke({"messages": [HumanMessage("What's the weather in Boston?")]}) + + assert isinstance(response["structured_response"], WeatherBaseModel) + assert response["structured_response"].temperature == 75.0 + assert response["structured_response"].condition.lower() == "sunny" + assert len(response["messages"]) == 4 + + assert [m.type for m in response["messages"]] == [ + "human", # "What's the weather in Boston?" + "ai", # tool call (get_weather) + "tool", # get_weather result + "ai", # structured response + ] + + +@pytest.mark.requires("langchain_openai") +@pytest.mark.vcr +@pytest.mark.parametrize("use_responses_api", [False, True]) +def test_inference_to_tool_output(use_responses_api: bool) -> None: + """Test that ToolStrategy yields structured output via a tool call.""" + from langchain_openai import ChatOpenAI + + model_kwargs = {"model": "gpt-5", "use_responses_api": use_responses_api} + + if "OPENAI_API_KEY" not in os.environ: + model_kwargs["api_key"] = "foo" + + model = ChatOpenAI(**model_kwargs) + + agent = create_agent( + model, + system_prompt=( + "You are a helpful weather assistant. 
Please call the get_weather tool " + "once, then use the WeatherReport tool to generate the final response." + ), + tools=[get_weather], + response_format=ToolStrategy(WeatherBaseModel), + ) + response = agent.invoke({"messages": [HumanMessage("What's the weather?")]}) + + assert isinstance(response["structured_response"], WeatherBaseModel) + assert response["structured_response"].temperature == 75.0 + assert response["structured_response"].condition.lower() == "sunny" + assert len(response["messages"]) == 5 + + assert [m.type for m in response["messages"]] == [ + "human", # "What's the weather?" + "ai", # tool call (get_weather) + "tool", # get_weather result + "ai", # structured response + "tool", # artificial tool message + ] diff --git a/libs/langchain_v1/tests/unit_tests/conftest.py b/libs/langchain_v1/tests/unit_tests/conftest.py index 96f36ceae77..da921507200 100644 --- a/libs/langchain_v1/tests/unit_tests/conftest.py +++ b/libs/langchain_v1/tests/unit_tests/conftest.py @@ -2,8 +2,52 @@ from collections.abc import Sequence from importlib import util +from typing import Any import pytest +from langchain_tests.conftest import CustomPersister, CustomSerializer +from langchain_tests.conftest import ( + _base_vcr_config as _base_vcr_config, +) +from vcr import VCR + +_EXTRA_HEADERS = [ + ("openai-organization", "PLACEHOLDER"), + ("user-agent", "PLACEHOLDER"), + ("x-openai-client-user-agent", "PLACEHOLDER"), +] + + +def remove_request_headers(request: Any) -> Any: + """Redact every header value and the URI of the request.""" + for k in request.headers: + request.headers[k] = "**REDACTED**" + request.uri = "**REDACTED**" + return request + + +def remove_response_headers(response: dict) -> dict: + """Redact every header value of the response.""" + for k in response["headers"]: + response["headers"][k] = "**REDACTED**" + return response + + +@pytest.fixture(scope="session") +def vcr_config(_base_vcr_config: dict) -> dict: # noqa: F811 + """Extend the default 
configuration coming from langchain_tests.""" + config = _base_vcr_config.copy() + config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS) + config["before_record_request"] = remove_request_headers + config["before_record_response"] = remove_response_headers + config["serializer"] = "yaml.gz" + config["path_transformer"] = VCR.ensure_suffix(".yaml.gz") + return config + + +def pytest_recording_configure(config: dict, vcr: VCR) -> None: # noqa: ARG001 + vcr.register_persister(CustomPersister()) + vcr.register_serializer("yaml.gz", CustomSerializer()) def pytest_addoption(parser: pytest.Parser) -> None: diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index c343bf50f25..e567893cb32 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -1771,6 +1771,7 @@ class BaseChatOpenAI(BaseChatModel): tool_choice: dict | str | bool | None = None, strict: bool | None = None, parallel_tool_calls: bool | None = None, + response_format: _DictOrPydanticClass | None = None, **kwargs: Any, ) -> Runnable[LanguageModelInput, AIMessage]: """Bind tool-like objects to this chat model. @@ -1796,6 +1797,9 @@ class BaseChatOpenAI(BaseChatModel): be validated. If `None`, `strict` argument will not be passed to the model. parallel_tool_calls: Set to `False` to disable parallel tool use. Defaults to `None` (no specification, which allows parallel tool use). + response_format: Optional schema to format model response. If provided + and the model does not call a tool, the model will generate a + [structured response](https://platform.openai.com/docs/guides/structured-outputs). kwargs: Any additional parameters are passed directly to `bind`. 
""" # noqa: E501 if parallel_tool_calls is not None: @@ -1838,6 +1842,11 @@ class BaseChatOpenAI(BaseChatModel): ) raise ValueError(msg) kwargs["tool_choice"] = tool_choice + + if response_format: + kwargs["response_format"] = _convert_to_openai_response_format( + response_format + ) return super().bind(tools=formatted_tools, **kwargs) def with_structured_output( @@ -3479,6 +3488,7 @@ def _convert_to_openai_response_format( strict is not None and strict is not response_format["json_schema"].get("strict") and isinstance(schema, dict) + and "strict" in schema.get("json_schema", {}) ): msg = ( f"Output schema already has 'strict' value set to " diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py index 6fcccdad56d..e2d0133879a 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py @@ -28,6 +28,7 @@ from langchain_tests.integration_tests.chat_models import ( magic_function, ) from pydantic import BaseModel, Field, field_validator +from typing_extensions import TypedDict from langchain_openai import ChatOpenAI from tests.unit_tests.fake.callbacks import FakeCallbackHandler @@ -1146,17 +1147,33 @@ def test_multi_party_conversation() -> None: assert "Bob" in response.content -def test_structured_output_and_tools() -> None: - class ResponseFormat(BaseModel): - response: str - explanation: str +class ResponseFormat(BaseModel): + response: str + explanation: str - llm = ChatOpenAI(model="gpt-5-nano").bind_tools( - [GenerateUsername], strict=True, response_format=ResponseFormat + +class ResponseFormatDict(TypedDict): + response: str + explanation: str + + +@pytest.mark.parametrize( + "schema", [ResponseFormat, ResponseFormat.model_json_schema(), ResponseFormatDict] +) +def test_structured_output_and_tools(schema: Any) -> None: + llm = ChatOpenAI(model="gpt-5-nano", 
verbosity="low").bind_tools( + [GenerateUsername], strict=True, response_format=schema ) response = llm.invoke("What weighs more, a pound of feathers or a pound of gold?") - assert isinstance(response.additional_kwargs["parsed"], ResponseFormat) + if schema == ResponseFormat: + parsed = response.additional_kwargs["parsed"] + assert isinstance(parsed, ResponseFormat) + else: + parsed = json.loads(response.text) + assert isinstance(parsed, dict) + assert parsed["response"] + assert parsed["explanation"] # Test streaming tool calls full: BaseMessageChunk | None = None @@ -1172,10 +1189,6 @@ def test_structured_output_and_tools() -> None: def test_tools_and_structured_output() -> None: - class ResponseFormat(BaseModel): - response: str - explanation: str - llm = ChatOpenAI(model="gpt-5-nano").with_structured_output( ResponseFormat, strict=True, include_raw=True, tools=[GenerateUsername] ) diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index db18dcf046f..7a08864387e 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -318,18 +318,23 @@ async def test_parsed_dict_schema_async(schema: Any) -> None: assert isinstance(parsed["response"], str) -def test_function_calling_and_structured_output() -> None: +@pytest.mark.parametrize("schema", [Foo, Foo.model_json_schema(), FooDict]) +def test_function_calling_and_structured_output(schema: Any) -> None: def multiply(x: int, y: int) -> int: """return x * y""" return x * y llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) - bound_llm = llm.bind_tools([multiply], response_format=Foo, strict=True) + bound_llm = llm.bind_tools([multiply], response_format=schema, strict=True) # Test structured output - response = llm.invoke("how are ya", response_format=Foo) - parsed = 
Foo(**json.loads(response.text)) + response = llm.invoke("how are ya", response_format=schema) + if schema == Foo: + parsed = schema(**json.loads(response.text)) + assert parsed.response + else: + parsed = json.loads(response.text) + assert parsed["response"] assert parsed == response.additional_kwargs["parsed"] - assert parsed.response # Test function calling ai_msg = cast(AIMessage, bound_llm.invoke("whats 5 * 4"))