From 00244122bd12cca1bf05584c2abfcf8b9c481fe4 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Thu, 7 Aug 2025 22:24:21 -0400 Subject: [PATCH] feat(openai): `minimal` and `verbosity` (#32455) --- .../tests/unit_tests/chat_models/test_base.py | 53 +++++++++ .../chat_models/test_chat_models.py | 53 +++++++++ .../langchain_openai/chat_models/base.py | 49 +++++++- .../chat_models/test_base.py | 40 +++---- .../chat_models/test_responses_api.py | 109 +++++++++++++++--- .../tests/unit_tests/chat_models/test_base.py | 88 +++++++++++++- .../chat_models/test_prompt_cache_key.py | 5 +- .../openai/tests/unit_tests/test_tools.py | 8 +- libs/partners/openai/uv.lock | 4 +- 9 files changed, 354 insertions(+), 55 deletions(-) diff --git a/libs/langchain/tests/unit_tests/chat_models/test_base.py b/libs/langchain/tests/unit_tests/chat_models/test_base.py index 8cd5e0631b8..65be8a429f2 100644 --- a/libs/langchain/tests/unit_tests/chat_models/test_base.py +++ b/libs/langchain/tests/unit_tests/chat_models/test_base.py @@ -68,6 +68,32 @@ def test_init_unknown_provider() -> None: clear=True, ) def test_configurable() -> None: + """Test configurable chat model behavior without default parameters. + + Verifies that a configurable chat model initialized without default parameters: + - Has access to all standard runnable methods (``invoke``, ``stream``, etc.) + - Blocks access to non-configurable methods until configuration is provided + - Supports declarative operations (``bind_tools``) without mutating original model + - Can chain declarative operations and configuration to access full functionality + - Properly resolves to the configured model type when parameters are provided + + Example: + + .. python:: + + # This creates a configurable model without specifying which model + model = init_chat_model() + + # This will FAIL - no model specified yet + model.get_num_tokens("hello") # AttributeError! + + # This works - provides model at runtime + response = model.invoke( + "Hello", + config={"configurable": {"model": "gpt-4o"}} + ) + + """ model = init_chat_model() for method in ( @@ -125,6 +151,7 @@ def test_configurable() -> None: "presence_penalty": None, "reasoning": None, "reasoning_effort": None, + "verbosity": None, "frequency_penalty": None, "include": None, "seed": None, @@ -170,6 +197,32 @@ def test_configurable() -> None: clear=True, ) def test_configurable_with_default() -> None: + """Test configurable chat model behavior with default parameters. + + Verifies that a configurable chat model initialized with default parameters: + - Has access to all standard runnable methods (``invoke``, ``stream``, etc.) + - Provides immediate access to non-configurable methods (e.g. ``get_num_tokens``) + - Supports model switching through runtime configuration using ``config_prefix`` + - Maintains proper model identity and attributes when reconfigured + - Can be used in chains with different model providers via configuration + + Example: + + .. 
python:: + + # This creates a configurable model with default parameters (model) + model = init_chat_model("gpt-4o", configurable_fields="any", config_prefix="bar") + + # This works immediately - uses default gpt-4o + tokens = model.get_num_tokens("hello") + + # This also works - switches to Claude at runtime + response = model.invoke( + "Hello", + config={"configurable": {"my_model_model": "claude-3-sonnet-20240229"}} + ) + + """ # noqa: E501 model = init_chat_model("gpt-4o", configurable_fields="any", config_prefix="bar") for method in ( "invoke", diff --git a/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py b/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py index 147d7813f89..0f991195f7d 100644 --- a/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py +++ b/libs/langchain_v1/tests/unit_tests/chat_models/test_chat_models.py @@ -68,6 +68,32 @@ def test_init_unknown_provider() -> None: clear=True, ) def test_configurable() -> None: + """Test configurable chat model behavior without default parameters. + + Verifies that a configurable chat model initialized without default parameters: + - Has access to all standard runnable methods (``invoke``, ``stream``, etc.) + - Blocks access to non-configurable methods until configuration is provided + - Supports declarative operations (``bind_tools``) without mutating original model + - Can chain declarative operations and configuration to access full functionality + - Properly resolves to the configured model type when parameters are provided + + Example: + + .. python:: + + # This creates a configurable model without specifying which model + model = init_chat_model() + + # This will FAIL - no model specified yet + model.get_num_tokens("hello") # AttributeError! + + # This works - provides model at runtime + response = model.invoke( + "Hello", + config={"configurable": {"model": "gpt-4o"}} + ) + + """ model = init_chat_model() for method in ( @@ -125,6 +151,7 @@ def test_configurable() -> None: "presence_penalty": None, "reasoning": None, "reasoning_effort": None, + "verbosity": None, "frequency_penalty": None, "include": None, "seed": None, @@ -170,6 +197,32 @@ def test_configurable() -> None: clear=True, ) def test_configurable_with_default() -> None: + """Test configurable chat model behavior with default parameters. + + Verifies that a configurable chat model initialized with default parameters: + - Has access to all standard runnable methods (``invoke``, ``stream``, etc.) + - Provides immediate access to non-configurable methods (e.g. ``get_num_tokens``) + - Supports model switching through runtime configuration using ``config_prefix`` + - Maintains proper model identity and attributes when reconfigured + - Can be used in chains with different model providers via configuration + + Example: + + .. 
python:: + + # This creates a configurable model with default parameters (model) + model = init_chat_model("gpt-4o", configurable_fields="any", config_prefix="bar") + + # This works immediately - uses default gpt-4o + tokens = model.get_num_tokens("hello") + + # This also works - switches to Claude at runtime + response = model.invoke( + "Hello", + config={"configurable": {"my_model_model": "claude-3-sonnet-20240229"}} + ) + + """ # noqa: E501 model = init_chat_model("gpt-4o", configurable_fields="any", config_prefix="bar") for method in ( "invoke", diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index eb1fd4506e3..a8702359b36 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -458,8 +458,7 @@ class BaseChatOpenAI(BaseChatModel): alias="api_key", default_factory=secret_from_env("OPENAI_API_KEY", default=None) ) openai_api_base: Optional[str] = Field(default=None, alias="base_url") - """Base URL path for API requests, leave blank if not using a proxy or service - emulator.""" + """Base URL path for API requests, leave blank if not using a proxy or service emulator.""" # noqa: E501 openai_organization: Optional[str] = Field(default=None, alias="organization") """Automatically inferred from env var ``OPENAI_ORG_ID`` if not provided.""" # to support explicit proxy for OpenAI @@ -507,8 +506,9 @@ class BaseChatOpenAI(BaseChatModel): Reasoning models only, like OpenAI o1, o3, and o4-mini. - Currently supported values are low, medium, and high. Reducing reasoning effort - can result in faster responses and fewer tokens used on reasoning in a response. + Currently supported values are ``'minimal'``, ``'low'``, ``'medium'``, and + ``'high'``. Reducing reasoning effort can result in faster responses and fewer + tokens used on reasoning in a response. .. versionadded:: 0.2.14 """ @@ -527,6 +527,17 @@ class BaseChatOpenAI(BaseChatModel): .. versionadded:: 0.3.24 + """ + verbosity: Optional[str] = None + """Controls the verbosity level of responses for reasoning models. For use with the + Responses API. + + Currently supported values are ``'low'``, ``'medium'``, and ``'high'``. + + Controls how detailed the model's responses are. + + .. versionadded:: 0.3.28 + """ tiktoken_model_name: Optional[str] = None """The model name to pass to tiktoken when using this class. @@ -654,6 +665,7 @@ class BaseChatOpenAI(BaseChatModel): llm = ChatOpenAI( model="o4-mini", use_responses_api=True, + output_version="responses/v1", ) llm.invoke([HumanMessage("How are you?")], previous_response_id="resp_123") @@ -701,10 +713,24 @@ class BaseChatOpenAI(BaseChatModel): @model_validator(mode="before") @classmethod def validate_temperature(cls, values: dict[str, Any]) -> Any: - """Currently o1 models only allow temperature=1.""" + """Validate temperature parameter for different models. 
+ + - o1 models only allow temperature=1 + - gpt-5 models only allow temperature=1 or unset (defaults to 1) + """ model = values.get("model_name") or values.get("model") or "" + + # For o1 models, set temperature=1 if not provided if model.startswith("o1") and "temperature" not in values: values["temperature"] = 1 + + # For gpt-5 models, handle temperature restrictions + if model.startswith("gpt-5"): + temperature = values.get("temperature") + if temperature is not None and temperature != 1: + # For gpt-5, only temperature=1 is supported, so remove non-defaults + values.pop("temperature", None) + return values @model_validator(mode="after") @@ -805,6 +831,7 @@ class BaseChatOpenAI(BaseChatModel): "temperature": self.temperature, "reasoning_effort": self.reasoning_effort, "reasoning": self.reasoning, + "verbosity": self.verbosity, "include": self.include, "service_tier": self.service_tier, "truncation": self.truncation, @@ -1178,6 +1205,7 @@ class BaseChatOpenAI(BaseChatModel): kwargs["stop"] = stop payload = {**self._default_params, **kwargs} + if self._use_responses_api(payload): if self.use_previous_response_id: last_messages, previous_response_id = _get_last_messages(messages) @@ -2366,7 +2394,11 @@ class ChatOpenAI(BaseChatOpenAI): # type: ignore[override] from langchain_openai import ChatOpenAI - llm = ChatOpenAI(model="gpt-4.1-mini", use_responses_api=True) + llm = ChatOpenAI( + model="gpt-4.1-mini", + use_responses_api=True, + output_version="responses/v1", + ) response = llm.invoke("Hi, I'm Bob.") response.text() @@ -3486,6 +3518,11 @@ def _construct_responses_api_payload( if "reasoning_effort" in payload and "reasoning" not in payload: payload["reasoning"] = {"effort": payload.pop("reasoning_effort")} + # Remove temperature parameter for models that don't support it in responses API + model = payload.get("model", "") + if model.startswith("gpt-5"): + payload.pop("temperature", None) + payload["input"] = _construct_responses_api_input(messages) if tools := payload.pop("tools", None): new_tools: list = [] diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py index 1bc191d418f..b18bd5f97e9 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base.py @@ -32,7 +32,7 @@ from pydantic import BaseModel, Field from langchain_openai import ChatOpenAI from tests.unit_tests.fake.callbacks import FakeCallbackHandler -MAX_TOKEN_COUNT = 16 +MAX_TOKEN_COUNT = 100 @pytest.mark.scheduled @@ -219,7 +219,7 @@ async def test_openai_abatch_tags(use_responses_api: bool) -> None: def test_openai_invoke() -> None: """Test invoke tokens from ChatOpenAI.""" llm = ChatOpenAI( - model="o4-mini", + model="gpt-5-nano", service_tier="flex", # Also test service_tier max_retries=3, # Add retries for 503 capacity errors ) @@ -418,7 +418,7 @@ class MakeASandwich(BaseModel): def test_tool_use() -> None: - llm = ChatOpenAI(model="gpt-4-turbo", temperature=0) + llm = ChatOpenAI(model="gpt-5-nano", temperature=0) llm_with_tool = llm.bind_tools(tools=[GenerateUsername], tool_choice=True) msgs: list = [HumanMessage("Sally has green hair, what would her username be?")] ai_msg = llm_with_tool.invoke(msgs) @@ -462,7 +462,7 @@ def test_tool_use() -> None: def test_manual_tool_call_msg(use_responses_api: bool) -> None: """Test passing in manually construct tool call message.""" llm = ChatOpenAI( - model="gpt-3.5-turbo-0125", 
temperature=0, use_responses_api=use_responses_api + model="gpt-5-nano", temperature=0, use_responses_api=use_responses_api ) llm_with_tool = llm.bind_tools(tools=[GenerateUsername]) msgs: list = [ @@ -508,7 +508,7 @@ def test_manual_tool_call_msg(use_responses_api: bool) -> None: def test_bind_tools_tool_choice(use_responses_api: bool) -> None: """Test passing in manually construct tool call message.""" llm = ChatOpenAI( - model="gpt-3.5-turbo-0125", temperature=0, use_responses_api=use_responses_api + model="gpt-5-nano", temperature=0, use_responses_api=use_responses_api ) for tool_choice in ("any", "required"): llm_with_tools = llm.bind_tools( @@ -523,7 +523,7 @@ def test_bind_tools_tool_choice(use_responses_api: bool) -> None: def test_disable_parallel_tool_calling() -> None: - llm = ChatOpenAI(model="gpt-4o-mini") + llm = ChatOpenAI(model="gpt-5-nano") llm_with_tools = llm.bind_tools([GenerateUsername], parallel_tool_calls=False) result = llm_with_tools.invoke( "Use the GenerateUsername tool to generate user names for:\n\n" @@ -534,7 +534,7 @@ def test_disable_parallel_tool_calling() -> None: assert len(result.tool_calls) == 1 -@pytest.mark.parametrize("model", ["gpt-4o-mini", "o1", "gpt-4"]) +@pytest.mark.parametrize("model", ["gpt-4o-mini", "o1", "gpt-4", "gpt-5-nano"]) def test_openai_structured_output(model: str) -> None: class MyModel(BaseModel): """A Person""" @@ -694,7 +694,7 @@ def test_tool_calling_strict(use_responses_api: bool) -> None: input: Optional[int] = Field(default=None) model = ChatOpenAI( - model="gpt-4.1", temperature=0, use_responses_api=use_responses_api + model="gpt-5-nano", temperature=0, use_responses_api=use_responses_api ) # N.B. magic_function adds metadata to schema (min/max for number fields) model_with_tools = model.bind_tools([magic_function], strict=True) @@ -818,7 +818,7 @@ def test_json_schema_openai_format( strict: bool, method: Literal["json_schema", "function_calling"] ) -> None: """Test we can pass in OpenAI schema format specifying strict.""" - llm = ChatOpenAI(model="gpt-4o-mini") + llm = ChatOpenAI(model="gpt-5-nano") schema = { "name": "get_weather", "description": "Fetches the weather in the given location", @@ -939,7 +939,7 @@ def test_prediction_tokens() -> None: """ ) - llm = ChatOpenAI(model="gpt-4o") + llm = ChatOpenAI(model="gpt-4.1-nano") query = ( "Replace the Username property with an Email property. " "Respond only with code, and with no markdown formatting." 
@@ -981,7 +981,7 @@ class Foo(BaseModel): def test_stream_response_format() -> None: full: Optional[BaseMessageChunk] = None chunks = [] - for chunk in ChatOpenAI(model="gpt-4o-mini").stream( + for chunk in ChatOpenAI(model="gpt-5-nano").stream( "how are ya", response_format=Foo ): chunks.append(chunk) @@ -998,7 +998,7 @@ def test_stream_response_format() -> None: async def test_astream_response_format() -> None: full: Optional[BaseMessageChunk] = None chunks = [] - async for chunk in ChatOpenAI(model="gpt-4o-mini").astream( + async for chunk in ChatOpenAI(model="gpt-5-nano").astream( "how are ya", response_format=Foo ): chunks.append(chunk) @@ -1042,7 +1042,7 @@ def test_o1_stream_default_works() -> None: def test_multi_party_conversation() -> None: - llm = ChatOpenAI(model="gpt-4o") + llm = ChatOpenAI(model="gpt-5-nano") messages = [ HumanMessage("Hi, I have black hair.", name="Alice"), HumanMessage("Hi, I have brown hair.", name="Bob"), @@ -1057,7 +1057,7 @@ def test_structured_output_and_tools() -> None: response: str explanation: str - llm = ChatOpenAI(model="gpt-4o-mini").bind_tools( + llm = ChatOpenAI(model="gpt-5-nano").bind_tools( [GenerateUsername], strict=True, response_format=ResponseFormat ) @@ -1082,7 +1082,7 @@ def test_tools_and_structured_output() -> None: response: str explanation: str - llm = ChatOpenAI(model="gpt-4o-mini").with_structured_output( + llm = ChatOpenAI(model="gpt-5-nano").with_structured_output( ResponseFormat, strict=True, include_raw=True, tools=[GenerateUsername] ) @@ -1114,8 +1114,8 @@ def test_tools_and_structured_output() -> None: @pytest.mark.scheduled def test_prompt_cache_key_invoke() -> None: - """Test that prompt_cache_key works with invoke calls.""" - chat = ChatOpenAI(model="gpt-4o-mini", max_completion_tokens=20) + """Test that `prompt_cache_key` works with invoke calls.""" + chat = ChatOpenAI(model="gpt-5-nano", max_completion_tokens=500) messages = [HumanMessage("Say hello")] # Test that invoke works with prompt_cache_key parameter @@ -1135,18 +1135,18 @@ def test_prompt_cache_key_invoke() -> None: @pytest.mark.scheduled def test_prompt_cache_key_usage_methods_integration() -> None: - """Integration test for prompt_cache_key usage methods.""" + """Integration test for `prompt_cache_key` usage methods.""" messages = [HumanMessage("Say hi")] # Test keyword argument method - chat = ChatOpenAI(model="gpt-4o-mini", max_completion_tokens=10) + chat = ChatOpenAI(model="gpt-5-nano", max_completion_tokens=10) response = chat.invoke(messages, prompt_cache_key="integration-test-v1") assert isinstance(response, AIMessage) assert isinstance(response.content, str) # Test model-level via model_kwargs chat_model_level = ChatOpenAI( - model="gpt-4o-mini", + model="gpt-5-nano", max_completion_tokens=10, model_kwargs={"prompt_cache_key": "integration-model-level-v1"}, ) diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py index 4d051c5601e..3b1a3b41e5d 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py @@ -175,7 +175,9 @@ class FooDict(TypedDict): def test_parsed_pydantic_schema() -> None: - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" + ) response = llm.invoke("how are ya", response_format=Foo) parsed = 
Foo(**json.loads(response.text())) assert parsed == response.additional_kwargs["parsed"] @@ -193,7 +195,9 @@ def test_parsed_pydantic_schema() -> None: async def test_parsed_pydantic_schema_async() -> None: - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" + ) response = await llm.ainvoke("how are ya", response_format=Foo) parsed = Foo(**json.loads(response.text())) assert parsed == response.additional_kwargs["parsed"] @@ -213,7 +217,9 @@ async def test_parsed_pydantic_schema_async() -> None: @pytest.mark.flaky(retries=3, delay=1) @pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict]) def test_parsed_dict_schema(schema: Any) -> None: - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" + ) response = llm.invoke("how are ya", response_format=schema) parsed = json.loads(response.text()) assert parsed == response.additional_kwargs["parsed"] @@ -231,7 +237,9 @@ def test_parsed_dict_schema(schema: Any) -> None: def test_parsed_strict() -> None: - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" + ) class InvalidJoke(TypedDict): setup: Annotated[str, ..., "The setup of the joke"] @@ -258,7 +266,9 @@ def test_parsed_strict() -> None: @pytest.mark.flaky(retries=3, delay=1) @pytest.mark.parametrize("schema", [Foo.model_json_schema(), FooDict]) async def test_parsed_dict_schema_async(schema: Any) -> None: - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" + ) response = await llm.ainvoke("how are ya", response_format=schema) parsed = json.loads(response.text()) assert parsed == response.additional_kwargs["parsed"] @@ -280,7 +290,9 @@ def test_function_calling_and_structured_output() -> None: """return x * y""" return x * y - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" + ) bound_llm = llm.bind_tools([multiply], response_format=Foo, strict=True) # Test structured output response = llm.invoke("how are ya", response_format=Foo) @@ -324,7 +336,9 @@ def test_reasoning(output_version: Literal["v0", "responses/v1"]) -> None: def test_stateful_api() -> None: - llm = ChatOpenAI(model=MODEL_NAME, use_responses_api=True) + llm = ChatOpenAI( + model=MODEL_NAME, use_responses_api=True, output_version="responses/v1" + ) response = llm.invoke("how are you, my name is Bobo") assert "id" in response.response_metadata @@ -421,7 +435,9 @@ def test_stream_reasoning_summary( @pytest.mark.vcr def test_code_interpreter() -> None: - llm = ChatOpenAI(model="o4-mini", use_responses_api=True) + llm = ChatOpenAI( + model="o4-mini", use_responses_api=True, output_version="responses/v1" + ) llm_with_tools = llm.bind_tools( [{"type": "code_interpreter", "container": {"type": "auto"}}] ) @@ -431,13 +447,16 @@ def test_code_interpreter() -> None: } response = llm_with_tools.invoke([input_message]) _check_response(response) - tool_outputs = response.additional_kwargs["tool_outputs"] + tool_outputs = [ + block + for block in response.content + if isinstance(block, dict) and block.get("type") == "code_interpreter_call" + ] assert tool_outputs assert any(output["type"] == "code_interpreter_call" for output 
in tool_outputs) # Test streaming # Use same container - tool_outputs = response.additional_kwargs["tool_outputs"] assert len(tool_outputs) == 1 container_id = tool_outputs[0]["container_id"] llm_with_tools = llm.bind_tools( @@ -449,7 +468,11 @@ def test_code_interpreter() -> None: assert isinstance(chunk, AIMessageChunk) full = chunk if full is None else full + chunk assert isinstance(full, AIMessageChunk) - tool_outputs = full.additional_kwargs["tool_outputs"] + tool_outputs = [ + block + for block in full.content + if isinstance(block, dict) and block.get("type") == "code_interpreter_call" + ] assert tool_outputs assert any(output["type"] == "code_interpreter_call" for output in tool_outputs) @@ -460,7 +483,9 @@ def test_code_interpreter() -> None: @pytest.mark.vcr def test_mcp_builtin() -> None: - llm = ChatOpenAI(model="o4-mini", use_responses_api=True) + llm = ChatOpenAI( + model="o4-mini", use_responses_api=True, output_version="responses/v1" + ) llm_with_tools = llm.bind_tools( [ @@ -489,8 +514,8 @@ def test_mcp_builtin() -> None: "approve": True, "approval_request_id": output["id"], } - for output in response.additional_kwargs["tool_outputs"] - if output["type"] == "mcp_approval_request" + for output in response.content + if isinstance(output, dict) and output.get("type") == "mcp_approval_request" ] ) _ = llm_with_tools.invoke( @@ -549,7 +574,9 @@ def test_mcp_builtin_zdr() -> None: @pytest.mark.vcr() def test_image_generation_streaming() -> None: """Test image generation streaming.""" - llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True) + llm = ChatOpenAI( + model="gpt-4.1", use_responses_api=True, output_version="responses/v1" + ) tool = { "type": "image_generation", # For testing purposes let's keep the quality low, so the test runs faster. @@ -596,7 +623,13 @@ def test_image_generation_streaming() -> None: # At the moment, the streaming API does not pick up annotations fully. # So the following check is commented out. 
# _check_response(complete_ai_message) - tool_output = complete_ai_message.additional_kwargs["tool_outputs"][0] + tool_outputs = [ + block + for block in complete_ai_message.content + if isinstance(block, dict) and block.get("type") == "image_generation_call" + ] + assert len(tool_outputs) == 1 + tool_output = tool_outputs[0] assert set(tool_output.keys()).issubset(expected_keys) @@ -604,7 +637,9 @@ def test_image_generation_streaming() -> None: def test_image_generation_multi_turn() -> None: """Test multi-turn editing of image generation by passing in history.""" # Test multi-turn - llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True) + llm = ChatOpenAI( + model="gpt-4.1", use_responses_api=True, output_version="responses/v1" + ) # Test invocation tool = { "type": "image_generation", @@ -621,7 +656,13 @@ def test_image_generation_multi_turn() -> None: ] ai_message = llm_with_tools.invoke(chat_history) _check_response(ai_message) - tool_output = ai_message.additional_kwargs["tool_outputs"][0] + tool_outputs = [ + block + for block in ai_message.content + if isinstance(block, dict) and block.get("type") == "image_generation_call" + ] + assert len(tool_outputs) == 1 + tool_output = tool_outputs[0] # Example tool output for an image # { @@ -670,10 +711,40 @@ def test_image_generation_multi_turn() -> None: ai_message2 = llm_with_tools.invoke(chat_history) _check_response(ai_message2) - tool_output2 = ai_message2.additional_kwargs["tool_outputs"][0] + tool_outputs2 = [ + block + for block in ai_message2.content + if isinstance(block, dict) and block.get("type") == "image_generation_call" + ] + assert len(tool_outputs2) == 1 + tool_output2 = tool_outputs2[0] assert set(tool_output2.keys()).issubset(expected_keys) +@pytest.mark.xfail( + reason="verbosity parameter not yet supported by OpenAI Responses API" +) +def test_verbosity_parameter() -> None: + """Test verbosity parameter with Responses API. + + TODO: This test is expected to fail until OpenAI enables verbosity support + in the Responses API for available models. The parameter is properly implemented + in the codebase but the API currently returns 'Unknown parameter: verbosity'. + Remove @pytest.mark.xfail when OpenAI adds support. 
+ """ + llm = ChatOpenAI( + model=MODEL_NAME, + verbosity="medium", + use_responses_api=True, + output_version="responses/v1", + ) + response = llm.invoke([HumanMessage(content="Hello, explain quantum computing.")]) + + assert isinstance(response, AIMessage) + assert response.content + # When verbosity works, we expect the response to respect the verbosity level + + @pytest.mark.vcr() def test_custom_tool() -> None: @custom_tool diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py index c4176711482..73185790602 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py @@ -874,8 +874,13 @@ def test_get_num_tokens_from_messages() -> None: ), ToolMessage("foobar", tool_call_id="foo"), ] - expected = 176 - actual = llm.get_num_tokens_from_messages(messages) + expected = 431 # Updated to match token count with mocked 100x100 image + + # Mock _url_to_size to avoid PIL dependency in unit tests + with patch("langchain_openai.chat_models.base._url_to_size") as mock_url_to_size: + mock_url_to_size.return_value = (100, 100) # 100x100 pixel image + actual = llm.get_num_tokens_from_messages(messages) + assert expected == actual # Test file inputs @@ -1131,6 +1136,73 @@ def test_init_o1() -> None: assert len(record) == 0 +def test_init_minimal_reasoning_effort() -> None: + with pytest.warns(None) as record: # type: ignore[call-overload] + ChatOpenAI(model="gpt-5", reasoning_effort="minimal") + assert len(record) == 0 + + +@pytest.mark.parametrize("use_responses_api", [False, True]) +@pytest.mark.parametrize("use_max_completion_tokens", [True, False]) +def test_minimal_reasoning_effort_payload( + use_max_completion_tokens: bool, use_responses_api: bool +) -> None: + """Test that minimal reasoning effort is included in request payload.""" + if use_max_completion_tokens: + kwargs = {"max_completion_tokens": 100} + else: + kwargs = {"max_tokens": 100} + + init_kwargs: dict[str, Any] = { + "model": "gpt-5", + "reasoning_effort": "minimal", + "use_responses_api": use_responses_api, + **kwargs, + } + + if use_responses_api: + init_kwargs["output_version"] = "responses/v1" + + llm = ChatOpenAI(**init_kwargs) + + messages = [ + {"role": "developer", "content": "respond with just 'test'"}, + {"role": "user", "content": "hello"}, + ] + + payload = llm._get_request_payload(messages, stop=None) + + # When using responses API, reasoning_effort becomes reasoning.effort + if use_responses_api: + assert "reasoning" in payload + assert payload["reasoning"]["effort"] == "minimal" + # For responses API, tokens param becomes max_output_tokens + assert payload["max_output_tokens"] == 100 + else: + # For non-responses API, reasoning_effort remains as is + assert payload["reasoning_effort"] == "minimal" + if use_max_completion_tokens: + assert payload["max_completion_tokens"] == 100 + else: + # max_tokens gets converted to max_completion_tokens in non-responses API + assert payload["max_completion_tokens"] == 100 + + +def test_verbosity_parameter_payload() -> None: + """Test verbosity parameter is included in request payload for Responses API.""" + llm = ChatOpenAI( + model="gpt-5", + verbosity="high", + use_responses_api=True, + output_version="responses/v1", + ) + + messages = [{"role": "user", "content": "hello"}] + payload = llm._get_request_payload(messages, stop=None) + + assert payload["verbosity"] == "high" + + def 
test_structured_output_old_model() -> None: class Output(TypedDict): """output.""" @@ -2198,7 +2270,9 @@ def test__construct_responses_api_input_multiple_message_types() -> None: assert messages_copy == messages # Test dict messages - llm = ChatOpenAI(model="o4-mini", use_responses_api=True) + llm = ChatOpenAI( + model="o4-mini", use_responses_api=True, output_version="responses/v1" + ) message_dicts: list = [ {"role": "developer", "content": "This is a developer message."}, { @@ -2239,7 +2313,9 @@ class FakeTracer(BaseTracer): def test_mcp_tracing() -> None: # Test we exclude sensitive information from traces - llm = ChatOpenAI(model="o4-mini", use_responses_api=True) + llm = ChatOpenAI( + model="o4-mini", use_responses_api=True, output_version="responses/v1" + ) tracer = FakeTracer() mock_client = MagicMock() @@ -2430,7 +2506,9 @@ def test_get_last_messages() -> None: def test_get_request_payload_use_previous_response_id() -> None: # Default - don't use previous_response ID - llm = ChatOpenAI(model="o4-mini", use_responses_api=True) + llm = ChatOpenAI( + model="o4-mini", use_responses_api=True, output_version="responses/v1" + ) messages = [ HumanMessage("Hello"), AIMessage("Hi there!", response_metadata={"id": "resp_123"}), diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_prompt_cache_key.py b/libs/partners/openai/tests/unit_tests/chat_models/test_prompt_cache_key.py index 1f6c8c5d583..1aad6baff79 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_prompt_cache_key.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_prompt_cache_key.py @@ -71,7 +71,10 @@ def test_prompt_cache_key_model_kwargs() -> None: def test_prompt_cache_key_responses_api() -> None: """Test that prompt_cache_key works with Responses API.""" chat = ChatOpenAI( - model="gpt-4o-mini", use_responses_api=True, max_completion_tokens=10 + model="gpt-4o-mini", + use_responses_api=True, + output_version="responses/v1", + max_completion_tokens=10, ) messages = [HumanMessage("Hello")] diff --git a/libs/partners/openai/tests/unit_tests/test_tools.py b/libs/partners/openai/tests/unit_tests/test_tools.py index 106aa0aa080..63b097e6248 100644 --- a/libs/partners/openai/tests/unit_tests/test_tools.py +++ b/libs/partners/openai/tests/unit_tests/test_tools.py @@ -37,7 +37,9 @@ def test_custom_tool() -> None: """Do thing.""" pass - llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True).bind_tools([another_tool]) + llm = ChatOpenAI( + model="gpt-4.1", use_responses_api=True, output_version="responses/v1" + ).bind_tools([another_tool]) assert llm.kwargs == { # type: ignore[attr-defined] "tools": [ { @@ -49,7 +51,9 @@ def test_custom_tool() -> None: ] } - llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True).bind_tools([my_tool]) + llm = ChatOpenAI( + model="gpt-4.1", use_responses_api=True, output_version="responses/v1" + ).bind_tools([my_tool]) assert llm.kwargs == { # type: ignore[attr-defined] "tools": [{"type": "custom", "name": "my_tool", "description": "Do thing."}] } diff --git a/libs/partners/openai/uv.lock b/libs/partners/openai/uv.lock index 38b8da21426..58a1807abe3 100644 --- a/libs/partners/openai/uv.lock +++ b/libs/partners/openai/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.9" resolution-markers = [ "python_full_version >= '3.13' and platform_python_implementation == 'PyPy'", @@ -480,7 +480,7 @@ wheels = [ [[package]] name = "langchain-core" -version = "0.3.73" +version = "0.3.74" source = { editable = "../../core" } dependencies 
= [ { name = "jsonpatch" },
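Putting the pieces of this patch together, a minimal usage sketch of the two new parameters, assembled from the docstrings and unit tests above (the model name, prompt, and token limit are illustrative; the integration test above still marks `verbosity` as xfail, so the live Responses API may reject it for now):

    from langchain_openai import ChatOpenAI

    # `reasoning_effort` now accepts 'minimal' in addition to 'low', 'medium', 'high';
    # `verbosity` ('low' | 'medium' | 'high') is forwarded via the Responses API payload.
    llm = ChatOpenAI(
        model="gpt-5",
        reasoning_effort="minimal",
        verbosity="high",
        use_responses_api=True,
        output_version="responses/v1",
        max_completion_tokens=100,
    )

    response = llm.invoke("hello")

Per the updated `validate_temperature` check, a non-default `temperature` passed to a `gpt-5` model is dropped rather than forwarded (and `_construct_responses_api_payload` likewise strips it for the Responses API), so the call above does not need to set it explicitly.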