openai[patch]: add Responses API attributes to BaseChatOpenAI (#30329)

Adds `reasoning`, `include`, `store`, and `truncation` as first-class attributes.

Previously these had to be passed through `model_kwargs`.
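A before/after sketch of the migration (model name and values are illustrative, drawn from the test changes in this diff):

```python
from langchain_openai import ChatOpenAI

# Before: Responses API parameters had to go through model_kwargs
llm_old = ChatOpenAI(
    model="o4-mini",
    use_responses_api=True,
    model_kwargs={"reasoning": {"effort": "medium", "summary": "auto"}},
)

# After: they are first-class attributes on BaseChatOpenAI
llm_new = ChatOpenAI(
    model="o4-mini",
    use_responses_api=True,
    reasoning={"effort": "medium", "summary": "auto"},
    include=["reasoning.encrypted_content"],
    store=False,
    truncation="auto",
)
```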
ccurme 2025-06-17 14:45:50 -04:00 committed by GitHub
parent b610859633
commit c1c3e13a54
3 changed files with 65 additions and 10 deletions


```diff
@@ -116,8 +116,10 @@ def test_configurable() -> None:
         "request_timeout": None,
         "max_retries": None,
         "presence_penalty": None,
+        "reasoning": None,
         "reasoning_effort": None,
         "frequency_penalty": None,
+        "include": None,
         "seed": None,
         "service_tier": None,
         "logprobs": None,
@@ -126,11 +128,13 @@ def test_configurable() -> None:
         "streaming": False,
         "n": None,
         "top_p": None,
+        "truncation": None,
         "max_tokens": None,
         "tiktoken_model_name": None,
         "default_headers": None,
         "default_query": None,
         "stop": None,
+        "store": None,
         "extra_body": None,
         "include_response_headers": False,
         "stream_usage": False,
```


```diff
@@ -502,15 +502,31 @@ class BaseChatOpenAI(BaseChatModel):
     max_tokens: Optional[int] = Field(default=None)
     """Maximum number of tokens to generate."""
     reasoning_effort: Optional[str] = None
-    """Constrains effort on reasoning for reasoning models.
+    """Constrains effort on reasoning for reasoning models. For use with the Chat
+    Completions API.
 
-    Reasoning models only, like OpenAI o1 and o3-mini.
+    Reasoning models only, like OpenAI o1, o3, and o4-mini.
 
     Currently supported values are low, medium, and high. Reducing reasoning effort
     can result in faster responses and fewer tokens used on reasoning in a response.
 
     .. versionadded:: 0.2.14
     """
+    reasoning: Optional[dict[str, Any]] = None
+    """Reasoning parameters for reasoning models, i.e., OpenAI o-series models (o1, o3,
+    o4-mini, etc.). For use with the Responses API.
+
+    Example:
+
+    .. code-block:: python
+
+        reasoning={
+            "effort": "medium",  # can be "low", "medium", or "high"
+            "summary": "auto",  # can be "auto", "concise", or "detailed"
+        }
+
+    .. versionadded:: 0.3.24
+    """
     tiktoken_model_name: Optional[str] = None
     """The model name to pass to tiktoken when using this class.
 
     Tiktoken is used to count the number of tokens in documents to constrain
```
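The new `reasoning` dict flows straight through to the Responses API; a minimal streaming sketch mirroring `test_stream_reasoning_summary` in the integration-test changes below (prompt and aggregation are illustrative):

```python
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(
    model="o4-mini",
    use_responses_api=True,
    reasoning={"effort": "medium", "summary": "auto"},
)

full = None
for chunk in llm.stream("What is 3^3?"):
    # Chunks aggregate via +; reasoning summaries arrive alongside text content.
    full = chunk if full is None else full + chunk
```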
```diff
@@ -556,11 +572,41 @@ class BaseChatOpenAI(BaseChatModel):
     However this does not prevent a user from directly passed in the parameter during
     invocation.
     """
+    include: Optional[list[str]] = None
+    """Additional fields to include in generations from Responses API.
+
+    Supported values:
+
+    - ``"file_search_call.results"``
+    - ``"message.input_image.image_url"``
+    - ``"computer_call_output.output.image_url"``
+    - ``"reasoning.encrypted_content"``
+    - ``"code_interpreter_call.outputs"``
+
+    .. versionadded:: 0.3.24
+    """
     service_tier: Optional[str] = None
     """Latency tier for request. Options are 'auto', 'default', or 'flex'. Relevant
     for users of OpenAI's scale tier service.
     """
+    store: Optional[bool] = None
+    """If True, the Responses API may store response data for future use. Defaults to
+    True.
+
+    .. versionadded:: 0.3.24
+    """
+    truncation: Optional[str] = None
+    """Truncation strategy (Responses API). Can be ``"auto"`` or ``"disabled"``
+    (default). If ``"auto"``, model may drop input items from the middle of the
+    message sequence to fit the context window.
+
+    .. versionadded:: 0.3.24
+    """
     use_responses_api: Optional[bool] = None
     """Whether to use the Responses API instead of the Chat API.
```
```diff
@@ -685,7 +731,11 @@ class BaseChatOpenAI(BaseChatModel):
             "n": self.n,
             "temperature": self.temperature,
             "reasoning_effort": self.reasoning_effort,
+            "reasoning": self.reasoning,
+            "include": self.include,
             "service_tier": self.service_tier,
+            "truncation": self.truncation,
+            "store": self.store,
         }
 
         params = {
```
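Each new attribute defaults to `None`, so unset values need to be dropped before the request is built; a simplified sketch of that filtering step (not the library's exact code):

```python
default_params = {
    "reasoning_effort": None,
    "reasoning": {"effort": "medium"},
    "include": None,
    "truncation": "auto",
    "store": None,
}
# Drop unset (None) entries so they never reach the API payload.
request_params = {k: v for k, v in default_params.items() if v is not None}
assert request_params == {"reasoning": {"effort": "medium"}, "truncation": "auto"}
```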
```diff
@@ -3134,7 +3184,7 @@ def _construct_responses_api_payload(
     for legacy_token_param in ["max_tokens", "max_completion_tokens"]:
         if legacy_token_param in payload:
             payload["max_output_tokens"] = payload.pop(legacy_token_param)
-    if "reasoning_effort" in payload:
+    if "reasoning_effort" in payload and "reasoning" not in payload:
         payload["reasoning"] = {"effort": payload.pop("reasoning_effort")}
     payload["input"] = _construct_responses_api_input(messages)
```


```diff
@@ -323,7 +323,7 @@ def test_route_from_model_kwargs() -> None:
 @pytest.mark.flaky(retries=3, delay=1)
 def test_computer_calls() -> None:
-    llm = ChatOpenAI(model="computer-use-preview", model_kwargs={"truncation": "auto"})
+    llm = ChatOpenAI(model="computer-use-preview", truncation="auto")
     tool = {
         "type": "computer_use_preview",
         "display_width": 1024,
```
```diff
@@ -354,10 +354,10 @@ def test_file_search() -> None:
 def test_stream_reasoning_summary() -> None:
-    reasoning = {"effort": "medium", "summary": "auto"}
     llm = ChatOpenAI(
-        model="o4-mini", use_responses_api=True, model_kwargs={"reasoning": reasoning}
+        model="o4-mini",
+        use_responses_api=True,
+        reasoning={"effort": "medium", "summary": "auto"},
     )
     message_1 = {"role": "user", "content": "What is 3^3?"}
     response_1: Optional[BaseMessageChunk] = None
```
```diff
@@ -465,7 +465,8 @@ def test_mcp_builtin_zdr() -> None:
     llm = ChatOpenAI(
         model="o4-mini",
         use_responses_api=True,
-        model_kwargs={"store": False, "include": ["reasoning.encrypted_content"]},
+        store=False,
+        include=["reasoning.encrypted_content"],
     )

     llm_with_tools = llm.bind_tools(
```