openai[patch]: add Responses API attributes to BaseChatOpenAI (#30329)

`reasoning`, `include`, `store`, `truncation`.

Previously these had to be passed through `model_kwargs`.
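
As a usage sketch (parameter values here are illustrative, mirroring the updated tests below), the attributes can now be passed directly to the constructor:

```python
from langchain_openai import ChatOpenAI

# Responses API parameters passed as first-class constructor arguments
# rather than through `model_kwargs`.
llm = ChatOpenAI(
    model="o4-mini",
    use_responses_api=True,
    reasoning={"effort": "medium", "summary": "auto"},
    include=["reasoning.encrypted_content"],
    store=False,
    truncation="auto",
)
```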
ccurme 2025-06-17 14:45:50 -04:00 committed by GitHub
parent b610859633
commit c1c3e13a54
3 changed files with 65 additions and 10 deletions

View File

@@ -116,8 +116,10 @@ def test_configurable() -> None:
"request_timeout": None,
"max_retries": None,
"presence_penalty": None,
"reasoning": None,
"reasoning_effort": None,
"frequency_penalty": None,
"include": None,
"seed": None,
"service_tier": None,
"logprobs": None,
@@ -126,11 +128,13 @@ def test_configurable() -> None:
"streaming": False,
"n": None,
"top_p": None,
"truncation": None,
"max_tokens": None,
"tiktoken_model_name": None,
"default_headers": None,
"default_query": None,
"stop": None,
"store": None,
"extra_body": None,
"include_response_headers": False,
"stream_usage": False,

View File

@@ -502,15 +502,31 @@ class BaseChatOpenAI(BaseChatModel):
max_tokens: Optional[int] = Field(default=None)
"""Maximum number of tokens to generate."""
reasoning_effort: Optional[str] = None
"""Constrains effort on reasoning for reasoning models.
Reasoning models only, like OpenAI o1 and o3-mini.
"""Constrains effort on reasoning for reasoning models. For use with the Chat
Completions API.
Reasoning models only, like OpenAI o1, o3, and o4-mini.
Currently supported values are low, medium, and high. Reducing reasoning effort
can result in faster responses and fewer tokens used on reasoning in a response.
.. versionadded:: 0.2.14
"""
+ reasoning: Optional[dict[str, Any]] = None
+ """Reasoning parameters for reasoning models, i.e., OpenAI o-series models (o1, o3,
+ o4-mini, etc.). For use with the Responses API.
+ Example:
+ .. code-block:: python
+     reasoning={
+         "effort": "medium",  # can be "low", "medium", or "high"
+         "summary": "auto",  # can be "auto", "concise", or "detailed"
+     }
+ .. versionadded:: 0.3.24
+ """
tiktoken_model_name: Optional[str] = None
"""The model name to pass to tiktoken when using this class.
Tiktoken is used to count the number of tokens in documents to constrain
@@ -556,11 +572,41 @@ class BaseChatOpenAI(BaseChatModel):
However, this does not prevent a user from directly passing in the parameter during
invocation.
"""
+ include: Optional[list[str]] = None
+ """Additional fields to include in generations from Responses API.
+ Supported values:
+ - ``"file_search_call.results"``
+ - ``"message.input_image.image_url"``
+ - ``"computer_call_output.output.image_url"``
+ - ``"reasoning.encrypted_content"``
+ - ``"code_interpreter_call.outputs"``
+ .. versionadded:: 0.3.24
+ """
service_tier: Optional[str] = None
"""Latency tier for request. Options are 'auto', 'default', or 'flex'. Relevant
for users of OpenAI's scale tier service.
"""
+ store: Optional[bool] = None
+ """If True, the Responses API may store response data for future use. Defaults to
+ True.
+ .. versionadded:: 0.3.24
+ """
+ truncation: Optional[str] = None
+ """Truncation strategy (Responses API). Can be ``"auto"`` or ``"disabled"``
+ (default). If ``"auto"``, model may drop input items from the middle of the
+ message sequence to fit the context window.
+ .. versionadded:: 0.3.24
+ """
use_responses_api: Optional[bool] = None
"""Whether to use the Responses API instead of the Chat API.
@@ -685,7 +731,11 @@ class BaseChatOpenAI(BaseChatModel):
"n": self.n,
"temperature": self.temperature,
"reasoning_effort": self.reasoning_effort,
"reasoning": self.reasoning,
"include": self.include,
"service_tier": self.service_tier,
"truncation": self.truncation,
"store": self.store,
}
params = {
@@ -3134,7 +3184,7 @@ def _construct_responses_api_payload(
for legacy_token_param in ["max_tokens", "max_completion_tokens"]:
if legacy_token_param in payload:
payload["max_output_tokens"] = payload.pop(legacy_token_param)
if "reasoning_effort" in payload:
if "reasoning_effort" in payload and "reasoning" not in payload:
payload["reasoning"] = {"effort": payload.pop("reasoning_effort")}
payload["input"] = _construct_responses_api_input(messages)

View File

@@ -323,7 +323,7 @@ def test_route_from_model_kwargs() -> None:
@pytest.mark.flaky(retries=3, delay=1)
def test_computer_calls() -> None:
llm = ChatOpenAI(model="computer-use-preview", model_kwargs={"truncation": "auto"})
llm = ChatOpenAI(model="computer-use-preview", truncation="auto")
tool = {
"type": "computer_use_preview",
"display_width": 1024,
@@ -354,10 +354,10 @@ def test_file_search() -> None:
def test_stream_reasoning_summary() -> None:
reasoning = {"effort": "medium", "summary": "auto"}
llm = ChatOpenAI(
model="o4-mini", use_responses_api=True, model_kwargs={"reasoning": reasoning}
model="o4-mini",
use_responses_api=True,
reasoning={"effort": "medium", "summary": "auto"},
)
message_1 = {"role": "user", "content": "What is 3^3?"}
response_1: Optional[BaseMessageChunk] = None
@@ -465,7 +465,8 @@ def test_mcp_builtin_zdr() -> None:
llm = ChatOpenAI(
model="o4-mini",
use_responses_api=True,
model_kwargs={"store": False, "include": ["reasoning.encrypted_content"]},
store=False,
include=["reasoning.encrypted_content"],
)
llm_with_tools = llm.bind_tools(