S. M. Mohiuddin Khan Shiam 2025-06-18 00:47:51 +06:00
commit 4c8d802416
5 changed files with 118 additions and 12 deletions

View File

@@ -116,8 +116,10 @@ def test_configurable() -> None:
         "request_timeout": None,
         "max_retries": None,
         "presence_penalty": None,
+        "reasoning": None,
         "reasoning_effort": None,
         "frequency_penalty": None,
+        "include": None,
         "seed": None,
         "service_tier": None,
         "logprobs": None,
@@ -126,11 +128,13 @@ def test_configurable() -> None:
         "streaming": False,
         "n": None,
         "top_p": None,
+        "truncation": None,
         "max_tokens": None,
         "tiktoken_model_name": None,
         "default_headers": None,
         "default_query": None,
         "stop": None,
+        "store": None,
         "extra_body": None,
         "include_response_headers": False,
         "stream_usage": False,

View File

@@ -4,14 +4,14 @@ from __future__ import annotations
 import logging
 import os
-from collections.abc import Awaitable
+from collections.abc import AsyncIterator, Awaitable, Iterator
 from typing import Any, Callable, Optional, TypedDict, TypeVar, Union
 import openai
 from langchain_core.language_models import LanguageModelInput
 from langchain_core.language_models.chat_models import LangSmithParams
 from langchain_core.messages import BaseMessage
-from langchain_core.outputs import ChatResult
+from langchain_core.outputs import ChatGenerationChunk, ChatResult
 from langchain_core.runnables import Runnable
 from langchain_core.utils import from_env, secret_from_env
 from langchain_core.utils.pydantic import is_basemodel_subclass
@@ -736,6 +736,24 @@ class AzureChatOpenAI(BaseChatOpenAI):
         return chat_result

+    def _stream(self, *args: Any, **kwargs: Any) -> Iterator[ChatGenerationChunk]:
+        """Route to Chat Completions or Responses API."""
+        if self._use_responses_api({**kwargs, **self.model_kwargs}):
+            return super()._stream_responses(*args, **kwargs)
+        else:
+            return super()._stream(*args, **kwargs)
+
+    async def _astream(
+        self, *args: Any, **kwargs: Any
+    ) -> AsyncIterator[ChatGenerationChunk]:
+        """Route to Chat Completions or Responses API."""
+        if self._use_responses_api({**kwargs, **self.model_kwargs}):
+            async for chunk in super()._astream_responses(*args, **kwargs):
+                yield chunk
+        else:
+            async for chunk in super()._astream(*args, **kwargs):
+                yield chunk
+
     def with_structured_output(
         self,
         schema: Optional[_DictOrPydanticClass] = None,
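A minimal usage sketch of the routing added above. The environment variable names for the endpoint and API version are illustrative assumptions; only AZURE_OPENAI_CHAT_DEPLOYMENT_NAME appears in this commit's tests. With use_responses_api=True, .stream() and .astream() now go through the Responses API; without it, the Chat Completions path is unchanged.

    import os

    from langchain_openai import AzureChatOpenAI

    llm = AzureChatOpenAI(
        azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"],
        azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],    # assumed env var name
        openai_api_version=os.environ["OPENAI_API_VERSION"],   # assumed env var name
        use_responses_api=True,  # streaming now routes to _stream_responses
    )

    # Aggregate streamed chunks; adding chunks together is the standard
    # LangChain streaming pattern.
    full = None
    for chunk in llm.stream("Write a haiku about diffs."):
        full = chunk if full is None else full + chunk
    print(full.content)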

View File

@@ -502,15 +502,31 @@ class BaseChatOpenAI(BaseChatModel):
     max_tokens: Optional[int] = Field(default=None)
     """Maximum number of tokens to generate."""
     reasoning_effort: Optional[str] = None
-    """Constrains effort on reasoning for reasoning models.
+    """Constrains effort on reasoning for reasoning models. For use with the Chat
+    Completions API.

-    Reasoning models only, like OpenAI o1 and o3-mini.
+    Reasoning models only, like OpenAI o1, o3, and o4-mini.

     Currently supported values are low, medium, and high. Reducing reasoning effort
     can result in faster responses and fewer tokens used on reasoning in a response.

     .. versionadded:: 0.2.14
     """
+    reasoning: Optional[dict[str, Any]] = None
+    """Reasoning parameters for reasoning models, i.e., OpenAI o-series models (o1, o3,
+    o4-mini, etc.). For use with the Responses API.
+
+    Example:
+
+    .. code-block:: python
+
+        reasoning={
+            "effort": "medium",  # can be "low", "medium", or "high"
+            "summary": "auto",  # can be "auto", "concise", or "detailed"
+        }
+
+    .. versionadded:: 0.3.24
+    """
     tiktoken_model_name: Optional[str] = None
     """The model name to pass to tiktoken when using this class.

     Tiktoken is used to count the number of tokens in documents to constrain
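A short sketch of how the new reasoning field sits next to the Chat Completions-only reasoning_effort string; the model name and prompt are illustrative, taken from this commit's integration tests.

    from langchain_openai import ChatOpenAI

    # Responses API: structured reasoning options (effort plus optional summary).
    llm = ChatOpenAI(
        model="o4-mini",
        use_responses_api=True,
        reasoning={"effort": "medium", "summary": "auto"},
    )
    response = llm.invoke("What is 3^3?")

    # Chat Completions API: the flat reasoning_effort string still applies.
    legacy = ChatOpenAI(model="o4-mini", reasoning_effort="medium")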
@@ -556,11 +572,41 @@ class BaseChatOpenAI(BaseChatModel):
     However this does not prevent a user from directly passed in the parameter during
     invocation.
     """
+    include: Optional[list[str]] = None
+    """Additional fields to include in generations from Responses API.
+
+    Supported values:
+
+    - ``"file_search_call.results"``
+    - ``"message.input_image.image_url"``
+    - ``"computer_call_output.output.image_url"``
+    - ``"reasoning.encrypted_content"``
+    - ``"code_interpreter_call.outputs"``
+
+    .. versionadded:: 0.3.24
+    """
     service_tier: Optional[str] = None
     """Latency tier for request. Options are 'auto', 'default', or 'flex'. Relevant
     for users of OpenAI's scale tier service.
     """
+    store: Optional[bool] = None
+    """If True, the Responses API may store response data for future use. Defaults to
+    True.
+
+    .. versionadded:: 0.3.24
+    """
+    truncation: Optional[str] = None
+    """Truncation strategy (Responses API). Can be ``"auto"`` or ``"disabled"``
+    (default). If ``"auto"``, model may drop input items from the middle of the
+    message sequence to fit the context window.
+
+    .. versionadded:: 0.3.24
+    """
     use_responses_api: Optional[bool] = None
     """Whether to use the Responses API instead of the Chat API.
@@ -685,7 +731,11 @@ class BaseChatOpenAI(BaseChatModel):
             "n": self.n,
             "temperature": self.temperature,
             "reasoning_effort": self.reasoning_effort,
+            "reasoning": self.reasoning,
+            "include": self.include,
             "service_tier": self.service_tier,
+            "truncation": self.truncation,
+            "store": self.store,
         }

         params = {
@@ -3134,7 +3184,7 @@ def _construct_responses_api_payload(
     for legacy_token_param in ["max_tokens", "max_completion_tokens"]:
         if legacy_token_param in payload:
             payload["max_output_tokens"] = payload.pop(legacy_token_param)
-    if "reasoning_effort" in payload:
+    if "reasoning_effort" in payload and "reasoning" not in payload:
         payload["reasoning"] = {"effort": payload.pop("reasoning_effort")}
     payload["input"] = _construct_responses_api_input(messages)

View File

@@ -2,6 +2,7 @@
 import os

+import pytest
 from langchain_core.language_models import BaseChatModel
 from langchain_tests.integration_tests import ChatModelIntegrationTests
@@ -39,6 +40,38 @@ class TestAzureOpenAIStandard(ChatModelIntegrationTests):
         return True


+class TestAzureOpenAIResponses(ChatModelIntegrationTests):
+    @property
+    def chat_model_class(self) -> type[BaseChatModel]:
+        return AzureChatOpenAI
+
+    @property
+    def chat_model_params(self) -> dict:
+        return {
+            "deployment_name": os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"],
+            "model": "gpt-4o-mini",
+            "openai_api_version": OPENAI_API_VERSION,
+            "azure_endpoint": OPENAI_API_BASE,
+            "use_responses_api": True,
+        }
+
+    @property
+    def supports_image_inputs(self) -> bool:
+        return True
+
+    @property
+    def supports_image_urls(self) -> bool:
+        return True
+
+    @property
+    def supports_json_mode(self) -> bool:
+        return True
+
+    @pytest.mark.xfail(reason="Unsupported.")
+    def test_stop_sequence(self, model: BaseChatModel) -> None:
+        super().test_stop_sequence(model)
+
+
 class TestAzureOpenAIStandardLegacy(ChatModelIntegrationTests):
     """Test a legacy model."""

View File

@@ -323,7 +323,7 @@ def test_route_from_model_kwargs() -> None:
 @pytest.mark.flaky(retries=3, delay=1)
 def test_computer_calls() -> None:
-    llm = ChatOpenAI(model="computer-use-preview", model_kwargs={"truncation": "auto"})
+    llm = ChatOpenAI(model="computer-use-preview", truncation="auto")
     tool = {
         "type": "computer_use_preview",
         "display_width": 1024,
@@ -354,10 +354,10 @@ def test_file_search() -> None:
 def test_stream_reasoning_summary() -> None:
-    reasoning = {"effort": "medium", "summary": "auto"}
     llm = ChatOpenAI(
-        model="o4-mini", use_responses_api=True, model_kwargs={"reasoning": reasoning}
+        model="o4-mini",
+        use_responses_api=True,
+        reasoning={"effort": "medium", "summary": "auto"},
     )
     message_1 = {"role": "user", "content": "What is 3^3?"}
     response_1: Optional[BaseMessageChunk] = None
@@ -465,7 +465,8 @@ def test_mcp_builtin_zdr() -> None:
     llm = ChatOpenAI(
         model="o4-mini",
         use_responses_api=True,
-        model_kwargs={"store": False, "include": ["reasoning.encrypted_content"]},
+        store=False,
+        include=["reasoning.encrypted_content"],
     )
     llm_with_tools = llm.bind_tools(