diff --git a/libs/langchain/tests/unit_tests/chat_models/test_base.py b/libs/langchain/tests/unit_tests/chat_models/test_base.py
index e90b7d6dca0..cdf7c680f34 100644
--- a/libs/langchain/tests/unit_tests/chat_models/test_base.py
+++ b/libs/langchain/tests/unit_tests/chat_models/test_base.py
@@ -116,8 +116,10 @@ def test_configurable() -> None:
         "request_timeout": None,
         "max_retries": None,
         "presence_penalty": None,
+        "reasoning": None,
         "reasoning_effort": None,
         "frequency_penalty": None,
+        "include": None,
         "seed": None,
         "service_tier": None,
         "logprobs": None,
@@ -126,11 +128,13 @@ def test_configurable() -> None:
         "streaming": False,
         "n": None,
         "top_p": None,
+        "truncation": None,
         "max_tokens": None,
         "tiktoken_model_name": None,
         "default_headers": None,
         "default_query": None,
         "stop": None,
+        "store": None,
         "extra_body": None,
         "include_response_headers": False,
         "stream_usage": False,
diff --git a/libs/partners/openai/langchain_openai/chat_models/azure.py b/libs/partners/openai/langchain_openai/chat_models/azure.py
index 42af44fbb3d..2c85c892955 100644
--- a/libs/partners/openai/langchain_openai/chat_models/azure.py
+++ b/libs/partners/openai/langchain_openai/chat_models/azure.py
@@ -4,14 +4,14 @@ from __future__ import annotations
 
 import logging
 import os
-from collections.abc import Awaitable
+from collections.abc import AsyncIterator, Awaitable, Iterator
 from typing import Any, Callable, Optional, TypedDict, TypeVar, Union
 
 import openai
 from langchain_core.language_models import LanguageModelInput
 from langchain_core.language_models.chat_models import LangSmithParams
 from langchain_core.messages import BaseMessage
-from langchain_core.outputs import ChatResult
+from langchain_core.outputs import ChatGenerationChunk, ChatResult
 from langchain_core.runnables import Runnable
 from langchain_core.utils import from_env, secret_from_env
 from langchain_core.utils.pydantic import is_basemodel_subclass
@@ -736,6 +736,24 @@ class AzureChatOpenAI(BaseChatOpenAI):
 
         return chat_result
 
+    def _stream(self, *args: Any, **kwargs: Any) -> Iterator[ChatGenerationChunk]:
+        """Route to Chat Completions or Responses API."""
+        if self._use_responses_api({**kwargs, **self.model_kwargs}):
+            return super()._stream_responses(*args, **kwargs)
+        else:
+            return super()._stream(*args, **kwargs)
+
+    async def _astream(
+        self, *args: Any, **kwargs: Any
+    ) -> AsyncIterator[ChatGenerationChunk]:
+        """Route to Chat Completions or Responses API."""
+        if self._use_responses_api({**kwargs, **self.model_kwargs}):
+            async for chunk in super()._astream_responses(*args, **kwargs):
+                yield chunk
+        else:
+            async for chunk in super()._astream(*args, **kwargs):
+                yield chunk
+
     def with_structured_output(
         self,
         schema: Optional[_DictOrPydanticClass] = None,
diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py
index a17f3cc27aa..aa38b4a9338 100644
--- a/libs/partners/openai/langchain_openai/chat_models/base.py
+++ b/libs/partners/openai/langchain_openai/chat_models/base.py
@@ -502,15 +502,31 @@ class BaseChatOpenAI(BaseChatModel):
     max_tokens: Optional[int] = Field(default=None)
     """Maximum number of tokens to generate."""
     reasoning_effort: Optional[str] = None
-    """Constrains effort on reasoning for reasoning models.
-
-    Reasoning models only, like OpenAI o1 and o3-mini.
+    """Constrains effort on reasoning for reasoning models. For use with the Chat
+    Completions API.
+
+    Reasoning models only, like OpenAI o1, o3, and o4-mini.
     Currently supported values are low, medium, and high. Reducing reasoning effort
     can result in faster responses and fewer tokens used on reasoning in a response.
-
+
     .. versionadded:: 0.2.14
     """
+    reasoning: Optional[dict[str, Any]] = None
+    """Reasoning parameters for reasoning models, i.e., OpenAI o-series models (o1, o3,
+    o4-mini, etc.). For use with the Responses API.
+
+    Example:
+
+    .. code-block:: python
+
+        reasoning={
+            "effort": "medium",  # can be "low", "medium", or "high"
+            "summary": "auto",  # can be "auto", "concise", or "detailed"
+        }
+
+    .. versionadded:: 0.3.24
+    """
     tiktoken_model_name: Optional[str] = None
     """The model name to pass to tiktoken when using this class.
     Tiktoken is used to count the number of tokens in documents to constrain
@@ -556,11 +572,41 @@ class BaseChatOpenAI(BaseChatModel):
     However this does not prevent a user from directly passed in the parameter during
     invocation.
     """
+
+    include: Optional[list[str]] = None
+    """Additional fields to include in generations from Responses API.
+
+    Supported values:
+
+    - ``"file_search_call.results"``
+    - ``"message.input_image.image_url"``
+    - ``"computer_call_output.output.image_url"``
+    - ``"reasoning.encrypted_content"``
+    - ``"code_interpreter_call.outputs"``
+
+    .. versionadded:: 0.3.24
+    """
+
     service_tier: Optional[str] = None
     """Latency tier for request. Options are 'auto', 'default', or 'flex'. Relevant
     for users of OpenAI's scale tier service.
     """
+    store: Optional[bool] = None
+    """If True, the Responses API may store response data for future use. Defaults to
+    True.
+
+    .. versionadded:: 0.3.24
+    """
+
+    truncation: Optional[str] = None
+    """Truncation strategy (Responses API). Can be ``"auto"`` or ``"disabled"``
+    (default). If ``"auto"``, model may drop input items from the middle of the
+    message sequence to fit the context window.
+
+    .. versionadded:: 0.3.24
+    """
+
     use_responses_api: Optional[bool] = None
     """Whether to use the Responses API instead of the Chat API.
@@ -685,7 +731,11 @@ class BaseChatOpenAI(BaseChatModel):
             "n": self.n,
             "temperature": self.temperature,
             "reasoning_effort": self.reasoning_effort,
+            "reasoning": self.reasoning,
+            "include": self.include,
             "service_tier": self.service_tier,
+            "truncation": self.truncation,
+            "store": self.store,
         }
 
         params = {
@@ -3134,7 +3184,7 @@ def _construct_responses_api_payload(
     for legacy_token_param in ["max_tokens", "max_completion_tokens"]:
         if legacy_token_param in payload:
             payload["max_output_tokens"] = payload.pop(legacy_token_param)
-    if "reasoning_effort" in payload:
+    if "reasoning_effort" in payload and "reasoning" not in payload:
         payload["reasoning"] = {"effort": payload.pop("reasoning_effort")}
 
     payload["input"] = _construct_responses_api_input(messages)
diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py b/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py
index 41e6aec4bd5..278f8dcaaed 100644
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_azure_standard.py
@@ -2,6 +2,7 @@
 
 import os
 
+import pytest
 from langchain_core.language_models import BaseChatModel
 from langchain_tests.integration_tests import ChatModelIntegrationTests
 
@@ -39,6 +40,38 @@ class TestAzureOpenAIStandard(ChatModelIntegrationTests):
         return True
 
 
+class TestAzureOpenAIResponses(ChatModelIntegrationTests):
+    @property
+    def chat_model_class(self) -> type[BaseChatModel]:
+        return AzureChatOpenAI
+
+    @property
+    def chat_model_params(self) -> dict:
+        return {
+            "deployment_name": os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"],
+            "model": "gpt-4o-mini",
+            "openai_api_version": OPENAI_API_VERSION,
+            "azure_endpoint": OPENAI_API_BASE,
+            "use_responses_api": True,
+        }
+
+    @property
+    def supports_image_inputs(self) -> bool:
+        return True
+
+    @property
+    def supports_image_urls(self) -> bool:
+        return True
+
+    @property
+    def supports_json_mode(self) -> bool:
+        return True
+
+    @pytest.mark.xfail(reason="Unsupported.")
+    def test_stop_sequence(self, model: BaseChatModel) -> None:
+        super().test_stop_sequence(model)
+
+
 class TestAzureOpenAIStandardLegacy(ChatModelIntegrationTests):
     """Test a legacy model."""
diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py
index f4976d43476..7224a7e2958 100644
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_api.py
@@ -323,7 +323,7 @@ def test_route_from_model_kwargs() -> None:
 
 @pytest.mark.flaky(retries=3, delay=1)
 def test_computer_calls() -> None:
-    llm = ChatOpenAI(model="computer-use-preview", model_kwargs={"truncation": "auto"})
+    llm = ChatOpenAI(model="computer-use-preview", truncation="auto")
     tool = {
         "type": "computer_use_preview",
         "display_width": 1024,
@@ -354,10 +354,10 @@ def test_file_search() -> None:
 
 
 def test_stream_reasoning_summary() -> None:
-    reasoning = {"effort": "medium", "summary": "auto"}
     llm = ChatOpenAI(
-        model="o4-mini", use_responses_api=True, model_kwargs={"reasoning": reasoning}
+        model="o4-mini",
+        use_responses_api=True,
+        reasoning={"effort": "medium", "summary": "auto"},
     )
     message_1 = {"role": "user", "content": "What is 3^3?"}
     response_1: Optional[BaseMessageChunk] = None
@@ -465,7 +465,8 @@ def test_mcp_builtin_zdr() -> None:
     llm = ChatOpenAI(
model="o4-mini", use_responses_api=True, - model_kwargs={"store": False, "include": ["reasoning.encrypted_content"]}, + store=False, + include=["reasoning.encrypted_content"], ) llm_with_tools = llm.bind_tools(