Compare commits

4 Commits

Author                  SHA1        Message                                          Date
Mason Daugherty         80fe6e5d10  Merge branch 'master' into copilot/fix-31653     2025-07-21 13:39:25 -04:00
Mason Daugherty         cfa5f565ad  lint                                             2025-07-16 19:42:32 -04:00
copilot-swe-agent[bot]  2113f725ec  Implement Azure Responses API fallback handling  2025-07-16 23:06:05 +00:00
                                    Co-authored-by: mdrxy <61371264+mdrxy@users.noreply.github.com>
copilot-swe-agent[bot]  d7c770b51b  Initial plan                                     2025-07-16 22:53:33 +00:00
2 changed files with 193 additions and 8 deletions
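
In short, these commits add graceful degradation for Azure deployments that do not support the OpenAI Responses API: when Azure answers a Responses call with HTTP 405 (Method Not Allowed), AzureChatOpenAI logs a warning and retries the request through the Chat Completions API. A minimal usage sketch of the behavior (the endpoint, key, API version, and deployment name below are placeholders, not values taken from this diff):

    from langchain_openai import AzureChatOpenAI

    llm = AzureChatOpenAI(
        azure_endpoint="https://example-resource.openai.azure.com",  # placeholder
        api_key="...",  # placeholder; or set AZURE_OPENAI_API_KEY
        api_version="2024-10-21",  # placeholder
        azure_deployment="my-deployment",  # placeholder
        use_responses_api=True,  # opt in to the Responses API
    )

    # If the deployment rejects the Responses API with a 405, the patched
    # _generate/_stream methods log a warning and transparently fall back
    # to Chat Completions, so this call still succeeds.
    result = llm.invoke("Hello!")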

File: langchain_openai/chat_models/azure.py

@@ -4,12 +4,21 @@ from __future__ import annotations

 import logging
 import os
+import warnings
 from collections.abc import AsyncIterator, Awaitable, Iterator
 from typing import Any, Callable, Optional, TypedDict, TypeVar, Union

 import openai
+from langchain_core.callbacks import (
+    AsyncCallbackManagerForLLMRun,
+    CallbackManagerForLLMRun,
+)
 from langchain_core.language_models import LanguageModelInput
-from langchain_core.language_models.chat_models import LangSmithParams
+from langchain_core.language_models.chat_models import (
+    LangSmithParams,
+    agenerate_from_stream,
+    generate_from_stream,
+)
 from langchain_core.messages import BaseMessage
 from langchain_core.outputs import ChatGenerationChunk, ChatResult
 from langchain_core.runnables import Runnable
@@ -18,7 +27,13 @@ from langchain_core.utils.pydantic import is_basemodel_subclass
 from pydantic import BaseModel, Field, SecretStr, model_validator
 from typing_extensions import Literal, Self

-from langchain_openai.chat_models.base import BaseChatOpenAI
+from langchain_openai.chat_models.base import (
+    BaseChatOpenAI,
+    _construct_lc_result_from_responses_api,
+    _handle_openai_bad_request,
+    _is_pydantic_class,
+    run_in_executor,
+)

 logger = logging.getLogger(__name__)
@@ -34,7 +49,7 @@ class _AllReturnType(TypedDict):
     parsing_error: Optional[BaseException]


-def _is_pydantic_class(obj: Any) -> bool:
+def _is_pydantic_class(obj: Any) -> bool:  # noqa: F811
     return isinstance(obj, type) and is_basemodel_subclass(obj)
@@ -749,7 +764,18 @@ class AzureChatOpenAI(BaseChatOpenAI):
     def _stream(self, *args: Any, **kwargs: Any) -> Iterator[ChatGenerationChunk]:
         """Route to Chat Completions or Responses API."""
         if self._use_responses_api({**kwargs, **self.model_kwargs}):
-            return super()._stream_responses(*args, **kwargs)
+            try:
+                return super()._stream_responses(*args, **kwargs)
+            except openai.APIStatusError as e:
+                if e.status_code == 405:  # Method Not Allowed
+                    logger.warning(
+                        "Azure OpenAI does not support the Responses API for this "
+                        "deployment. Falling back to Chat Completions API. To avoid "
+                        "this warning, set use_responses_api=False or upgrade to a "
+                        "supported API version."
+                    )
+                    return super()._stream(*args, **kwargs)
+                raise
         else:
             return super()._stream(*args, **kwargs)
@@ -758,12 +784,171 @@
     ) -> AsyncIterator[ChatGenerationChunk]:
         """Route to Chat Completions or Responses API."""
         if self._use_responses_api({**kwargs, **self.model_kwargs}):
-            async for chunk in super()._astream_responses(*args, **kwargs):
-                yield chunk
+            try:
+                async for chunk in super()._astream_responses(*args, **kwargs):
+                    yield chunk
+            except openai.APIStatusError as e:
+                if e.status_code == 405:  # Method Not Allowed
+                    logger.warning(
+                        "Azure OpenAI does not support the Responses API for this "
+                        "deployment. Falling back to Chat Completions API. To avoid "
+                        "this warning, set use_responses_api=False or upgrade to a "
+                        "supported API version."
+                    )
+                    async for chunk in super()._astream(*args, **kwargs):
+                        yield chunk
+                else:
+                    raise
         else:
             async for chunk in super()._astream(*args, **kwargs):
                 yield chunk

+    def _generate(
+        self,
+        messages: list[BaseMessage],
+        stop: Optional[list[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> ChatResult:
+        """Route to Chat Completions or Responses API with fallback for Azure."""
+        if self.streaming:
+            stream_iter = self._stream(
+                messages, stop=stop, run_manager=run_manager, **kwargs
+            )
+            return generate_from_stream(stream_iter)
+        payload = self._get_request_payload(messages, stop=stop, **kwargs)
+        generation_info = None
+        if "response_format" in payload:
+            if self.include_response_headers:
+                warnings.warn(
+                    "Cannot currently include response headers when response_format is "
+                    "specified."
+                )
+            payload.pop("stream")
+            try:
+                response = self.root_client.beta.chat.completions.parse(**payload)
+            except openai.BadRequestError as e:
+                _handle_openai_bad_request(e)
+        elif self._use_responses_api(payload):
+            try:
+                original_schema_obj = kwargs.get("response_format")
+                if original_schema_obj and _is_pydantic_class(original_schema_obj):
+                    response = self.root_client.responses.parse(**payload)
+                else:
+                    if self.include_response_headers:
+                        raw_response = (
+                            self.root_client.with_raw_response.responses.create(
+                                **payload
+                            )
+                        )
+                        response = raw_response.parse()
+                        generation_info = {"headers": dict(raw_response.headers)}
+                    else:
+                        response = self.root_client.responses.create(**payload)
+                return _construct_lc_result_from_responses_api(
+                    response,
+                    schema=original_schema_obj,
+                    metadata=generation_info,
+                    output_version=self.output_version,
+                )
+            except openai.APIStatusError as e:
+                if e.status_code == 405:  # Method Not Allowed
+                    logger.warning(
+                        "Azure OpenAI does not support the Responses API for this "
+                        "deployment. Falling back to Chat Completions API. To avoid "
+                        "this warning, set use_responses_api=False or upgrade to a "
+                        "supported API version."
+                    )
+                    # Fall through to regular chat completions
+                else:
+                    raise
+        # Regular chat completions API
+        if self.include_response_headers:
+            raw_response = self.client.with_raw_response.create(**payload)
+            response = raw_response.parse()
+            generation_info = {"headers": dict(raw_response.headers)}
+        else:
+            response = self.client.create(**payload)
+        return self._create_chat_result(response, generation_info)
+
+    async def _agenerate(
+        self,
+        messages: list[BaseMessage],
+        stop: Optional[list[str]] = None,
+        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> ChatResult:
+        """Route to Chat Completions or Responses API with fallback for Azure."""
+        if self.streaming:
+            stream_iter = self._astream(
+                messages, stop=stop, run_manager=run_manager, **kwargs
+            )
+            return await agenerate_from_stream(stream_iter)
+        payload = self._get_request_payload(messages, stop=stop, **kwargs)
+        generation_info = None
+        if "response_format" in payload:
+            if self.include_response_headers:
+                warnings.warn(
+                    "Cannot currently include response headers when response_format is "
+                    "specified."
+                )
+            payload.pop("stream")
+            try:
+                response = await self.root_async_client.beta.chat.completions.parse(
+                    **payload
+                )
+            except openai.BadRequestError as e:
+                _handle_openai_bad_request(e)
+        elif self._use_responses_api(payload):
+            try:
+                original_schema_obj = kwargs.get("response_format")
+                if original_schema_obj and _is_pydantic_class(original_schema_obj):
+                    response = await self.root_async_client.responses.parse(**payload)
+                else:
+                    if self.include_response_headers:
+                        raw_response = await self.root_async_client.with_raw_response.responses.create(  # noqa: E501
+                            **payload
+                        )
+                        response = raw_response.parse()
+                        generation_info = {"headers": dict(raw_response.headers)}
+                    else:
+                        response = await self.root_async_client.responses.create(
+                            **payload
+                        )
+                return _construct_lc_result_from_responses_api(
+                    response,
+                    schema=original_schema_obj,
+                    metadata=generation_info,
+                    output_version=self.output_version,
+                )
+            except openai.APIStatusError as e:
+                if e.status_code == 405:  # Method Not Allowed
+                    logger.warning(
+                        "Azure OpenAI does not support the Responses API for this "
+                        "deployment. Falling back to Chat Completions API. To avoid "
+                        "this warning, set use_responses_api=False or upgrade to a "
+                        "supported API version."
+                    )
+                    # Fall through to regular chat completions
+                else:
+                    raise
+        # Regular chat completions API
+        if self.include_response_headers:
+            raw_response = await self.async_client.with_raw_response.create(**payload)
+            response = raw_response.parse()
+            generation_info = {"headers": dict(raw_response.headers)}
+        else:
+            response = await self.async_client.create(**payload)
+        return await run_in_executor(
+            None, self._create_chat_result, response, generation_info
+        )
+
     def with_structured_output(
         self,
         schema: Optional[_DictOrPydanticClass] = None,
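
For reference, a rough sketch of how the 405 fallback path could be exercised in a test. The helper and test names are hypothetical scaffolding, not part of this diff; it assumes pytest-style execution and patches the base-class methods that the Azure overrides delegate to via super():

    from unittest.mock import patch

    import httpx
    import openai

    from langchain_openai import AzureChatOpenAI
    from langchain_openai.chat_models.base import BaseChatOpenAI


    def _make_405_error() -> openai.APIStatusError:
        # Shape of the error that the new except blocks catch.
        request = httpx.Request("POST", "https://example.invalid/responses")
        return openai.APIStatusError(
            "Method Not Allowed",
            response=httpx.Response(405, request=request),
            body=None,
        )


    def test_stream_falls_back_to_chat_completions() -> None:
        llm = AzureChatOpenAI(
            azure_endpoint="https://example.invalid",  # placeholder
            api_key="test",  # placeholder
            api_version="2024-10-21",  # placeholder
            azure_deployment="my-deployment",  # placeholder
            use_responses_api=True,
        )
        # Patch on BaseChatOpenAI because AzureChatOpenAI._stream calls
        # super()._stream_responses() / super()._stream().
        with patch.object(
            BaseChatOpenAI, "_stream_responses", side_effect=_make_405_error()
        ), patch.object(
            BaseChatOpenAI, "_stream", return_value=iter([])
        ) as chat_completions:
            list(llm._stream([]))
        assert chat_completions.called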

File: uv.lock

@@ -480,7 +480,7 @@ wheels = [
 [[package]]
 name = "langchain-core"
-version = "0.3.68"
+version = "0.3.69"
 source = { editable = "../../core" }
 dependencies = [
     { name = "jsonpatch" },
@@ -496,7 +496,7 @@ dependencies = [
 requires-dist = [
     { name = "jsonpatch", specifier = ">=1.33,<2.0" },
     { name = "langsmith", specifier = ">=0.3.45" },
-    { name = "packaging", specifier = ">=23.2,<25" },
+    { name = "packaging", specifier = ">=23.2" },
    { name = "pydantic", specifier = ">=2.7.4" },
    { name = "pyyaml", specifier = ">=5.3" },
    { name = "tenacity", specifier = ">=8.1.0,!=8.4.0,<10.0.0" },