feat(core): add ContextOverflowError, raise in anthropic and openai (#35099)

2026-06-09 10:17:00 +00:00 · 2026-02-09 15:15:34 -05:00
parent 4ca586b322
commit 7c41298355
5 changed files with 445 additions and 137 deletions
--- a/libs/partners/openai/langchain_openai/chat_models/base.py
+++ b/libs/partners/openai/langchain_openai/chat_models/base.py
@@ -40,6 +40,7 @@ from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
 )
+from langchain_core.exceptions import ContextOverflowError
 from langchain_core.language_models import (
    LanguageModelInput,
    ModelProfileRegistry,
@@ -449,7 +450,22 @@ def _update_token_usage(
    return new_usage


+class OpenAIContextOverflowError(openai.BadRequestError, ContextOverflowError):
+    """BadRequestError raised when input exceeds OpenAI's context limit."""
+
+
+class OpenAIAPIContextOverflowError(openai.APIError, ContextOverflowError):
+    """APIError raised when input exceeds OpenAI's context limit."""
+
+
 def _handle_openai_bad_request(e: openai.BadRequestError) -> None:
+    if (
+        "context_length_exceeded" in str(e)
+        or "Input tokens exceed the configured limit" in e.message
+    ):
+        raise OpenAIContextOverflowError(
+            message=e.message, response=e.response, body=e.body
+        ) from e
    if (
        "'response_format' of type 'json_schema' is not supported with this model"
    ) in e.message:
@@ -474,6 +490,15 @@ def _handle_openai_bad_request(e: openai.BadRequestError) -> None:
    raise


+def _handle_openai_api_error(e: openai.APIError) -> None:
+    error_message = str(e)
+    if "exceeds the context window" in error_message:
+        raise OpenAIAPIContextOverflowError(
+            message=e.message, request=e.request, body=e.body
+        ) from e
+    raise
+
+
 def _model_prefers_responses_api(model_name: str | None) -> bool:
    if not model_name:
        return False
@@ -1146,49 +1171,54 @@ class BaseChatOpenAI(BaseChatModel):
        self._ensure_sync_client_available()
        kwargs["stream"] = True
        payload = self._get_request_payload(messages, stop=stop, **kwargs)
-        if self.include_response_headers:
-            raw_context_manager = self.root_client.with_raw_response.responses.create(
-                **payload
-            )
-            context_manager = raw_context_manager.parse()
-            headers = {"headers": dict(raw_context_manager.headers)}
-        else:
-            context_manager = self.root_client.responses.create(**payload)
-            headers = {}
-        original_schema_obj = kwargs.get("response_format")
-
-        with context_manager as response:
-            is_first_chunk = True
-            current_index = -1
-            current_output_index = -1
-            current_sub_index = -1
-            has_reasoning = False
-            for chunk in response:
-                metadata = headers if is_first_chunk else {}
-                (
-                    current_index,
-                    current_output_index,
-                    current_sub_index,
-                    generation_chunk,
-                ) = _convert_responses_chunk_to_generation_chunk(
-                    chunk,
-                    current_index,
-                    current_output_index,
-                    current_sub_index,
-                    schema=original_schema_obj,
-                    metadata=metadata,
-                    has_reasoning=has_reasoning,
-                    output_version=self.output_version,
+        try:
+            if self.include_response_headers:
+                raw_context_manager = (
+                    self.root_client.with_raw_response.responses.create(**payload)
                )
-                if generation_chunk:
-                    if run_manager:
-                        run_manager.on_llm_new_token(
-                            generation_chunk.text, chunk=generation_chunk
-                        )
-                    is_first_chunk = False
-                    if "reasoning" in generation_chunk.message.additional_kwargs:
-                        has_reasoning = True
-                    yield generation_chunk
+                context_manager = raw_context_manager.parse()
+                headers = {"headers": dict(raw_context_manager.headers)}
+            else:
+                context_manager = self.root_client.responses.create(**payload)
+                headers = {}
+            original_schema_obj = kwargs.get("response_format")
+
+            with context_manager as response:
+                is_first_chunk = True
+                current_index = -1
+                current_output_index = -1
+                current_sub_index = -1
+                has_reasoning = False
+                for chunk in response:
+                    metadata = headers if is_first_chunk else {}
+                    (
+                        current_index,
+                        current_output_index,
+                        current_sub_index,
+                        generation_chunk,
+                    ) = _convert_responses_chunk_to_generation_chunk(
+                        chunk,
+                        current_index,
+                        current_output_index,
+                        current_sub_index,
+                        schema=original_schema_obj,
+                        metadata=metadata,
+                        has_reasoning=has_reasoning,
+                        output_version=self.output_version,
+                    )
+                    if generation_chunk:
+                        if run_manager:
+                            run_manager.on_llm_new_token(
+                                generation_chunk.text, chunk=generation_chunk
+                            )
+                        is_first_chunk = False
+                        if "reasoning" in generation_chunk.message.additional_kwargs:
+                            has_reasoning = True
+                        yield generation_chunk
+        except openai.BadRequestError as e:
+            _handle_openai_bad_request(e)
+        except openai.APIError as e:
+            _handle_openai_api_error(e)

    async def _astream_responses(
        self,
@@ -1199,51 +1229,58 @@ class BaseChatOpenAI(BaseChatModel):
    ) -> AsyncIterator[ChatGenerationChunk]:
        kwargs["stream"] = True
        payload = self._get_request_payload(messages, stop=stop, **kwargs)
-        if self.include_response_headers:
-            raw_context_manager = (
-                await self.root_async_client.with_raw_response.responses.create(
+        try:
+            if self.include_response_headers:
+                raw_context_manager = (
+                    await self.root_async_client.with_raw_response.responses.create(
+                        **payload
+                    )
+                )
+                context_manager = raw_context_manager.parse()
+                headers = {"headers": dict(raw_context_manager.headers)}
+            else:
+                context_manager = await self.root_async_client.responses.create(
                    **payload
                )
-            )
-            context_manager = raw_context_manager.parse()
-            headers = {"headers": dict(raw_context_manager.headers)}
-        else:
-            context_manager = await self.root_async_client.responses.create(**payload)
-            headers = {}
-        original_schema_obj = kwargs.get("response_format")
+                headers = {}
+            original_schema_obj = kwargs.get("response_format")

-        async with context_manager as response:
-            is_first_chunk = True
-            current_index = -1
-            current_output_index = -1
-            current_sub_index = -1
-            has_reasoning = False
-            async for chunk in response:
-                metadata = headers if is_first_chunk else {}
-                (
-                    current_index,
-                    current_output_index,
-                    current_sub_index,
-                    generation_chunk,
-                ) = _convert_responses_chunk_to_generation_chunk(
-                    chunk,
-                    current_index,
-                    current_output_index,
-                    current_sub_index,
-                    schema=original_schema_obj,
-                    metadata=metadata,
-                    has_reasoning=has_reasoning,
-                    output_version=self.output_version,
-                )
-                if generation_chunk:
-                    if run_manager:
-                        await run_manager.on_llm_new_token(
-                            generation_chunk.text, chunk=generation_chunk
-                        )
-                    is_first_chunk = False
-                    if "reasoning" in generation_chunk.message.additional_kwargs:
-                        has_reasoning = True
-                    yield generation_chunk
+            async with context_manager as response:
+                is_first_chunk = True
+                current_index = -1
+                current_output_index = -1
+                current_sub_index = -1
+                has_reasoning = False
+                async for chunk in response:
+                    metadata = headers if is_first_chunk else {}
+                    (
+                        current_index,
+                        current_output_index,
+                        current_sub_index,
+                        generation_chunk,
+                    ) = _convert_responses_chunk_to_generation_chunk(
+                        chunk,
+                        current_index,
+                        current_output_index,
+                        current_sub_index,
+                        schema=original_schema_obj,
+                        metadata=metadata,
+                        has_reasoning=has_reasoning,
+                        output_version=self.output_version,
+                    )
+                    if generation_chunk:
+                        if run_manager:
+                            await run_manager.on_llm_new_token(
+                                generation_chunk.text, chunk=generation_chunk
+                            )
+                        is_first_chunk = False
+                        if "reasoning" in generation_chunk.message.additional_kwargs:
+                            has_reasoning = True
+                        yield generation_chunk
+        except openai.BadRequestError as e:
+            _handle_openai_bad_request(e)
+        except openai.APIError as e:
+            _handle_openai_api_error(e)

    def _should_stream_usage(
        self, stream_usage: bool | None = None, **kwargs: Any
@@ -1282,24 +1319,26 @@ class BaseChatOpenAI(BaseChatModel):
        default_chunk_class: type[BaseMessageChunk] = AIMessageChunk
        base_generation_info = {}

-        if "response_format" in payload:
-            if self.include_response_headers:
-                warnings.warn(
-                    "Cannot currently include response headers when response_format is "
-                    "specified."
-                )
-            payload.pop("stream")
-            response_stream = self.root_client.beta.chat.completions.stream(**payload)
-            context_manager = response_stream
-        else:
-            if self.include_response_headers:
-                raw_response = self.client.with_raw_response.create(**payload)
-                response = raw_response.parse()
-                base_generation_info = {"headers": dict(raw_response.headers)}
-            else:
-                response = self.client.create(**payload)
-            context_manager = response
        try:
+            if "response_format" in payload:
+                if self.include_response_headers:
+                    warnings.warn(
+                        "Cannot currently include response headers when "
+                        "response_format is specified."
+                    )
+                payload.pop("stream")
+                response_stream = self.root_client.beta.chat.completions.stream(
+                    **payload
+                )
+                context_manager = response_stream
+            else:
+                if self.include_response_headers:
+                    raw_response = self.client.with_raw_response.create(**payload)
+                    response = raw_response.parse()
+                    base_generation_info = {"headers": dict(raw_response.headers)}
+                else:
+                    response = self.client.create(**payload)
+                context_manager = response
            with context_manager as response:
                is_first_chunk = True
                for chunk in response:
@@ -1324,6 +1363,8 @@ class BaseChatOpenAI(BaseChatModel):
                    yield generation_chunk
        except openai.BadRequestError as e:
            _handle_openai_bad_request(e)
+        except openai.APIError as e:
+            _handle_openai_api_error(e)
        if hasattr(response, "get_final_completion") and "response_format" in payload:
            final_completion = response.get_final_completion()
            generation_chunk = self._get_generation_chunk_from_completion(
@@ -1349,15 +1390,10 @@ class BaseChatOpenAI(BaseChatModel):
        try:
            if "response_format" in payload:
                payload.pop("stream")
-                try:
-                    raw_response = (
-                        self.root_client.chat.completions.with_raw_response.parse(
-                            **payload
-                        )
-                    )
-                    response = raw_response.parse()
-                except openai.BadRequestError as e:
-                    _handle_openai_bad_request(e)
+                raw_response = (
+                    self.root_client.chat.completions.with_raw_response.parse(**payload)
+                )
+                response = raw_response.parse()
            elif self._use_responses_api(payload):
                original_schema_obj = kwargs.get("response_format")
                if original_schema_obj and _is_pydantic_class(original_schema_obj):
@@ -1380,6 +1416,10 @@ class BaseChatOpenAI(BaseChatModel):
            else:
                raw_response = self.client.with_raw_response.create(**payload)
                response = raw_response.parse()
+        except openai.BadRequestError as e:
+            _handle_openai_bad_request(e)
+        except openai.APIError as e:
+            _handle_openai_api_error(e)
        except Exception as e:
            if raw_response is not None and hasattr(raw_response, "http_response"):
                e.response = raw_response.http_response  # type: ignore[attr-defined]
@@ -1523,28 +1563,28 @@ class BaseChatOpenAI(BaseChatModel):
        default_chunk_class: type[BaseMessageChunk] = AIMessageChunk
        base_generation_info = {}

-        if "response_format" in payload:
-            if self.include_response_headers:
-                warnings.warn(
-                    "Cannot currently include response headers when response_format is "
-                    "specified."
-                )
-            payload.pop("stream")
-            response_stream = self.root_async_client.beta.chat.completions.stream(
-                **payload
-            )
-            context_manager = response_stream
-        else:
-            if self.include_response_headers:
-                raw_response = await self.async_client.with_raw_response.create(
+        try:
+            if "response_format" in payload:
+                if self.include_response_headers:
+                    warnings.warn(
+                        "Cannot currently include response headers when "
+                        "response_format is specified."
+                    )
+                payload.pop("stream")
+                response_stream = self.root_async_client.beta.chat.completions.stream(
                    **payload
                )
-                response = raw_response.parse()
-                base_generation_info = {"headers": dict(raw_response.headers)}
+                context_manager = response_stream
            else:
-                response = await self.async_client.create(**payload)
-            context_manager = response
-        try:
+                if self.include_response_headers:
+                    raw_response = await self.async_client.with_raw_response.create(
+                        **payload
+                    )
+                    response = raw_response.parse()
+                    base_generation_info = {"headers": dict(raw_response.headers)}
+                else:
+                    response = await self.async_client.create(**payload)
+                context_manager = response
            async with context_manager as response:
                is_first_chunk = True
                async for chunk in response:
@@ -1569,6 +1609,8 @@ class BaseChatOpenAI(BaseChatModel):
                    yield generation_chunk
        except openai.BadRequestError as e:
            _handle_openai_bad_request(e)
+        except openai.APIError as e:
+            _handle_openai_api_error(e)
        if hasattr(response, "get_final_completion") and "response_format" in payload:
            final_completion = await response.get_final_completion()
            generation_chunk = self._get_generation_chunk_from_completion(
@@ -1593,13 +1635,10 @@ class BaseChatOpenAI(BaseChatModel):
        try:
            if "response_format" in payload:
                payload.pop("stream")
-                try:
-                    raw_response = await self.root_async_client.chat.completions.with_raw_response.parse(  # noqa: E501
-                        **payload
-                    )
-                    response = raw_response.parse()
-                except openai.BadRequestError as e:
-                    _handle_openai_bad_request(e)
+                raw_response = await self.root_async_client.chat.completions.with_raw_response.parse(  # noqa: E501
+                    **payload
+                )
+                response = raw_response.parse()
            elif self._use_responses_api(payload):
                original_schema_obj = kwargs.get("response_format")
                if original_schema_obj and _is_pydantic_class(original_schema_obj):
@@ -1628,6 +1667,10 @@ class BaseChatOpenAI(BaseChatModel):
                    **payload
                )
                response = raw_response.parse()
+        except openai.BadRequestError as e:
+            _handle_openai_bad_request(e)
+        except openai.APIError as e:
+            _handle_openai_api_error(e)
        except Exception as e:
            if raw_response is not None and hasattr(raw_response, "http_response"):
                e.response = raw_response.http_response  # type: ignore[attr-defined]