fix(anthropic): restore cache_control on non-direct subclasses (#37057)

Closes #37042

---

`AnthropicPromptCachingMiddleware` was unconditionally setting top-level
`cache_control` in `model_settings` for any `ChatAnthropic` subclass.
That field is direct-Anthropic-API only — `ChatAnthropicBedrock` (which
subclasses `ChatAnthropic` and passed the existing `isinstance` gate)
errored with `cache_control: Extra inputs are not permitted`.
Investigating that surfaced a related regression: PR #35967 also deleted
the block-level `cache_control` injection in `_get_request_payload`,
which silently disabled caching entirely for non-direct subclasses
(Bedrock had been falling back to in-block breakpoints). This restores
both paths.

## Changes
- Add `_is_direct_anthropic_llm_type` predicate that allowlists
`_llm_type == "anthropic-chat"`. Both the middleware's
`_supports_automatic_caching` and the new branch in
`ChatAnthropic._get_request_payload` route through it, so any subclass
that overrides `_llm_type` (Bedrock today, any other transport tomorrow)
is treated as non-direct by default; a future direct-API variant would
have to be added to the allowlist explicitly. Replaces the prior
substring-matching denylist on `"bedrock"`/`"vertex"`.
- Restore `_collect_code_execution_tool_ids` and
`_is_code_execution_related_block`, and add a new
`_apply_cache_control_to_last_eligible_block` helper in `chat_models`.
For non-direct subclasses, `_get_request_payload` now pops
`cache_control` from kwargs and walks messages newest-to-oldest,
attaching the breakpoint to the last block that isn't
`code_execution`-related (Anthropic forbids breakpoints on those).
- Emit `UserWarning` when `cache_control` is requested but every
candidate block is `code_execution`-related — previously a silent drop.
- `AnthropicPromptCachingMiddleware._apply_caching` now sets the
top-level `cache_control` only when
`_supports_automatic_caching(request.model)`. System-message and
tool-definition breakpoints continue to apply for all `ChatAnthropic`
subclasses, since those are accepted by every transport.
- Note: `ChatAnthropicVertex` does not subclass `ChatAnthropic` (it
lives in `langchain-google-vertexai` and ships its own
`_get_request_payload`), so the chat-models changes here only affect
Bedrock. The middleware-side gate covers Vertex implicitly via the
`isinstance(request.model, ChatAnthropic)` check that already excludes
it.
This commit is contained in:
Mason Daugherty
2026-04-28 16:41:22 -04:00
committed by GitHub
parent 37be34be82
commit 7a4594b682
6 changed files with 462 additions and 17 deletions

View File

@@ -688,6 +688,116 @@ def _format_messages(
return system, formatted_messages
def _collect_code_execution_tool_ids(formatted_messages: list[dict]) -> set[str]:
"""Collect `tool_use` IDs that were called by `code_execution`.
These blocks cannot have `cache_control` applied per Anthropic API
requirements.
"""
code_execution_tool_ids: set[str] = set()
for message in formatted_messages:
if message.get("role") != "assistant":
continue
content = message.get("content", [])
if not isinstance(content, list):
continue
for block in content:
if not isinstance(block, dict):
continue
if block.get("type") != "tool_use":
continue
caller = block.get("caller")
if isinstance(caller, dict):
caller_type = caller.get("type", "")
if caller_type.startswith("code_execution"):
tool_id = block.get("id")
if tool_id:
code_execution_tool_ids.add(tool_id)
return code_execution_tool_ids
def _is_code_execution_related_block(
block: dict,
code_execution_tool_ids: set[str],
) -> bool:
"""Return whether a content block is related to `code_execution`.
Returns `True` for blocks that should NOT have `cache_control` applied.
"""
if not isinstance(block, dict):
return False
block_type = block.get("type")
if block_type == "tool_use":
caller = block.get("caller")
if isinstance(caller, dict):
caller_type = caller.get("type", "")
if caller_type.startswith("code_execution"):
return True
if block_type == "tool_result":
tool_use_id = block.get("tool_use_id")
if tool_use_id and tool_use_id in code_execution_tool_ids:
return True
return False
def _is_direct_anthropic_llm_type(llm_type: object) -> bool:
"""Return whether an `_llm_type` reaches Claude via the direct Anthropic API.
Only the direct API accepts the top-level `cache_control` request param.
Subclasses that route through other transports (Bedrock, future backends)
override `_llm_type` and must expand `cache_control` kwargs into
block-level breakpoints instead.
Non-string `_llm_type` values return `False` rather than raising, so a
misbehaving subclass falls through to the safer non-direct branch.
"""
return llm_type == "anthropic-chat"
def _apply_cache_control_to_last_eligible_block(
formatted_messages: list[dict],
cache_control: Any,
code_execution_tool_ids: set[str],
) -> bool:
"""Place `cache_control` on the last block eligible for a breakpoint.
Walks messages newest-to-oldest and, within each, blocks newest-to-oldest,
skipping `code_execution`-related blocks (Anthropic rejects breakpoints
there). String message content is promoted to a single text block so the
breakpoint can be attached.
Returns:
`True` if a breakpoint was applied, `False` if every candidate was
`code_execution`-related (caller should warn and drop the kwarg).
"""
for formatted_message in reversed(formatted_messages):
content = formatted_message.get("content")
if isinstance(content, list) and content:
for block in reversed(content):
if not isinstance(block, dict):
continue
if _is_code_execution_related_block(block, code_execution_tool_ids):
continue
block["cache_control"] = cache_control
return True
elif isinstance(content, str):
formatted_message["content"] = [
{
"type": "text",
"text": content,
"cache_control": cache_control,
}
]
return True
return False
class AnthropicContextOverflowError(anthropic.BadRequestError, ContextOverflowError):
    """BadRequestError raised when input exceeds Anthropic's context limit.

    Subclasses both `anthropic.BadRequestError` and `ContextOverflowError`, so
    an `except` clause for either base type also catches this error.
    """
@@ -1093,6 +1203,32 @@ class ChatAnthropic(BaseChatModel):
system, formatted_messages = _format_messages(messages)
# Only the direct Anthropic API accepts top-level `cache_control`.
# Subclasses that route through other transports (e.g. Bedrock) expand
# `cache_control` kwargs into block-level breakpoints, the only form
# those transports accept.
if not _is_direct_anthropic_llm_type(getattr(self, "_llm_type", None)):
cache_control = kwargs.pop("cache_control", None)
# Empty `formatted_messages` has nothing to attach a breakpoint to;
# skip silently. The warning below is reserved for the surprising
# case where messages exist but every candidate block is ineligible.
if cache_control and formatted_messages:
code_execution_tool_ids = _collect_code_execution_tool_ids(
formatted_messages
)
applied = _apply_cache_control_to_last_eligible_block(
formatted_messages, cache_control, code_execution_tool_ids
)
if not applied:
warnings.warn(
"`cache_control` kwarg was dropped: no eligible "
"content block found (all candidates are "
"`code_execution`-related, which Anthropic forbids "
"breakpoints on).",
UserWarning,
stacklevel=2,
)
payload = {
"model": self.model,
"max_tokens": self.max_tokens,