fix(anthropic): restore cache_control on non-direct subclasses (#37057)

Closes #37042

---

`AnthropicPromptCachingMiddleware` was unconditionally setting top-level `cache_control` in `model_settings` for any `ChatAnthropic` subclass. That field is direct-Anthropic-API only — `ChatAnthropicBedrock` (which subclasses `ChatAnthropic` and passed the existing `isinstance` gate) errored with `cache_control: Extra inputs are not permitted`. Investigating that surfaced a related regression: PR #35967 also deleted the block-level `cache_control` injection in `_get_request_payload`, which silently disabled caching entirely for non-direct subclasses (Bedrock had been falling back to in-block breakpoints). This restores both paths.

## Changes

- Add `_is_direct_anthropic_llm_type` predicate that allowlists `_llm_type == "anthropic-chat"`. Both the middleware's `_supports_automatic_caching` and the new branch in `ChatAnthropic._get_request_payload` route through it, so any subclass that overrides `_llm_type` (Bedrock today, future direct-API variants tomorrow) is treated as non-direct by default. Replaces the prior substring-matching denylist on `"bedrock"`/`"vertex"`.
- Restore `_collect_code_execution_tool_ids` and `_is_code_execution_related_block`, and add a new `_apply_cache_control_to_last_eligible_block` helper in `chat_models`. For non-direct subclasses, `_get_request_payload` now pops `cache_control` from kwargs and walks messages newest-to-oldest, attaching the breakpoint to the last block that isn't `code_execution`-related (Anthropic forbids breakpoints on those).
- Emit `UserWarning` when `cache_control` is requested but every candidate block is `code_execution`-related — previously a silent drop.
- `AnthropicPromptCachingMiddleware._apply_caching` now sets the top-level `cache_control` only when `_supports_automatic_caching(request.model)`. System-message and tool-definition breakpoints continue to apply for all `ChatAnthropic` subclasses, since those are accepted by every transport.
- Note: `ChatAnthropicVertex` does not subclass `ChatAnthropic` (it lives in `langchain-google-vertexai` and ships its own `_get_request_payload`), so the chat-models changes here only affect Bedrock. The middleware-side gate covers Vertex implicitly via the `isinstance(request.model, ChatAnthropic)` check that already excludes it.
2026-06-09 10:17:00 +00:00 · 2026-04-28 16:41:22 -04:00
parent 37be34be82
commit 7a4594b682
6 changed files with 462 additions and 17 deletions
--- a/libs/partners/anthropic/langchain_anthropic/chat_models.py
+++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py
@@ -688,6 +688,116 @@ def _format_messages(
    return system, formatted_messages


+def _collect_code_execution_tool_ids(formatted_messages: list[dict]) -> set[str]:
+    """Collect `tool_use` IDs that were called by `code_execution`.
+
+    Scans assistant messages whose content is a list of blocks and gathers the
+    `id` of every `tool_use` block whose `caller.type` starts with
+    `code_execution`. These blocks cannot have `cache_control` applied per
+    Anthropic API requirements.
+
+    Args:
+        formatted_messages: Message dicts with `role` and `content` keys.
+
+    Returns:
+        The set of matching `tool_use` IDs; empty when none are found.
+    """
+    code_execution_tool_ids: set[str] = set()
+
+    for message in formatted_messages:
+        if message.get("role") != "assistant":
+            continue
+        content = message.get("content", [])
+        if not isinstance(content, list):
+            continue
+        for block in content:
+            if not isinstance(block, dict) or block.get("type") != "tool_use":
+                continue
+            caller = block.get("caller")
+            if not isinstance(caller, dict):
+                continue
+            caller_type = caller.get("type")
+            # `caller.type` may be present but `None` or otherwise non-string;
+            # skip such malformed blocks instead of raising `AttributeError`.
+            if not isinstance(caller_type, str):
+                continue
+            if not caller_type.startswith("code_execution"):
+                continue
+            tool_id = block.get("id")
+            if tool_id:
+                code_execution_tool_ids.add(tool_id)
+
+    return code_execution_tool_ids
+
+
+def _is_code_execution_related_block(
+    block: dict,
+    code_execution_tool_ids: set[str],
+) -> bool:
+    """Return whether a content block is related to `code_execution`.
+
+    Returns `True` for blocks that should NOT have `cache_control` applied:
+
+    - `tool_use` blocks whose `caller.type` starts with `code_execution`.
+    - `tool_result` blocks whose `tool_use_id` is in
+        `code_execution_tool_ids`.
+
+    Args:
+        block: The content block to inspect. Non-dict values are never
+            considered related.
+        code_execution_tool_ids: IDs of `tool_use` blocks that were called by
+            `code_execution`.
+
+    Returns:
+        `True` if the block is `code_execution`-related, `False` otherwise.
+    """
+    if not isinstance(block, dict):
+        return False
+
+    block_type = block.get("type")
+
+    if block_type == "tool_use":
+        caller = block.get("caller")
+        caller_type = caller.get("type") if isinstance(caller, dict) else None
+        # Guard against a present-but-`None` (or otherwise non-string) type so
+        # a malformed block is reported as unrelated instead of raising.
+        return isinstance(caller_type, str) and caller_type.startswith(
+            "code_execution"
+        )
+
+    if block_type == "tool_result":
+        tool_use_id = block.get("tool_use_id")
+        return bool(tool_use_id) and tool_use_id in code_execution_tool_ids
+
+    return False
+
+
+def _is_direct_anthropic_llm_type(llm_type: object) -> bool:
+    """Report whether `llm_type` identifies the direct Anthropic API.
+
+    The top-level `cache_control` request param is accepted only by the direct
+    API. Subclasses that reach Claude through another transport (Bedrock,
+    future backends) override `_llm_type`, and for those the `cache_control`
+    kwarg has to be expanded into block-level breakpoints instead.
+
+    The check is a plain equality comparison, so a non-string `_llm_type`
+    yields `False` rather than raising; a misbehaving subclass therefore falls
+    through to the safer non-direct branch.
+    """
+    direct_api_llm_type = "anthropic-chat"
+    return llm_type == direct_api_llm_type
+
+
+def _apply_cache_control_to_last_eligible_block(
+    formatted_messages: list[dict],
+    cache_control: Any,
+    code_execution_tool_ids: set[str],
+) -> bool:
+    """Place `cache_control` on the last block eligible for a breakpoint.
+
+    Walks messages newest-to-oldest and, within each, blocks newest-to-oldest.
+    Two kinds of candidates are skipped:
+
+    - `code_execution`-related blocks (Anthropic rejects breakpoints there).
+    - Empty text, either an empty string message content or a `text` block
+        with empty `text` (Anthropic cannot cache empty text blocks, so
+        attaching a breakpoint there would produce an invalid request).
+
+    Non-empty string message content is promoted to a single text block so
+    the breakpoint can be attached. `formatted_messages` is mutated in place.
+
+    Args:
+        formatted_messages: Message dicts whose `content` is a string or a
+            list of content blocks.
+        cache_control: The value to store under the block's `cache_control`
+            key.
+        code_execution_tool_ids: IDs of `tool_use` blocks that were called by
+            `code_execution`.
+
+    Returns:
+        `True` if a breakpoint was applied, `False` if no candidate was
+            eligible (caller should warn and drop the kwarg).
+    """
+    for formatted_message in reversed(formatted_messages):
+        content = formatted_message.get("content")
+        if isinstance(content, str):
+            if not content:
+                # Promoting "" would create an empty text block carrying a
+                # breakpoint; keep looking in older messages instead.
+                continue
+            formatted_message["content"] = [
+                {
+                    "type": "text",
+                    "text": content,
+                    "cache_control": cache_control,
+                }
+            ]
+            return True
+        if not isinstance(content, list):
+            continue
+        for block in reversed(content):
+            if not isinstance(block, dict):
+                continue
+            if _is_code_execution_related_block(block, code_execution_tool_ids):
+                continue
+            if block.get("type") == "text" and not block.get("text"):
+                # Empty text blocks cannot carry a breakpoint.
+                continue
+            block["cache_control"] = cache_control
+            return True
+    return False
+
+
 class AnthropicContextOverflowError(anthropic.BadRequestError, ContextOverflowError):
    """BadRequestError raised when input exceeds Anthropic's context limit."""

@@ -1093,6 +1203,32 @@ class ChatAnthropic(BaseChatModel):

        system, formatted_messages = _format_messages(messages)

+        # Only the direct Anthropic API accepts top-level `cache_control`.
+        # Subclasses that route through other transports (e.g. Bedrock) expand
+        # `cache_control` kwargs into block-level breakpoints, the only form
+        # those transports accept.
+        if not _is_direct_anthropic_llm_type(getattr(self, "_llm_type", None)):
+            cache_control = kwargs.pop("cache_control", None)
+            # Empty `formatted_messages` has nothing to attach a breakpoint to;
+            # skip silently. The warning below is reserved for the surprising
+            # case where messages exist but every candidate block is ineligible.
+            if cache_control and formatted_messages:
+                code_execution_tool_ids = _collect_code_execution_tool_ids(
+                    formatted_messages
+                )
+                applied = _apply_cache_control_to_last_eligible_block(
+                    formatted_messages, cache_control, code_execution_tool_ids
+                )
+                if not applied:
+                    warnings.warn(
+                        "`cache_control` kwarg was dropped: no eligible "
+                        "content block found (all candidates are "
+                        "`code_execution`-related, which Anthropic forbids "
+                        "breakpoints on).",
+                        UserWarning,
+                        stacklevel=2,
+                    )
+
        payload = {
            "model": self.model,
            "max_tokens": self.max_tokens,