mirror of
https://github.com/hwchase17/langchain.git
synced 2026-06-09 10:17:00 +00:00
fix(anthropic): restore cache_control on non-direct subclasses (#37057)
Closes #37042 --- `AnthropicPromptCachingMiddleware` was unconditionally setting top-level `cache_control` in `model_settings` for any `ChatAnthropic` subclass. That field is direct-Anthropic-API only — `ChatAnthropicBedrock` (which subclasses `ChatAnthropic` and passed the existing `isinstance` gate) errored with `cache_control: Extra inputs are not permitted`. Investigating that surfaced a related regression: PR #35967 also deleted the block-level `cache_control` injection in `_get_request_payload`, which silently disabled caching entirely for non-direct subclasses (Bedrock had been falling back to in-block breakpoints). This restores both paths. ## Changes - Add `_is_direct_anthropic_llm_type` predicate that allowlists `_llm_type == "anthropic-chat"`. Both the middleware's `_supports_automatic_caching` and the new branch in `ChatAnthropic._get_request_payload` route through it, so any subclass that overrides `_llm_type` (Bedrock today, future direct-API variants tomorrow) is treated as non-direct by default. Replaces the prior substring-matching denylist on `"bedrock"`/`"vertex"`. - Restore `_collect_code_execution_tool_ids`, `_is_code_execution_related_block`, and a new `_apply_cache_control_to_last_eligible_block` helper in `chat_models`. For non-direct subclasses, `_get_request_payload` now pops `cache_control` from kwargs and walks messages newest-to-oldest, attaching the breakpoint to the last block that isn't `code_execution`-related (Anthropic forbids breakpoints on those). - Emit `UserWarning` when `cache_control` is requested but every candidate block is `code_execution`-related — previously a silent drop. - `AnthropicPromptCachingMiddleware._apply_caching` now sets the top-level `cache_control` only when `_supports_automatic_caching(request.model)`. System-message and tool-definition breakpoints continue to apply for all `ChatAnthropic` subclasses, since those are accepted by every transport. - Note: `ChatAnthropicVertex` does not subclass `ChatAnthropic` (it lives in `langchain-google-vertexai` and ships its own `_get_request_payload`), so the chat-models changes here only affect Bedrock. The middleware-side gate covers Vertex implicitly via the `isinstance(request.model, ChatAnthropic)` check that already excludes it.
This commit is contained in:
@@ -688,6 +688,116 @@ def _format_messages(
|
||||
return system, formatted_messages
|
||||
|
||||
|
||||
def _collect_code_execution_tool_ids(formatted_messages: list[dict]) -> set[str]:
|
||||
"""Collect `tool_use` IDs that were called by `code_execution`.
|
||||
|
||||
These blocks cannot have `cache_control` applied per Anthropic API
|
||||
requirements.
|
||||
"""
|
||||
code_execution_tool_ids: set[str] = set()
|
||||
|
||||
for message in formatted_messages:
|
||||
if message.get("role") != "assistant":
|
||||
continue
|
||||
content = message.get("content", [])
|
||||
if not isinstance(content, list):
|
||||
continue
|
||||
for block in content:
|
||||
if not isinstance(block, dict):
|
||||
continue
|
||||
if block.get("type") != "tool_use":
|
||||
continue
|
||||
caller = block.get("caller")
|
||||
if isinstance(caller, dict):
|
||||
caller_type = caller.get("type", "")
|
||||
if caller_type.startswith("code_execution"):
|
||||
tool_id = block.get("id")
|
||||
if tool_id:
|
||||
code_execution_tool_ids.add(tool_id)
|
||||
|
||||
return code_execution_tool_ids
|
||||
|
||||
|
||||
def _is_code_execution_related_block(
|
||||
block: dict,
|
||||
code_execution_tool_ids: set[str],
|
||||
) -> bool:
|
||||
"""Return whether a content block is related to `code_execution`.
|
||||
|
||||
Returns `True` for blocks that should NOT have `cache_control` applied.
|
||||
"""
|
||||
if not isinstance(block, dict):
|
||||
return False
|
||||
|
||||
block_type = block.get("type")
|
||||
|
||||
if block_type == "tool_use":
|
||||
caller = block.get("caller")
|
||||
if isinstance(caller, dict):
|
||||
caller_type = caller.get("type", "")
|
||||
if caller_type.startswith("code_execution"):
|
||||
return True
|
||||
|
||||
if block_type == "tool_result":
|
||||
tool_use_id = block.get("tool_use_id")
|
||||
if tool_use_id and tool_use_id in code_execution_tool_ids:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def _is_direct_anthropic_llm_type(llm_type: object) -> bool:
|
||||
"""Return whether an `_llm_type` reaches Claude via the direct Anthropic API.
|
||||
|
||||
Only the direct API accepts the top-level `cache_control` request param.
|
||||
Subclasses that route through other transports (Bedrock, future backends)
|
||||
override `_llm_type` and must expand `cache_control` kwargs into
|
||||
block-level breakpoints instead.
|
||||
|
||||
Non-string `_llm_type` values return `False` rather than raising, so a
|
||||
misbehaving subclass falls through to the safer non-direct branch.
|
||||
"""
|
||||
return llm_type == "anthropic-chat"
|
||||
|
||||
|
||||
def _apply_cache_control_to_last_eligible_block(
|
||||
formatted_messages: list[dict],
|
||||
cache_control: Any,
|
||||
code_execution_tool_ids: set[str],
|
||||
) -> bool:
|
||||
"""Place `cache_control` on the last block eligible for a breakpoint.
|
||||
|
||||
Walks messages newest-to-oldest and, within each, blocks newest-to-oldest,
|
||||
skipping `code_execution`-related blocks (Anthropic rejects breakpoints
|
||||
there). String message content is promoted to a single text block so the
|
||||
breakpoint can be attached.
|
||||
|
||||
Returns:
|
||||
`True` if a breakpoint was applied, `False` if every candidate was
|
||||
`code_execution`-related (caller should warn and drop the kwarg).
|
||||
"""
|
||||
for formatted_message in reversed(formatted_messages):
|
||||
content = formatted_message.get("content")
|
||||
if isinstance(content, list) and content:
|
||||
for block in reversed(content):
|
||||
if not isinstance(block, dict):
|
||||
continue
|
||||
if _is_code_execution_related_block(block, code_execution_tool_ids):
|
||||
continue
|
||||
block["cache_control"] = cache_control
|
||||
return True
|
||||
elif isinstance(content, str):
|
||||
formatted_message["content"] = [
|
||||
{
|
||||
"type": "text",
|
||||
"text": content,
|
||||
"cache_control": cache_control,
|
||||
}
|
||||
]
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
class AnthropicContextOverflowError(anthropic.BadRequestError, ContextOverflowError):
|
||||
"""BadRequestError raised when input exceeds Anthropic's context limit."""
|
||||
|
||||
@@ -1093,6 +1203,32 @@ class ChatAnthropic(BaseChatModel):
|
||||
|
||||
system, formatted_messages = _format_messages(messages)
|
||||
|
||||
# Only the direct Anthropic API accepts top-level `cache_control`.
|
||||
# Subclasses that route through other transports (e.g. Bedrock) expand
|
||||
# `cache_control` kwargs into block-level breakpoints, the only form
|
||||
# those transports accept.
|
||||
if not _is_direct_anthropic_llm_type(getattr(self, "_llm_type", None)):
|
||||
cache_control = kwargs.pop("cache_control", None)
|
||||
# Empty `formatted_messages` has nothing to attach a breakpoint to;
|
||||
# skip silently. The warning below is reserved for the surprising
|
||||
# case where messages exist but every candidate block is ineligible.
|
||||
if cache_control and formatted_messages:
|
||||
code_execution_tool_ids = _collect_code_execution_tool_ids(
|
||||
formatted_messages
|
||||
)
|
||||
applied = _apply_cache_control_to_last_eligible_block(
|
||||
formatted_messages, cache_control, code_execution_tool_ids
|
||||
)
|
||||
if not applied:
|
||||
warnings.warn(
|
||||
"`cache_control` kwarg was dropped: no eligible "
|
||||
"content block found (all candidates are "
|
||||
"`code_execution`-related, which Anthropic forbids "
|
||||
"breakpoints on).",
|
||||
UserWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
payload = {
|
||||
"model": self.model,
|
||||
"max_tokens": self.max_tokens,
|
||||
|
||||
Reference in New Issue
Block a user