From 7a4594b6820983eb652fdc6b180e7f6974c81f75 Mon Sep 17 00:00:00 2001
From: Mason Daugherty <mason@langchain.dev>
Date: Tue, 28 Apr 2026 16:41:22 -0400
Subject: [PATCH] fix(anthropic): restore `cache_control` on non-direct
 subclasses (#37057)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #37042

---

`AnthropicPromptCachingMiddleware` was unconditionally setting top-level
`cache_control` in `model_settings` for any `ChatAnthropic` subclass.
That field is direct-Anthropic-API only — `ChatAnthropicBedrock` (which
subclasses `ChatAnthropic` and passed the existing `isinstance` gate)
errored with `cache_control: Extra inputs are not permitted`.
Investigating that surfaced a related regression: PR #35967 also deleted
the block-level `cache_control` injection in `_get_request_payload`,
which silently disabled caching entirely for non-direct subclasses
(Bedrock had been falling back to in-block breakpoints). This restores
both paths.

## Changes
- Add `_is_direct_anthropic_llm_type` predicate that allowlists
`_llm_type == "anthropic-chat"`. Both the middleware's
`_supports_automatic_caching` and the new branch in
`ChatAnthropic._get_request_payload` route through it, so any subclass
that overrides `_llm_type` (Bedrock today, future direct-API variants
tomorrow) is treated as non-direct by default. Replaces the prior
substring-matching denylist on `"bedrock"`/`"vertex"`.
- Restore `_collect_code_execution_tool_ids`,
`_is_code_execution_related_block`, and a new
`_apply_cache_control_to_last_eligible_block` helper in `chat_models`.
For non-direct subclasses, `_get_request_payload` now pops
`cache_control` from kwargs and walks messages newest-to-oldest,
attaching the breakpoint to the last block that isn't
`code_execution`-related (Anthropic forbids breakpoints on those).
- Emit `UserWarning` when `cache_control` is requested but every
candidate block is `code_execution`-related — previously a silent drop.
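- `AnthropicPromptCachingMiddleware._apply_caching` keeps setting
`cache_control` in `model_settings` for every `ChatAnthropic` subclass;
`ChatAnthropic._get_request_payload` forwards it as the top-level field
on the direct API and expands it into a block-level breakpoint for
non-direct subclasses such as Bedrock. System-message and
tool-definition breakpoints continue to apply for all `ChatAnthropic`
subclasses, since those are accepted by every transport.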
- Note: `ChatAnthropicVertex` does not subclass `ChatAnthropic` (it
lives in `langchain-google-vertexai` and ships its own
`_get_request_payload`), so the chat-models changes here only affect
Bedrock. The middleware-side gate covers Vertex implicitly via the
`isinstance(request.model, ChatAnthropic)` check that already excludes
it.
---
 libs/model-profiles/uv.lock                   |  14 +-
 .../langchain_anthropic/chat_models.py        | 136 ++++++++++++
 .../middleware/prompt_caching.py              |  10 +-
 .../middleware/test_prompt_caching.py         |  97 ++++++++
 .../tests/unit_tests/test_chat_models.py      | 208 ++++++++++++++++++
 libs/partners/anthropic/uv.lock               |  14 +-
 6 files changed, 462 insertions(+), 17 deletions(-)

diff --git a/libs/model-profiles/uv.lock b/libs/model-profiles/uv.lock
index 895f24b4282..1a4430a17a5 100644
--- a/libs/model-profiles/uv.lock
+++ b/libs/model-profiles/uv.lock
@@ -495,7 +495,7 @@ requires-dist = [
     { name = "langchain-perplexity", marker = "extra == 'perplexity'" },
     { name = "langchain-together", marker = "extra == 'together'" },
     { name = "langchain-xai", marker = "extra == 'xai'" },
-    { name = "langgraph", specifier = ">=1.1.5,<1.2.0" },
+    { name = "langgraph", specifier = ">=1.1.10,<1.2.0" },
     { name = "pydantic", specifier = ">=2.7.4,<3.0.0" },
 ]
 provides-extras = ["community", "anthropic", "openai", "azure-ai", "google-vertexai", "google-genai", "fireworks", "ollama", "together", "mistralai", "huggingface", "groq", "aws", "baseten", "deepseek", "xai", "perplexity"]
@@ -724,7 +724,7 @@ wheels = [
 
 [[package]]
 name = "langgraph"
-version = "1.1.6"
+version = "1.1.10"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "langchain-core" },
@@ -734,9 +734,9 @@ dependencies = [
     { name = "pydantic" },
     { name = "xxhash" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/5c/e5/d3f72ead3c7f15769d5a9c07e373628f1fbaf6cbe7735694d7085859acf6/langgraph-1.1.6.tar.gz", hash = "sha256:1783f764b08a607e9f288dbcf6da61caeb0dd40b337e5c9fb8b412341fbc0b60", size = 549634, upload-time = "2026-04-03T19:01:32.561Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/9a/b3/7dec224369c7938eb3227ff69542a0d0f517862a0d27945b8c395f2a781f/langgraph-1.1.10.tar.gz", hash = "sha256:3115beb58203283c98d8752a90c034f3432177d2979a1fe205f76e5f1b744500", size = 560685, upload-time = "2026-04-27T17:19:10.426Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/71/e6/b36ecdb3ff4ba9a290708d514bae89ebbe2f554b6abbe4642acf3fddbe51/langgraph-1.1.6-py3-none-any.whl", hash = "sha256:fdbf5f54fa5a5a4c4b09b7b5e537f1b2fa283d2f0f610d3457ddeecb479458b9", size = 169755, upload-time = "2026-04-03T19:01:30.686Z" },
+    { url = "https://files.pythonhosted.org/packages/80/07/057dc1aa7991115fca53f1fa6573a7cc0dd296c05360c672cc67fdb6245b/langgraph-1.1.10-py3-none-any.whl", hash = "sha256:8a4f163f72f4401648d0c11b48ee906947d938ba8cf1f474540fe591534f0d17", size = 173750, upload-time = "2026-04-27T17:19:09.073Z" },
 ]
 
 [[package]]
@@ -754,15 +754,15 @@ wheels = [
 
 [[package]]
 name = "langgraph-prebuilt"
-version = "1.0.9"
+version = "1.0.12"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "langchain-core" },
     { name = "langgraph-checkpoint" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/99/4c/06dac899f4945bedb0c3a1583c19484c2cc894114ea30d9a538dd270086e/langgraph_prebuilt-1.0.9.tar.gz", hash = "sha256:93de7512e9caade4b77ead92428f6215c521fdb71b8ffda8cd55f0ad814e64de", size = 165850, upload-time = "2026-04-03T14:06:37.721Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/ed/8b/5fff4c63bbfef1475d577e13f5970f91955a4069d8dc4adbaeef92f36732/langgraph_prebuilt-1.0.12.tar.gz", hash = "sha256:edcb11ff29996def816243f267fb2c85c0a2e4fb618c275f3d238aee8dd6a5ec", size = 172831, upload-time = "2026-04-27T17:14:27.152Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/1d/a2/8368ac187b75e7f9d938ca075d34f116683f5cfc48d924029ee79aea147b/langgraph_prebuilt-1.0.9-py3-none-any.whl", hash = "sha256:776c8e3154a5aef5ad0e5bf3f263f2dcaab3983786cc20014b7f955d99d2d1b2", size = 35958, upload-time = "2026-04-03T14:06:36.58Z" },
+    { url = "https://files.pythonhosted.org/packages/53/75/1e6e6fd478a1b1e643de03505570103dcb89c57c429c0fd3084d521e522e/langgraph_prebuilt-1.0.12-py3-none-any.whl", hash = "sha256:ab83822d2724d434d3536dc127b86c7d16fe3fb8dc02a89a683bc77b2e55f6e9", size = 37195, upload-time = "2026-04-27T17:14:25.788Z" },
 ]
 
 [[package]]
diff --git a/libs/partners/anthropic/langchain_anthropic/chat_models.py b/libs/partners/anthropic/langchain_anthropic/chat_models.py
index 380f0671422..66b288bad6a 100644
--- a/libs/partners/anthropic/langchain_anthropic/chat_models.py
+++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py
@@ -688,6 +688,116 @@ def _format_messages(
     return system, formatted_messages
 
 
+def _collect_code_execution_tool_ids(formatted_messages: list[dict]) -> set[str]:
+    """Collect `tool_use` IDs that were called by `code_execution`.
+
+    These blocks cannot have `cache_control` applied per Anthropic API
+    requirements.
+    """
+    code_execution_tool_ids: set[str] = set()
+
+    for message in formatted_messages:
+        if message.get("role") != "assistant":
+            continue
+        content = message.get("content", [])
+        if not isinstance(content, list):
+            continue
+        for block in content:
+            if not isinstance(block, dict):
+                continue
+            if block.get("type") != "tool_use":
+                continue
+            caller = block.get("caller")
+            if isinstance(caller, dict):
+                caller_type = caller.get("type", "")
+                if caller_type.startswith("code_execution"):
+                    tool_id = block.get("id")
+                    if tool_id:
+                        code_execution_tool_ids.add(tool_id)
+
+    return code_execution_tool_ids
+
+
+def _is_code_execution_related_block(
+    block: dict,
+    code_execution_tool_ids: set[str],
+) -> bool:
+    """Return whether a content block is related to `code_execution`.
+
+    Returns `True` for blocks that should NOT have `cache_control` applied.
+    """
+    if not isinstance(block, dict):
+        return False
+
+    block_type = block.get("type")
+
+    if block_type == "tool_use":
+        caller = block.get("caller")
+        if isinstance(caller, dict):
+            caller_type = caller.get("type", "")
+            if caller_type.startswith("code_execution"):
+                return True
+
+    if block_type == "tool_result":
+        tool_use_id = block.get("tool_use_id")
+        if tool_use_id and tool_use_id in code_execution_tool_ids:
+            return True
+
+    return False
+
+
+def _is_direct_anthropic_llm_type(llm_type: object) -> bool:
+    """Return whether an `_llm_type` reaches Claude via the direct Anthropic API.
+
+    Only the direct API accepts the top-level `cache_control` request param.
+    Subclasses that route through other transports (Bedrock, future backends)
+    override `_llm_type` and must expand `cache_control` kwargs into
+    block-level breakpoints instead.
+
+    Non-string `_llm_type` values return `False` rather than raising, so a
+    misbehaving subclass falls through to the safer non-direct branch.
+    """
+    return llm_type == "anthropic-chat"
+
+
+def _apply_cache_control_to_last_eligible_block(
+    formatted_messages: list[dict],
+    cache_control: Any,
+    code_execution_tool_ids: set[str],
+) -> bool:
+    """Place `cache_control` on the last block eligible for a breakpoint.
+
+    Walks messages newest-to-oldest and, within each, blocks newest-to-oldest,
+    skipping `code_execution`-related blocks (Anthropic rejects breakpoints
+    there). String message content is promoted to a single text block so the
+    breakpoint can be attached.
+
+    Returns:
+        `True` if a breakpoint was applied, `False` if no eligible block exists
+            (e.g. all are `code_execution`-related); caller should warn and drop it.
+    """
+    for formatted_message in reversed(formatted_messages):
+        content = formatted_message.get("content")
+        if isinstance(content, list) and content:
+            for block in reversed(content):
+                if not isinstance(block, dict):
+                    continue
+                if _is_code_execution_related_block(block, code_execution_tool_ids):
+                    continue
+                block["cache_control"] = cache_control
+                return True
+        elif isinstance(content, str):
+            formatted_message["content"] = [
+                {
+                    "type": "text",
+                    "text": content,
+                    "cache_control": cache_control,
+                }
+            ]
+            return True
+    return False
+
+
 class AnthropicContextOverflowError(anthropic.BadRequestError, ContextOverflowError):
     """BadRequestError raised when input exceeds Anthropic's context limit."""
 
@@ -1093,6 +1203,32 @@ class ChatAnthropic(BaseChatModel):
 
         system, formatted_messages = _format_messages(messages)
 
+        # Only the direct Anthropic API accepts top-level `cache_control`.
+        # Subclasses that route through other transports (e.g. Bedrock) expand
+        # `cache_control` kwargs into block-level breakpoints, the only form
+        # those transports accept.
+        if not _is_direct_anthropic_llm_type(getattr(self, "_llm_type", None)):
+            cache_control = kwargs.pop("cache_control", None)
+            # Empty `formatted_messages` has nothing to attach a breakpoint to;
+            # skip silently. The warning below is reserved for the surprising
+            # case where messages exist but every candidate block is ineligible.
+            if cache_control and formatted_messages:
+                code_execution_tool_ids = _collect_code_execution_tool_ids(
+                    formatted_messages
+                )
+                applied = _apply_cache_control_to_last_eligible_block(
+                    formatted_messages, cache_control, code_execution_tool_ids
+                )
+                if not applied:
+                    warnings.warn(
+                        "`cache_control` kwarg was dropped: no eligible "
+                        "content block found (all candidates are "
+                        "`code_execution`-related, which Anthropic forbids "
+                        "breakpoints on).",
+                        UserWarning,
+                        stacklevel=2,
+                    )
+
         payload = {
             "model": self.model,
             "max_tokens": self.max_tokens,
diff --git a/libs/partners/anthropic/langchain_anthropic/middleware/prompt_caching.py b/libs/partners/anthropic/langchain_anthropic/middleware/prompt_caching.py
index eb35b6b974f..395e7421737 100644
--- a/libs/partners/anthropic/langchain_anthropic/middleware/prompt_caching.py
+++ b/libs/partners/anthropic/langchain_anthropic/middleware/prompt_caching.py
@@ -42,11 +42,11 @@ class AnthropicPromptCachingMiddleware(AgentMiddleware):
     Applies cache control breakpoints to:
 
     - **System message**: Tags the last content block of the system message
-      with `cache_control` so static system prompt content is cached.
+        with `cache_control` so static system prompt content is cached.
     - **Tools**: Tags all tool definitions with `cache_control` so tool
-      schemas are cached across turns.
+        schemas are cached across turns.
     - **Last cacheable block**: Tags last cacheable block of message sequence using
-      Anthropic's automatic caching feature.
+        Anthropic's automatic caching feature.
 
     Learn more about Anthropic prompt caching
     [here](https://platform.claude.com/docs/en/build-with-claude/prompt-caching).
@@ -128,6 +128,10 @@ class AnthropicPromptCachingMiddleware(AgentMiddleware):
         overrides: dict[str, Any] = {}
         cache_control = self._cache_control
 
+        # Always set top-level `cache_control` on model settings. The Anthropic
+        # chat model translates the kwarg to the correct wire format for the
+        # active transport: direct API receives it as-is, while Bedrock has it
+        # expanded into a block-level breakpoint by `_get_request_payload`.
         overrides["model_settings"] = {
             **request.model_settings,
             "cache_control": cache_control,
diff --git a/libs/partners/anthropic/tests/unit_tests/middleware/test_prompt_caching.py b/libs/partners/anthropic/tests/unit_tests/middleware/test_prompt_caching.py
index d8221cfca17..a45d3e0d77e 100644
--- a/libs/partners/anthropic/tests/unit_tests/middleware/test_prompt_caching.py
+++ b/libs/partners/anthropic/tests/unit_tests/middleware/test_prompt_caching.py
@@ -70,6 +70,7 @@ def test_anthropic_prompt_caching_middleware_initialization() -> None:
 
     # Create a mock ChatAnthropic instance
     mock_chat_anthropic = MagicMock(spec=ChatAnthropic)
+    mock_chat_anthropic._llm_type = "anthropic-chat"
 
     fake_request = ModelRequest(
         model=mock_chat_anthropic,
@@ -155,6 +156,7 @@ async def test_anthropic_prompt_caching_middleware_async() -> None:
 
     # Create a mock ChatAnthropic instance
     mock_chat_anthropic = MagicMock(spec=ChatAnthropic)
+    mock_chat_anthropic._llm_type = "anthropic-chat"
 
     fake_request = ModelRequest(
         model=mock_chat_anthropic,
@@ -270,6 +272,7 @@ async def test_anthropic_prompt_caching_middleware_async_with_system_prompt() ->
 
     # Create a mock ChatAnthropic instance
     mock_chat_anthropic = MagicMock(spec=ChatAnthropic)
+    mock_chat_anthropic._llm_type = "anthropic-chat"
 
     # Test with system prompt: 2 messages + 1 system = 3 total (meets minimum)
     fake_request = ModelRequest(
@@ -307,6 +310,7 @@ async def test_anthropic_prompt_caching_middleware_async_default_values() -> Non
 
     # Create a mock ChatAnthropic instance
     mock_chat_anthropic = MagicMock(spec=ChatAnthropic)
+    mock_chat_anthropic._llm_type = "anthropic-chat"
 
     # Single message should trigger caching with default settings
     fake_request = ModelRequest(
@@ -572,3 +576,96 @@ class TestToolCaching:
             "type": "ephemeral",
             "ttl": "1h",
         }
+
+
+class TestBedrockCompatibility:
+    """The middleware applies caching uniformly across transports.
+
+    `model_settings["cache_control"]` is always set; the chat model layer
+    (`ChatAnthropic._get_request_payload`) translates the kwarg to the wire
+    format the active transport accepts — top-level for the direct API,
+    block-level for Bedrock.
+    """
+
+    def _bedrock_model(self) -> Any:
+        mock_model = MagicMock(spec=ChatAnthropic)
+        mock_model._llm_type = "anthropic-bedrock-chat"
+        return mock_model
+
+    def _make_request(self, model: Any, **kwargs: Any) -> ModelRequest:
+        defaults: dict[str, Any] = {
+            "model": model,
+            "messages": [HumanMessage("Hello")],
+            "system_message": None,
+            "tool_choice": None,
+            "tools": [],
+            "response_format": None,
+            "state": {"messages": [HumanMessage("Hello")]},
+            "runtime": cast(Runtime, object()),
+            "model_settings": {},
+        }
+        defaults.update(kwargs)
+        return ModelRequest(**defaults)
+
+    def _capture(self, request: ModelRequest) -> ModelRequest:
+        middleware = AnthropicPromptCachingMiddleware()
+        captured: ModelRequest | None = None
+
+        def handler(req: ModelRequest) -> ModelResponse:
+            nonlocal captured
+            captured = req
+            return ModelResponse(result=[AIMessage(content="ok")])
+
+        middleware.wrap_model_call(request, handler)
+        assert captured is not None
+        return captured
+
+    def test_sets_model_settings_cache_control_for_bedrock(self) -> None:
+        request = self._make_request(self._bedrock_model())
+        captured = self._capture(request)
+        assert captured.model_settings["cache_control"] == {
+            "type": "ephemeral",
+            "ttl": "5m",
+        }
+
+    def test_tags_system_message_for_bedrock(self) -> None:
+        request = self._make_request(
+            self._bedrock_model(),
+            system_message=SystemMessage("Base prompt"),
+        )
+        captured = self._capture(request)
+        assert captured.system_message is not None
+        content = captured.system_message.content
+        assert isinstance(content, list)
+        assert content[-1]["cache_control"] == {"type": "ephemeral", "ttl": "5m"}  # type: ignore[index]
+
+    def test_tags_tools_for_bedrock(self) -> None:
+        @tool
+        def my_tool(x: str) -> str:
+            """A tool."""
+            return x
+
+        request = self._make_request(self._bedrock_model(), tools=[my_tool])
+        captured = self._capture(request)
+        assert captured.tools is not None
+        last = captured.tools[-1]
+        assert isinstance(last, BaseTool)
+        assert last.extras is not None
+        assert last.extras["cache_control"] == {"type": "ephemeral", "ttl": "5m"}
+
+    async def test_sets_model_settings_cache_control_for_bedrock_async(self) -> None:
+        request = self._make_request(self._bedrock_model())
+        middleware = AnthropicPromptCachingMiddleware()
+        captured: ModelRequest | None = None
+
+        async def handler(req: ModelRequest) -> ModelResponse:
+            nonlocal captured
+            captured = req
+            return ModelResponse(result=[AIMessage(content="ok")])
+
+        await middleware.awrap_model_call(request, handler)
+        assert captured is not None
+        assert captured.model_settings["cache_control"] == {
+            "type": "ephemeral",
+            "ttl": "5m",
+        }
diff --git a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py
index b861cc1acaf..710cc9d24e8 100644
--- a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py
+++ b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py
@@ -1723,6 +1723,214 @@ def test_cache_control_kwarg() -> None:
     ]
 
 
+class _BedrockLikeAnthropic(ChatAnthropic):
+    """Stand-in for `ChatAnthropicBedrock` for `_llm_type`-based gating tests.
+
+    Vertex is not modeled here: `langchain-google-vertexai`'s
+    `ChatAnthropicVertex` does not subclass `ChatAnthropic` and ships its own
+    `_get_request_payload`, so it never reaches the gate under test.
+    """
+
+    @property
+    def _llm_type(self) -> str:
+        return "anthropic-bedrock-chat"
+
+
+def test_cache_control_kwarg_bedrock_injects_into_blocks() -> None:
+    """Non-direct subclasses must place `cache_control` inside the last block.
+
+    Transports like Bedrock reject the top-level `cache_control` field, so
+    the kwarg has to be expanded into a nested breakpoint to remain effective.
+    """
+    llm = _BedrockLikeAnthropic(model=MODEL_NAME)
+
+    messages = [HumanMessage("foo"), AIMessage("bar"), HumanMessage("baz")]
+    payload = llm._get_request_payload(messages, cache_control={"type": "ephemeral"})
+
+    assert "cache_control" not in payload
+    last_message = payload["messages"][-1]
+    assert last_message["content"] == [
+        {"type": "text", "text": "baz", "cache_control": {"type": "ephemeral"}}
+    ]
+
+
+def test_cache_control_kwarg_bedrock_with_list_content() -> None:
+    """`cache_control` lands on the last block when content is already a list."""
+    llm = _BedrockLikeAnthropic(model=MODEL_NAME)
+
+    messages = [HumanMessage([{"type": "text", "text": "foo"}])]
+    payload = llm._get_request_payload(
+        messages, cache_control={"type": "ephemeral", "ttl": "1h"}
+    )
+
+    assert "cache_control" not in payload
+    last_block = payload["messages"][-1]["content"][-1]
+    assert last_block["cache_control"] == {"type": "ephemeral", "ttl": "1h"}
+
+
+def test_cache_control_kwarg_bedrock_skips_code_execution_blocks() -> None:
+    """`cache_control` must skip `code_execution`-related blocks.
+
+    Anthropic rejects breakpoints applied to those blocks, so the injector
+    walks backwards until it finds an eligible block.
+    """
+    llm = _BedrockLikeAnthropic(model=MODEL_NAME)
+
+    ai_message = AIMessage(
+        content=[
+            {"type": "text", "text": "earlier text"},
+            {
+                "type": "tool_use",
+                "id": "toolu_code_exec_1",
+                "name": "get_weather",
+                "input": {"location": "NYC"},
+                "caller": {
+                    "type": "code_execution_20250825",
+                    "tool_id": "srvtoolu_abc",
+                },
+            },
+        ]
+    )
+
+    payload = llm._get_request_payload(
+        [HumanMessage("hi"), ai_message],
+        cache_control={"type": "ephemeral"},
+    )
+
+    last_content = payload["messages"][-1]["content"]
+    assert last_content[0]["cache_control"] == {"type": "ephemeral"}
+    assert "cache_control" not in last_content[1]
+
+
+def test_cache_control_kwarg_bedrock_walks_back_to_earlier_message() -> None:
+    """When the last message has no eligible blocks, walk back to a prior one.
+
+    Pins the contract that `reversed(formatted_messages)` is intentional: a
+    refactor that only inspects the last message would silently regress.
+    """
+    llm = _BedrockLikeAnthropic(model=MODEL_NAME)
+
+    ai_message = AIMessage(
+        content=[
+            {
+                "type": "tool_use",
+                "id": "toolu_code_exec_1",
+                "name": "noop",
+                "input": {},
+                "caller": {
+                    "type": "code_execution_20250825",
+                    "tool_id": "srvtoolu_abc",
+                },
+            }
+        ]
+    )
+
+    payload = llm._get_request_payload(
+        [HumanMessage("earlier"), ai_message],
+        cache_control={"type": "ephemeral"},
+    )
+
+    first_message_content = payload["messages"][0]["content"]
+    assert first_message_content == [
+        {"type": "text", "text": "earlier", "cache_control": {"type": "ephemeral"}}
+    ]
+    last_message_content = payload["messages"][-1]["content"]
+    assert all("cache_control" not in block for block in last_message_content)
+
+
+def test_cache_control_kwarg_bedrock_no_eligible_block_warns() -> None:
+    """When every candidate is `code_execution`-related, warn and drop the kwarg.
+
+    Pins the warn-and-drop contract: payload remains valid for Anthropic, but
+    the caller is told their cache request was skipped.
+    """
+    llm = _BedrockLikeAnthropic(model=MODEL_NAME)
+
+    ai_message = AIMessage(
+        content=[
+            {
+                "type": "tool_use",
+                "id": "toolu_code_exec_1",
+                "name": "noop",
+                "input": {},
+                "caller": {
+                    "type": "code_execution_20250825",
+                    "tool_id": "srvtoolu_abc",
+                },
+            }
+        ]
+    )
+
+    with pytest.warns(UserWarning, match="cache_control.*dropped"):
+        payload = llm._get_request_payload(
+            [ai_message],
+            cache_control={"type": "ephemeral"},
+        )
+
+    assert "cache_control" not in payload
+    only_block = payload["messages"][-1]["content"][0]
+    assert "cache_control" not in only_block
+
+
+def test_cache_control_absent_kwarg_bedrock_is_noop() -> None:
+    """Without a `cache_control` kwarg, the Bedrock branch must not mutate."""
+    llm = _BedrockLikeAnthropic(model=MODEL_NAME)
+
+    messages = [HumanMessage("foo"), AIMessage("bar"), HumanMessage("baz")]
+    payload = llm._get_request_payload(messages)
+
+    assert "cache_control" not in payload
+    for message in payload["messages"]:
+        content = message["content"]
+        if isinstance(content, list):
+            for block in content:
+                assert "cache_control" not in block
+
+
+def test_cache_control_kwarg_unknown_subclass_injects_into_blocks() -> None:
+    """Any subclass that overrides `_llm_type` is treated as non-direct.
+
+    The gate is allowlist-shaped on `"anthropic-chat"`, so a future subclass
+    routing through a new transport is safe by default rather than silently
+    sending an unsupported top-level field.
+    """
+
+    class _FutureTransportAnthropic(ChatAnthropic):
+        @property
+        def _llm_type(self) -> str:
+            return "anthropic-some-future-transport"
+
+    llm = _FutureTransportAnthropic(model=MODEL_NAME)
+    payload = llm._get_request_payload(
+        [HumanMessage("hello")],
+        cache_control={"type": "ephemeral"},
+    )
+
+    assert "cache_control" not in payload
+    assert payload["messages"][-1]["content"] == [
+        {"type": "text", "text": "hello", "cache_control": {"type": "ephemeral"}}
+    ]
+
+
+@pytest.mark.parametrize(
+    ("llm_type", "expected"),
+    [
+        ("anthropic-chat", True),
+        ("anthropic-bedrock-chat", False),
+        ("anthropic-chat-vertexai", False),
+        ("", False),
+        ("ANTHROPIC-CHAT", False),
+        (None, False),
+        (object(), False),
+    ],
+)
+def test_is_direct_anthropic_llm_type(llm_type: object, expected: bool) -> None:  # noqa: FBT001
+    """Predicate is exact-match and tolerates non-string inputs."""
+    from langchain_anthropic.chat_models import _is_direct_anthropic_llm_type
+
+    assert _is_direct_anthropic_llm_type(llm_type) is expected
+
+
 def test_context_management_in_payload() -> None:
     llm = ChatAnthropic(
         model=MODEL_NAME,  # type: ignore[call-arg]
diff --git a/libs/partners/anthropic/uv.lock b/libs/partners/anthropic/uv.lock
index 179f6b6fd1a..060eac1bf5d 100644
--- a/libs/partners/anthropic/uv.lock
+++ b/libs/partners/anthropic/uv.lock
@@ -538,7 +538,7 @@ requires-dist = [
     { name = "langchain-perplexity", marker = "extra == 'perplexity'" },
     { name = "langchain-together", marker = "extra == 'together'" },
     { name = "langchain-xai", marker = "extra == 'xai'" },
-    { name = "langgraph", specifier = ">=1.1.5,<1.2.0" },
+    { name = "langgraph", specifier = ">=1.1.10,<1.2.0" },
     { name = "pydantic", specifier = ">=2.7.4,<3.0.0" },
 ]
 provides-extras = ["community", "anthropic", "openai", "azure-ai", "google-vertexai", "google-genai", "fireworks", "ollama", "together", "mistralai", "huggingface", "groq", "aws", "baseten", "deepseek", "xai", "perplexity"]
@@ -779,7 +779,7 @@ typing = [
 
 [[package]]
 name = "langgraph"
-version = "1.1.6"
+version = "1.1.10"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "langchain-core" },
@@ -789,9 +789,9 @@ dependencies = [
     { name = "pydantic" },
     { name = "xxhash" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/5c/e5/d3f72ead3c7f15769d5a9c07e373628f1fbaf6cbe7735694d7085859acf6/langgraph-1.1.6.tar.gz", hash = "sha256:1783f764b08a607e9f288dbcf6da61caeb0dd40b337e5c9fb8b412341fbc0b60", size = 549634, upload-time = "2026-04-03T19:01:32.561Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/9a/b3/7dec224369c7938eb3227ff69542a0d0f517862a0d27945b8c395f2a781f/langgraph-1.1.10.tar.gz", hash = "sha256:3115beb58203283c98d8752a90c034f3432177d2979a1fe205f76e5f1b744500", size = 560685, upload-time = "2026-04-27T17:19:10.426Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/71/e6/b36ecdb3ff4ba9a290708d514bae89ebbe2f554b6abbe4642acf3fddbe51/langgraph-1.1.6-py3-none-any.whl", hash = "sha256:fdbf5f54fa5a5a4c4b09b7b5e537f1b2fa283d2f0f610d3457ddeecb479458b9", size = 169755, upload-time = "2026-04-03T19:01:30.686Z" },
+    { url = "https://files.pythonhosted.org/packages/80/07/057dc1aa7991115fca53f1fa6573a7cc0dd296c05360c672cc67fdb6245b/langgraph-1.1.10-py3-none-any.whl", hash = "sha256:8a4f163f72f4401648d0c11b48ee906947d938ba8cf1f474540fe591534f0d17", size = 173750, upload-time = "2026-04-27T17:19:09.073Z" },
 ]
 
 [[package]]
@@ -809,15 +809,15 @@ wheels = [
 
 [[package]]
 name = "langgraph-prebuilt"
-version = "1.0.9"
+version = "1.0.12"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "langchain-core" },
     { name = "langgraph-checkpoint" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/99/4c/06dac899f4945bedb0c3a1583c19484c2cc894114ea30d9a538dd270086e/langgraph_prebuilt-1.0.9.tar.gz", hash = "sha256:93de7512e9caade4b77ead92428f6215c521fdb71b8ffda8cd55f0ad814e64de", size = 165850, upload-time = "2026-04-03T14:06:37.721Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/ed/8b/5fff4c63bbfef1475d577e13f5970f91955a4069d8dc4adbaeef92f36732/langgraph_prebuilt-1.0.12.tar.gz", hash = "sha256:edcb11ff29996def816243f267fb2c85c0a2e4fb618c275f3d238aee8dd6a5ec", size = 172831, upload-time = "2026-04-27T17:14:27.152Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/1d/a2/8368ac187b75e7f9d938ca075d34f116683f5cfc48d924029ee79aea147b/langgraph_prebuilt-1.0.9-py3-none-any.whl", hash = "sha256:776c8e3154a5aef5ad0e5bf3f263f2dcaab3983786cc20014b7f955d99d2d1b2", size = 35958, upload-time = "2026-04-03T14:06:36.58Z" },
+    { url = "https://files.pythonhosted.org/packages/53/75/1e6e6fd478a1b1e643de03505570103dcb89c57c429c0fd3084d521e522e/langgraph_prebuilt-1.0.12-py3-none-any.whl", hash = "sha256:ab83822d2724d434d3536dc127b86c7d16fe3fb8dc02a89a683bc77b2e55f6e9", size = 37195, upload-time = "2026-04-27T17:14:25.788Z" },
 ]
 
 [[package]]