fix(anthropic): normalize cross-provider tool-call IDs (#37756)

Anthropic's API rejects `tool_use`/`tool_result` IDs that don't match `^[a-zA-Z0-9_-]+$` with a 400. When a conversation thread is replayed across providers — e.g. a user switches a running thread from Kimi (via Fireworks) to Claude — the prior turns carry tool-call IDs minted by the other provider (Kimi emits `functions.write_todos:0`, whose `.` and `:` are invalid), and the request fails. Tool-call IDs are now normalized to an Anthropic-compatible form during request formatting, with the original `tool_use.id` and its paired `tool_use_id` mapped identically so they stay linked.
2026-06-09 10:17:00 +00:00 · 2026-05-28 16:13:57 -04:00
parent 80ca60014f
commit 1a5403d848
2 changed files with 237 additions and 29 deletions
--- a/libs/partners/anthropic/langchain_anthropic/chat_models.py
+++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py
@@ -4,6 +4,7 @@ from __future__ import annotations

 import copy
 import datetime
+import hashlib
 import json
 import re
 import warnings
@@ -237,6 +238,51 @@ def _format_image(url: str) -> dict:
    )


+_TOOL_CALL_ID_PATTERN = re.compile(r"^[a-zA-Z0-9_-]+$")
+"""Anthropic requires `tool_use`/`tool_result` IDs to match this pattern."""
+
+
+def _normalize_tool_call_id(tool_call_id: str | None) -> str | None:
+    """Map a tool-call ID to an Anthropic-compatible form if needed.
+
+    Anthropic rejects `tool_use`/`tool_result` IDs that don't match
+    `^[a-zA-Z0-9_-]+$`. IDs minted by other providers can violate this when a
+    thread is replayed across providers (e.g. Fireworks/Kimi emits
+    `functions.write_todos:0`). Valid IDs are returned unchanged; invalid ones
+    are hashed deterministically so a rewritten `tool_use.id` and its paired
+    `tool_use_id` resolve to the same value within a request and across turns.
+
+    Empty and `None` IDs are passed through unchanged so that a genuinely
+    malformed request surfaces as a clear error from Anthropic rather than
+    being masked by a synthesized ID.
+
+    Args:
+        tool_call_id: The tool-call ID to normalize.
+
+    Returns:
+        The original ID if it is empty, `None`, or already valid; otherwise a
+            deterministic Anthropic-compatible replacement.
+    """
+    # `fullmatch`, not `match`: `$` would also accept a trailing newline.
+    if not tool_call_id or _TOOL_CALL_ID_PATTERN.fullmatch(tool_call_id):
+        return tool_call_id
+    digest = hashlib.sha256(tool_call_id.encode()).hexdigest()
+    return f"toolu_{digest[:24]}"
+
+
+def _normalize_block_tool_use_id(block: dict) -> dict:
+    """Normalize the `tool_use_id` of a `tool_result`-style block, if present.
+
+    Counterpart of `_normalize_tool_call_id` for content blocks: a `tool_use_id`
+    that arrives pre-structured (e.g. on a `ToolMessage` whose content is
+    already a list of `tool_result` blocks) stays consistent with its paired,
+    normalized `tool_use.id`. Blocks without the key are returned as-is.
+    """
+    if "tool_use_id" not in block:
+        return block
+    return {**block, "tool_use_id": _normalize_tool_call_id(block["tool_use_id"])}
+
+
 def _merge_messages(
    messages: Sequence[BaseMessage],
 ) -> list[SystemMessage | AIMessage | HumanMessage]:
@@ -272,7 +318,7 @@ def _merge_messages(
                tool_result: dict = {
                    "type": "tool_result",
                    "content": tool_content,
-                    "tool_use_id": curr.tool_call_id,
+                    "tool_use_id": _normalize_tool_call_id(curr.tool_call_id),
                    "is_error": curr.status == "error",
                }
                if cache_ctrl:
@@ -516,7 +562,7 @@ def _format_messages(
                                type="tool_use",
                                name=block["name"],
                                input=args,
-                                id=block["id"],
+                                id=cast("str", _normalize_tool_call_id(block["id"])),
                            )
                            if caller := block.get("caller"):
                                tool_use_block["caller"] = caller
@@ -595,24 +641,30 @@ def _format_messages(
                    ):
                        # Tool search results with tool_reference blocks
                        content.append(
-                            {
-                                k: v
-                                for k, v in block.items()
-                                if k
-                                in (
-                                    "type",
-                                    "content",
-                                    "tool_use_id",
-                                    "cache_control",
-                                )
-                            },
+                            _normalize_block_tool_use_id(
+                                {
+                                    k: v
+                                    for k, v in block.items()
+                                    if k
+                                    in (
+                                        "type",
+                                        "content",
+                                        "tool_use_id",
+                                        "cache_control",
+                                    )
+                                },
+                            ),
                        )
                    elif block["type"] == "tool_result":
                        # Regular tool results that need content formatting
                        tool_content = _format_messages(
                            [HumanMessage(block["content"])],
                        )[1][0]["content"]
-                        content.append({**block, "content": tool_content})
+                        content.append(
+                            _normalize_block_tool_use_id(
+                                {**block, "content": tool_content},
+                            ),
+                        )
                    elif block["type"] in (
                        "code_execution_tool_result",
                        "bash_code_execution_tool_result",
@@ -622,19 +674,21 @@ def _format_messages(
                        "web_fetch_tool_result",
                    ):
                        content.append(
-                            {
-                                k: v
-                                for k, v in block.items()
-                                if k
-                                in (
-                                    "type",
-                                    "content",
-                                    "tool_use_id",
-                                    "is_error",  # for mcp_tool_result
-                                    "cache_control",
-                                    "retrieved_at",  # for web_fetch_tool_result
-                                )
-                            },
+                            _normalize_block_tool_use_id(
+                                {
+                                    k: v
+                                    for k, v in block.items()
+                                    if k
+                                    in (
+                                        "type",
+                                        "content",
+                                        "tool_use_id",
+                                        "is_error",  # for mcp_tool_result
+                                        "cache_control",
+                                        "retrieved_at",  # for web_fetch_tool_result
+                                    )
+                                },
+                            ),
                        )
                    else:
                        content.append(block)
@@ -662,8 +716,13 @@ def _format_messages(
                for block in content
                if cast("dict", block)["type"] == "tool_use"
            ]
+            # `tool_use_ids` are already normalized via the branches above, so
+            # compare against the normalized tool-call ID to avoid emitting a
+            # duplicate `tool_use` block when the original ID was rewritten.
            missing_tool_calls = [
-                tc for tc in message.tool_calls if tc["id"] not in tool_use_ids
+                tc
+                for tc in message.tool_calls
+                if _normalize_tool_call_id(tc["id"]) not in tool_use_ids
            ]
            cast("list", content).extend(
                _lc_tool_calls_to_anthropic_tool_use_blocks(missing_tool_calls),
@@ -2224,7 +2283,7 @@ def _lc_tool_calls_to_anthropic_tool_use_blocks(
            type="tool_use",
            name=tool_call["name"],
            input=tool_call["args"],
-            id=cast("str", tool_call["id"]),
+            id=cast("str", _normalize_tool_call_id(tool_call["id"])),
        )
        for tool_call in tool_calls
    ]
--- a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py
+++ b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py
@@ -24,11 +24,13 @@ from pytest import CaptureFixture, MonkeyPatch

 from langchain_anthropic import ChatAnthropic
 from langchain_anthropic.chat_models import (
+    _TOOL_CALL_ID_PATTERN,
    _create_usage_metadata,
    _format_image,
    _format_messages,
    _is_builtin_tool,
    _merge_messages,
+    _normalize_tool_call_id,
    _thinking_in_params,
    convert_to_anthropic_tool,
 )
@@ -767,6 +769,153 @@ def test__format_messages_with_tool_calls() -> None:
        ]


+def test__normalize_tool_call_id() -> None:
+    """Valid/empty IDs pass through; invalid IDs get a stable, valid rewrite."""
+    # Already-valid IDs (native Anthropic and OpenAI styles) are unchanged.
+    for valid in ("1", "toolu_01abcDEF-_", "call_Ao02pnFYXD6GN1yzc0uXPsvF"):
+        assert _normalize_tool_call_id(valid) == valid
+
+    # Empty and None IDs pass through so a malformed request surfaces a clear
+    # error from Anthropic rather than a synthesized ID.
+    assert _normalize_tool_call_id("") == ""
+    assert _normalize_tool_call_id(None) is None
+
+    # Foreign IDs with characters Anthropic rejects (e.g. Fireworks/Kimi's
+    # `functions.write_todos:0`) are rewritten to a compatible form.
+    invalid = "functions.write_todos:0"
+    normalized = _normalize_tool_call_id(invalid)
+    assert normalized is not None
+    assert normalized != invalid
+    # `fullmatch` so the whole ID is validated (`$` tolerates a trailing "\n").
+    assert _TOOL_CALL_ID_PATTERN.fullmatch(normalized)
+
+    # Deterministic + idempotent: same input always maps to the same output.
+    assert _normalize_tool_call_id(invalid) == normalized
+    assert _normalize_tool_call_id(normalized) == normalized
+
+    # Distinct invalid IDs map to distinct replacements (no collisions).
+    other = _normalize_tool_call_id("functions.read_file:1")
+    assert other != normalized
+
+
+def test__format_messages_normalizes_cross_provider_tool_call_ids() -> None:
+    """A `tool_use.id` and its paired `tool_use_id` must normalize identically.
+
+    Reproduces the Fireworks/Kimi -> Anthropic 400 from replaying a thread whose
+    tool-call IDs were minted by another provider.
+    """
+    bad_id = "functions.write_todos:0"
+    ai = AIMessage(
+        "",
+        tool_calls=[{"name": "write_todos", "id": bad_id, "args": {"todos": []}}],
+    )
+    tool = ToolMessage("done", tool_call_id=bad_id)
+
+    _, formatted = _format_messages([HumanMessage("hi"), ai, tool])
+
+    tool_use = formatted[1]["content"][0]
+    tool_result = formatted[2]["content"][0]
+    assert tool_use["type"] == "tool_use"
+    assert tool_result["type"] == "tool_result"
+
+    # The rewritten IDs are valid end-to-end and still reference each other.
+    assert _TOOL_CALL_ID_PATTERN.fullmatch(tool_use["id"])
+    assert tool_use["id"] == tool_result["tool_use_id"]
+    assert tool_use["id"] == _normalize_tool_call_id(bad_id)
+
+
+def test__format_messages_normalizes_prestructured_tool_result_id() -> None:
+    """Pre-structured `tool_result` content gets its `tool_use_id` normalized.
+
+    A `ToolMessage` whose content is already `tool_result` blocks bypasses the
+    `tool_call_id` normalization in `_merge_messages` and goes through the
+    `tool_result` content branch, which must keep it paired with `tool_use.id`.
+    """
+    raw_id = "functions.write_todos:0"
+    ai_msg = AIMessage(
+        "",
+        tool_calls=[{"name": "write_todos", "id": raw_id, "args": {"todos": []}}],
+    )
+    tool_msg = ToolMessage(
+        [{"type": "tool_result", "tool_use_id": raw_id, "content": "done"}],
+        tool_call_id=raw_id,
+    )
+
+    _, formatted = _format_messages([HumanMessage("hi"), ai_msg, tool_msg])
+
+    use_block = formatted[1]["content"][0]
+    result_block = formatted[2]["content"][0]
+    assert result_block["tool_use_id"] == use_block["id"]
+    assert _normalize_tool_call_id(raw_id) == use_block["id"]
+
+
+def test__format_messages_normalizes_inline_tool_use_block() -> None:
+    """An invalid ID on an inline `tool_use` content block is normalized.
+
+    Covers the v1-compat destination where tool calls are stored as content
+    blocks rather than the `tool_calls` attribute, paired with a `ToolMessage`.
+    """
+    bad_id = "functions.search:2"
+    ai = AIMessage(
+        [{"type": "tool_use", "name": "search", "id": bad_id, "input": {"q": "x"}}],
+    )
+    tool = ToolMessage("result", tool_call_id=bad_id)
+
+    _, formatted = _format_messages([HumanMessage("hi"), ai, tool])
+
+    tool_use = formatted[1]["content"][0]
+    tool_result = formatted[2]["content"][0]
+    assert _TOOL_CALL_ID_PATTERN.fullmatch(tool_use["id"])
+    assert tool_use["id"] == tool_result["tool_use_id"]
+
+
+def test__format_messages_dedupes_overlapping_normalized_tool_use() -> None:
+    """An invalid ID shared by a `tool_use` block and `tool_calls` yields one block.
+
+    Guards the dedup branch: `tool_use_ids` are normalized, so the comparison
+    against the (also normalized) tool-call ID must not re-emit a duplicate block.
+    """
+    bad_id = "functions.write_todos:0"
+    ai = AIMessage(
+        [{"type": "tool_use", "name": "write_todos", "id": bad_id, "input": {"a": 1}}],
+        tool_calls=[{"name": "write_todos", "id": bad_id, "args": {"a": 1}}],
+    )
+
+    _, formatted = _format_messages([HumanMessage("hi"), ai])
+
+    tool_use_blocks = [b for b in formatted[1]["content"] if b["type"] == "tool_use"]
+    assert len(tool_use_blocks) == 1
+    assert _TOOL_CALL_ID_PATTERN.fullmatch(tool_use_blocks[0]["id"])
+
+
+def test__format_messages_normalizes_distinct_ids_independently() -> None:
+    """Two different invalid IDs in one turn remain distinct and stay paired."""
+    first_id = "functions.write_todos:0"
+    second_id = "functions.read_file:1"
+    ai_msg = AIMessage(
+        "",
+        tool_calls=[
+            {"name": "write_todos", "id": first_id, "args": {}},
+            {"name": "read_file", "id": second_id, "args": {}},
+        ],
+    )
+    first_result = ToolMessage("a", tool_call_id=first_id)
+    second_result = ToolMessage("b", tool_call_id=second_id)
+    messages = [HumanMessage("hi"), ai_msg, first_result, second_result]
+
+    _, formatted = _format_messages(messages)
+
+    use_blocks = formatted[1]["content"]
+    result_blocks = formatted[2]["content"]
+    first_use_id, second_use_id = (block["id"] for block in use_blocks[:2])
+    assert first_use_id == _normalize_tool_call_id(first_id)
+    assert second_use_id == _normalize_tool_call_id(second_id)
+    assert first_use_id != second_use_id
+    # The results reference exactly the two rewritten tool_use IDs.
+    paired_ids = {block["tool_use_id"] for block in result_blocks}
+    assert paired_ids == {first_use_id, second_use_id}
+
+
 def test__format_tool_use_block() -> None:
    # Test we correctly format tool_use blocks when there is no corresponding tool_call.
    message = AIMessage(