From 1a5403d84843a5b582319d1f6e78be1c4cf0706e Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Thu, 28 May 2026 16:13:57 -0400 Subject: [PATCH] fix(anthropic): normalize cross-provider tool-call IDs (#37756) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Anthropic's API rejects `tool_use`/`tool_result` IDs that don't match `^[a-zA-Z0-9_-]+$` with a 400. When a conversation thread is replayed across providers — e.g. a user switches a running thread from Kimi (via Fireworks) to Claude — the prior turns carry tool-call IDs minted by the other provider (Kimi emits `functions.write_todos:0`, whose `.` and `:` are invalid), and the request fails. Tool-call IDs are now normalized to an Anthropic-compatible form during request formatting, with the original `tool_use.id` and its paired `tool_use_id` mapped identically so they stay linked. --- .../langchain_anthropic/chat_models.py | 117 ++++++++++---- .../tests/unit_tests/test_chat_models.py | 149 ++++++++++++++++++ 2 files changed, 237 insertions(+), 29 deletions(-) diff --git a/libs/partners/anthropic/langchain_anthropic/chat_models.py b/libs/partners/anthropic/langchain_anthropic/chat_models.py index bbdcfd9e4d1..f706374d7fb 100644 --- a/libs/partners/anthropic/langchain_anthropic/chat_models.py +++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py @@ -4,6 +4,7 @@ from __future__ import annotations import copy import datetime +import hashlib import json import re import warnings @@ -237,6 +238,51 @@ def _format_image(url: str) -> dict: ) +_TOOL_CALL_ID_PATTERN = re.compile(r"^[a-zA-Z0-9_-]+$") +"""Anthropic requires `tool_use`/`tool_result` IDs to match this pattern.""" + + +def _normalize_tool_call_id(tool_call_id: str | None) -> str | None: + """Map a tool-call ID to an Anthropic-compatible form if needed. + + Anthropic rejects `tool_use`/`tool_result` IDs that don't match + `^[a-zA-Z0-9_-]+$`. 
IDs minted by other providers can violate this when a + thread is replayed across providers (e.g. Fireworks/Kimi emits + `functions.write_todos:0`, whose `.` and `:` are invalid). Valid IDs are + returned unchanged; invalid ones are hashed deterministically so that a + rewritten `tool_use.id` and its paired `tool_use_id` resolve to the same + value, both within a request and across turns. + + Empty and `None` IDs are passed through unchanged so that a genuinely + malformed request surfaces as a clear error from Anthropic rather than + being masked by a synthesized ID. + + Args: + tool_call_id: The tool-call ID to normalize. + + Returns: + The original ID if it is empty, `None`, or already valid; otherwise a + deterministic Anthropic-compatible replacement. + """ + if not tool_call_id or _TOOL_CALL_ID_PATTERN.fullmatch(tool_call_id): + return tool_call_id + digest = hashlib.sha256(tool_call_id.encode()).hexdigest() + return f"toolu_{digest[:24]}" + + +def _normalize_block_tool_use_id(block: dict) -> dict: + """Return `block` with its `tool_use_id` normalized, if it carries one. + + Mirrors `_normalize_tool_call_id` for `tool_result`-style content blocks so + that a `tool_use_id` arriving pre-structured (e.g. on a `ToolMessage` whose + content is already a list of `tool_result` blocks) stays consistent with its + paired, normalized `tool_use.id`. A no-op for already-valid IDs. 
+ """ + if "tool_use_id" in block: + return {**block, "tool_use_id": _normalize_tool_call_id(block["tool_use_id"])} + return block + + def _merge_messages( messages: Sequence[BaseMessage], ) -> list[SystemMessage | AIMessage | HumanMessage]: @@ -272,7 +318,7 @@ def _merge_messages( tool_result: dict = { "type": "tool_result", "content": tool_content, - "tool_use_id": curr.tool_call_id, + "tool_use_id": _normalize_tool_call_id(curr.tool_call_id), "is_error": curr.status == "error", } if cache_ctrl: @@ -516,7 +562,7 @@ def _format_messages( type="tool_use", name=block["name"], input=args, - id=block["id"], + id=cast("str", _normalize_tool_call_id(block["id"])), ) if caller := block.get("caller"): tool_use_block["caller"] = caller @@ -595,24 +641,30 @@ def _format_messages( ): # Tool search results with tool_reference blocks content.append( - { - k: v - for k, v in block.items() - if k - in ( - "type", - "content", - "tool_use_id", - "cache_control", - ) - }, + _normalize_block_tool_use_id( + { + k: v + for k, v in block.items() + if k + in ( + "type", + "content", + "tool_use_id", + "cache_control", + ) + }, + ), ) elif block["type"] == "tool_result": # Regular tool results that need content formatting tool_content = _format_messages( [HumanMessage(block["content"])], )[1][0]["content"] - content.append({**block, "content": tool_content}) + content.append( + _normalize_block_tool_use_id( + {**block, "content": tool_content}, + ), + ) elif block["type"] in ( "code_execution_tool_result", "bash_code_execution_tool_result", @@ -622,19 +674,21 @@ def _format_messages( "web_fetch_tool_result", ): content.append( - { - k: v - for k, v in block.items() - if k - in ( - "type", - "content", - "tool_use_id", - "is_error", # for mcp_tool_result - "cache_control", - "retrieved_at", # for web_fetch_tool_result - ) - }, + _normalize_block_tool_use_id( + { + k: v + for k, v in block.items() + if k + in ( + "type", + "content", + "tool_use_id", + "is_error", # for mcp_tool_result + 
"cache_control", + "retrieved_at", # for web_fetch_tool_result + ) + }, + ), ) else: content.append(block) @@ -662,8 +716,13 @@ def _format_messages( for block in content if cast("dict", block)["type"] == "tool_use" ] + # `tool_use_ids` are already normalized via the branches above, so + # compare against the normalized tool-call ID to avoid emitting a + # duplicate `tool_use` block when the original ID was rewritten. missing_tool_calls = [ - tc for tc in message.tool_calls if tc["id"] not in tool_use_ids + tc + for tc in message.tool_calls + if _normalize_tool_call_id(tc["id"]) not in tool_use_ids ] cast("list", content).extend( _lc_tool_calls_to_anthropic_tool_use_blocks(missing_tool_calls), @@ -2224,7 +2283,7 @@ def _lc_tool_calls_to_anthropic_tool_use_blocks( type="tool_use", name=tool_call["name"], input=tool_call["args"], - id=cast("str", tool_call["id"]), + id=cast("str", _normalize_tool_call_id(tool_call["id"])), ) for tool_call in tool_calls ] diff --git a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py index 61549b645f7..fe1f217f851 100644 --- a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py +++ b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py @@ -24,11 +24,13 @@ from pytest import CaptureFixture, MonkeyPatch from langchain_anthropic import ChatAnthropic from langchain_anthropic.chat_models import ( + _TOOL_CALL_ID_PATTERN, _create_usage_metadata, _format_image, _format_messages, _is_builtin_tool, _merge_messages, + _normalize_tool_call_id, _thinking_in_params, convert_to_anthropic_tool, ) @@ -767,6 +769,153 @@ def test__format_messages_with_tool_calls() -> None: ] +def test__normalize_tool_call_id() -> None: + # Already-valid IDs (including native Anthropic and OpenAI styles) pass + # through unchanged. 
+ for valid in ("1", "toolu_01abcDEF-_", "call_Ao02pnFYXD6GN1yzc0uXPsvF"): + assert _normalize_tool_call_id(valid) == valid + + # Empty and None IDs pass through so a malformed request surfaces a clear + # error from Anthropic rather than a synthesized ID. + assert _normalize_tool_call_id("") == "" + assert _normalize_tool_call_id(None) is None + + # Foreign IDs with characters Anthropic rejects (e.g. Fireworks/Kimi's + # `functions.write_todos:0`) are rewritten to a compatible form. + invalid = "functions.write_todos:0" + normalized = _normalize_tool_call_id(invalid) + assert normalized is not None + assert normalized != invalid + assert _TOOL_CALL_ID_PATTERN.match(normalized) + + # Deterministic + idempotent: same input always maps to the same output. + assert _normalize_tool_call_id(invalid) == normalized + assert _normalize_tool_call_id(normalized) == normalized + + # Distinct invalid IDs map to distinct replacements (no collision that + # would break multi-tool turns). + other = _normalize_tool_call_id("functions.read_file:1") + assert other != normalized + + +def test__format_messages_normalizes_cross_provider_tool_call_ids() -> None: + """A `tool_use.id` and its paired `tool_use_id` must normalize identically. + + Reproduces the Fireworks/Kimi -> Anthropic 400 from replaying a thread whose + tool-call IDs were minted by another provider. + """ + bad_id = "functions.write_todos:0" + ai = AIMessage( + "", + tool_calls=[{"name": "write_todos", "id": bad_id, "args": {"todos": []}}], + ) + tool = ToolMessage("done", tool_call_id=bad_id) + + _, formatted = _format_messages([HumanMessage("hi"), ai, tool]) + + tool_use = formatted[1]["content"][0] + tool_result = formatted[2]["content"][0] + assert tool_use["type"] == "tool_use" + assert tool_result["type"] == "tool_result" + + # The rewritten IDs are valid and still reference each other. 
+ assert _TOOL_CALL_ID_PATTERN.match(tool_use["id"]) + assert tool_use["id"] == tool_result["tool_use_id"] + assert tool_use["id"] == _normalize_tool_call_id(bad_id) + + +def test__format_messages_normalizes_prestructured_tool_result_id() -> None: + """A `ToolMessage` whose content is already `tool_result` blocks is covered. + + This shape bypasses the `tool_call_id` normalization in `_merge_messages` and + flows through the `tool_result` content branch, so its `tool_use_id` must + still be normalized to match the paired `tool_use.id`. + """ + bad_id = "functions.write_todos:0" + ai = AIMessage( + "", + tool_calls=[{"name": "write_todos", "id": bad_id, "args": {"todos": []}}], + ) + tool = ToolMessage( + [{"type": "tool_result", "tool_use_id": bad_id, "content": "done"}], + tool_call_id=bad_id, + ) + + _, formatted = _format_messages([HumanMessage("hi"), ai, tool]) + + tool_use = formatted[1]["content"][0] + tool_result = formatted[2]["content"][0] + assert tool_use["id"] == tool_result["tool_use_id"] + assert tool_use["id"] == _normalize_tool_call_id(bad_id) + + +def test__format_messages_normalizes_inline_tool_use_block() -> None: + """An invalid ID on an inline `tool_use` content block is normalized. + + Covers the v1-compat destination where tool calls are stored as content + blocks rather than the `tool_calls` attribute, paired with a `ToolMessage`. + """ + bad_id = "functions.search:2" + ai = AIMessage( + [{"type": "tool_use", "name": "search", "id": bad_id, "input": {"q": "x"}}], + ) + tool = ToolMessage("result", tool_call_id=bad_id) + + _, formatted = _format_messages([HumanMessage("hi"), ai, tool]) + + tool_use = formatted[1]["content"][0] + tool_result = formatted[2]["content"][0] + assert _TOOL_CALL_ID_PATTERN.match(tool_use["id"]) + assert tool_use["id"] == tool_result["tool_use_id"] + + +def test__format_messages_dedupes_overlapping_normalized_tool_use() -> None: + """An invalid ID shared by a `tool_use` block and `tool_calls` yields one block. 
+ + Guards the dedup branch: `tool_use_ids` are normalized, so the comparison + against the (also normalized) tool-call ID must not re-emit a duplicate block. + """ + bad_id = "functions.write_todos:0" + ai = AIMessage( + [{"type": "tool_use", "name": "write_todos", "id": bad_id, "input": {"a": 1}}], + tool_calls=[{"name": "write_todos", "id": bad_id, "args": {"a": 1}}], + ) + + _, formatted = _format_messages([HumanMessage("hi"), ai]) + + tool_use_blocks = [b for b in formatted[1]["content"] if b["type"] == "tool_use"] + assert len(tool_use_blocks) == 1 + assert _TOOL_CALL_ID_PATTERN.match(tool_use_blocks[0]["id"]) + + +def test__format_messages_normalizes_distinct_ids_independently() -> None: + """Multiple distinct invalid IDs in one turn stay distinct and correctly paired.""" + id_a = "functions.write_todos:0" + id_b = "functions.read_file:1" + ai = AIMessage( + "", + tool_calls=[ + {"name": "write_todos", "id": id_a, "args": {}}, + {"name": "read_file", "id": id_b, "args": {}}, + ], + ) + tool_a = ToolMessage("a", tool_call_id=id_a) + tool_b = ToolMessage("b", tool_call_id=id_b) + + _, formatted = _format_messages([HumanMessage("hi"), ai, tool_a, tool_b]) + + tool_uses = formatted[1]["content"] + results = formatted[2]["content"] + assert tool_uses[0]["id"] == _normalize_tool_call_id(id_a) + assert tool_uses[1]["id"] == _normalize_tool_call_id(id_b) + assert tool_uses[0]["id"] != tool_uses[1]["id"] + # Each result still pairs with its own tool_use. + assert {r["tool_use_id"] for r in results} == { + tool_uses[0]["id"], + tool_uses[1]["id"], + } + + def test__format_tool_use_block() -> None: # Test we correctly format tool_use blocks when there is no corresponding tool_call. message = AIMessage(