anthropic[patch]: fix tool call and tool res image_url handling (#26587)

Co-authored-by: ccurme <chester.curme@gmail.com>
2025-08-15 23:57:21 +00:00 · 2024-09-17 14:30:07 -07:00 · 2024-09-17 14:30:07 -07:00 · 5ced41bf50
commit 5ced41bf50
parent c6bdd6f482
6 changed files with 164 additions and 36 deletions
--- a/libs/partners/anthropic/langchain_anthropic/chat_models.py
+++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py
@ -194,35 +194,35 @@ def _format_messages(
            # populate content
            content = []
-            for item in message.content:
+            for block in message.content:
-                if isinstance(item, str):
+                if isinstance(block, str):
-                    content.append({"type": "text", "text": item})
+                    content.append({"type": "text", "text": block})
-                elif isinstance(item, dict):
+                elif isinstance(block, dict):
-                    if "type" not in item:
+                    if "type" not in block:
-                        raise ValueError("Dict content item must have a type key")
+                        raise ValueError("Dict content block must have a type key")
-                    elif item["type"] == "image_url":
+                    elif block["type"] == "image_url":
                        # convert format
-                        source = _format_image(item["image_url"]["url"])
+                        source = _format_image(block["image_url"]["url"])
                        content.append({"type": "image", "source": source})
-                    elif item["type"] == "tool_use":
+                    elif block["type"] == "tool_use":
                        # If a tool_call with the same id as a tool_use content block
                        # exists, the tool_call is preferred.
-                        if isinstance(message, AIMessage) and item["id"] in [
+                        if isinstance(message, AIMessage) and block["id"] in [
                            tc["id"] for tc in message.tool_calls
                        ]:
                            overlapping = [
                                tc
                                for tc in message.tool_calls
-                                if tc["id"] == item["id"]
+                                if tc["id"] == block["id"]
                            ]
                            content.extend(
                                _lc_tool_calls_to_anthropic_tool_use_blocks(overlapping)
                            )
                        else:
-                            item.pop("text", None)
+                            block.pop("text", None)
-                            content.append(item)
+                            content.append(block)
-                    elif item["type"] == "text":
+                    elif block["type"] == "text":
-                        text = item.get("text", "")
+                        text = block.get("text", "")
                        # Only add non-empty strings for now as empty ones are not
                        # accepted.
                        # https://github.com/anthropics/anthropic-sdk-python/issues/461
@ -230,29 +230,45 @@ def _format_messages(
                            content.append(
                                {
                                    k: v
-                                    for k, v in item.items()
+                                    for k, v in block.items()
                                    if k in ("type", "text", "cache_control")
                                }
                            )
                    elif block["type"] == "tool_result":
                        tool_content = _format_messages(
                            [HumanMessage(block["content"])]
                        )[1][0]["content"]
                        content.append({**block, **{"content": tool_content}})
                    else:
-                        content.append(item)
+                        content.append(block)
                else:
                    raise ValueError(
-                        f"Content items must be str or dict, instead was: {type(item)}"
+                        f"Content blocks must be str or dict, instead was: "
                        f"{type(block)}"
                    )
        elif isinstance(message, AIMessage) and message.tool_calls:
            content = (
                []
                if not message.content
                else [{"type": "text", "text": message.content}]
            )
            # Note: Anthropic can't have invalid tool calls as presently defined,
            # since the model already returns dicts args not JSON strings, and invalid
            # tool calls are those with invalid JSON for args.
            content += _lc_tool_calls_to_anthropic_tool_use_blocks(message.tool_calls)
        else:
            content = message.content
        # Ensure all tool_calls have a tool_use content block
        if isinstance(message, AIMessage) and message.tool_calls:
            content = content or []
            content = (
                [{"type": "text", "text": message.content}]
                if isinstance(content, str) and content
                else content
            )
            tool_use_ids = [
                cast(dict, block)["id"]
                for block in content
                if cast(dict, block)["type"] == "tool_use"
            ]
            missing_tool_calls = [
                tc for tc in message.tool_calls if tc["id"] not in tool_use_ids
            ]
            cast(list, content).extend(
                _lc_tool_calls_to_anthropic_tool_use_blocks(missing_tool_calls)
            )
        formatted_messages.append({"role": role, "content": content})
    return system, formatted_messages
--- a/libs/partners/anthropic/tests/integration_tests/test_standard.py
+++ b/libs/partners/anthropic/tests/integration_tests/test_standard.py
@ -21,6 +21,10 @@ class TestAnthropicStandard(ChatModelIntegrationTests):
    def supports_image_inputs(self) -> bool:
        return True
    @property
    def supports_image_tool_message(self) -> bool:
        """Opt this test class in to ``test_image_tool_message``.

        The standard test returns early without exercising the model when
        this flag is False.
        """
        return True
    @property
    def supports_anthropic_inputs(self) -> bool:
        """Opt this test class in to ``test_anthropic_inputs``.

        The standard test returns early without exercising the model when
        this flag is False.
        """
        return True
--- a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py
+++ b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py
@ -366,15 +366,36 @@ def test_convert_to_anthropic_tool(
 def test__format_messages_with_tool_calls() -> None:
    system = SystemMessage("fuzz")  # type: ignore[misc]
    human = HumanMessage("foo")  # type: ignore[misc]
-    ai = AIMessage(  # type: ignore[misc]
+    ai = AIMessage(
-        "",
+        "",  # with empty string
        tool_calls=[{"name": "bar", "id": "1", "args": {"baz": "buzz"}}],
    )
-    tool = ToolMessage(  # type: ignore[misc]
+    ai2 = AIMessage(
        [],  # with empty list
        tool_calls=[{"name": "bar", "id": "2", "args": {"baz": "buzz"}}],
    )
    tool = ToolMessage(
        "blurb",
        tool_call_id="1",
    )
-    messages = [system, human, ai, tool]
+    tool_image_url = ToolMessage(
        [{"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,...."}}],
        tool_call_id="2",
    )
    tool_image = ToolMessage(
        [
            {
                "type": "image",
                "source": {
                    "data": "....",
                    "type": "base64",
                    "media_type": "image/jpeg",
                },
            }
        ],
        tool_call_id="3",
    )
    messages = [system, human, ai, tool, ai2, tool_image_url, tool_image]
    expected = (
        "fuzz",
        [
@ -401,6 +422,52 @@ def test__format_messages_with_tool_calls() -> None:
                    }
                ],
            },
            {
                "role": "assistant",
                "content": [
                    {
                        "type": "tool_use",
                        "name": "bar",
                        "id": "2",
                        "input": {"baz": "buzz"},
                    }
                ],
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "tool_result",
                        "content": [
                            {
                                "type": "image",
                                "source": {
                                    "data": "....",
                                    "type": "base64",
                                    "media_type": "image/jpeg",
                                },
                            }
                        ],
                        "tool_use_id": "2",
                        "is_error": False,
                    },
                    {
                        "type": "tool_result",
                        "content": [
                            {
                                "type": "image",
                                "source": {
                                    "data": "....",
                                    "type": "base64",
                                    "media_type": "image/jpeg",
                                },
                            }
                        ],
                        "tool_use_id": "3",
                        "is_error": False,
                    },
                ],
            },
        ],
    )
    actual = _format_messages(messages)
@ -454,8 +521,6 @@ def test__format_messages_with_str_content_and_tool_calls() -> None:
 def test__format_messages_with_list_content_and_tool_calls() -> None:
    system = SystemMessage("fuzz")  # type: ignore[misc]
    human = HumanMessage("foo")  # type: ignore[misc]
    # If content and tool_calls are specified and content is a list, then content is
    # preferred.
    ai = AIMessage(  # type: ignore[misc]
        [{"type": "text", "text": "thought"}],
        tool_calls=[{"name": "bar", "id": "1", "args": {"baz": "buzz"}}],
@ -471,7 +536,15 @@ def test__format_messages_with_list_content_and_tool_calls() -> None:
            {"role": "user", "content": "foo"},
            {
                "role": "assistant",
-                "content": [{"type": "text", "text": "thought"}],
+                "content": [
                    {"type": "text", "text": "thought"},
                    {
                        "type": "tool_use",
                        "name": "bar",
                        "id": "1",
                        "input": {"baz": "buzz"},
                    },
                ],
            },
            {
                "role": "user",
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py
@ -15,7 +15,7 @@ class TestOpenAIStandard(ChatModelIntegrationTests):
    @property
    def chat_model_params(self) -> dict:
-        return {"model": "gpt-4o", "stream_usage": True}
+        return {"model": "gpt-4o-mini", "stream_usage": True}
    @property
    def supports_image_inputs(self) -> bool:
--- a/libs/standard-tests/langchain_standard_tests/integration_tests/chat_models.py
+++ b/libs/standard-tests/langchain_standard_tests/integration_tests/chat_models.py
@ -482,6 +482,37 @@ class ChatModelIntegrationTests(ChatModelTests):
        )
        model.invoke([message])
    def test_image_tool_message(self, model: BaseChatModel) -> None:
        """Smoke-test a tool result whose content is an ``image_url`` block.

        Returns immediately (so the test passes without running) unless
        ``supports_image_tool_message`` is True. Otherwise it downloads a
        JPEG over HTTP, embeds it as a base64 data URL inside a
        ``ToolMessage``, and invokes the model with a matching tool bound.
        Nothing is asserted about the response; the test fails only if an
        exception is raised along the way.

        Args:
            model: Chat model under test.
        """
        if not self.supports_image_tool_message:
            return
        # Fetched at test time, so this test needs network access.
        image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
        # Base64 text of the raw bytes, ready to embed in a data URL below.
        image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
        messages = [
            HumanMessage("get a random image using the tool and describe the weather"),
            # Empty content list: the assistant turn consists only of the
            # tool call.
            AIMessage(
                [],
                tool_calls=[
                    {"type": "tool_call", "id": "1", "name": "random_image", "args": {}}
                ],
            ),
            # Tool result answering call id "1"; its content is an image
            # block rather than text.
            ToolMessage(
                content=[
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
                    },
                ],
                tool_call_id="1",
                name="random_image",
            ),
        ]

        # Stub tool whose name matches the tool call above. It is never
        # called in this test; it is only passed to bind_tools.
        # NOTE(review): its docstring is presumably used as the tool
        # description, so treat it as runtime text -- confirm.
        def random_image() -> str:
            """Return a random image."""
            return ""

        model.bind_tools([random_image]).invoke(messages)
    def test_anthropic_inputs(self, model: BaseChatModel) -> None:
        if not self.supports_anthropic_inputs:
            return
--- a/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py
+++ b/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py
@ -134,6 +134,10 @@ class ChatModelTests(BaseStandardTests):
    def supports_anthropic_inputs(self) -> bool:
        return False
    @property
    def supports_image_tool_message(self) -> bool:
        """Whether ``test_image_tool_message`` should run for this model.

        Defaults to False; a test subclass overrides this to return True to
        opt in.
        """
        return False
 class ChatModelUnitTests(ChatModelTests):
    @property