partners: ChatAnthropic supports urls (#30809)

**Description:** partners-anthropic: ChatAnthropic now accepts both base64 data URIs and plain image URLs in the `part["image_url"]["url"]` field of a message content part. **Issue:** ChatAnthropic currently supports only base64-encoded images in `part["image_url"]["url"]`. This PR lets ChatAnthropic also accept image URLs in that field, which makes it compatible with OpenAI-format messages and makes switching between models easier. --------- Co-authored-by: Chester Curme <chester.curme@gmail.com>
2025-09-19 17:36:00 +00:00 · 2025-04-18 21:15:45 +02:00
parent d0cd115356
commit 017c8079e1
2 changed files with 119 additions and 28 deletions
--- a/libs/partners/anthropic/langchain_anthropic/chat_models.py
+++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py
@@ -103,31 +103,47 @@ def _is_builtin_tool(tool: Any) -> bool:
    return any(tool_type.startswith(prefix) for prefix in _builtin_tool_prefixes)
-def _format_image(image_url: str) -> dict:
+def _format_image(url: str) -> dict:
    """
-    Formats an image of format data:image/jpeg;base64,{b64_string}
+    Converts part["image_url"]["url"] strings (OpenAI format)
-    to a dict for anthropic api
+    to the correct Anthropic format:
    {
      "type": "base64",
      "media_type": "image/jpeg",
      "data": "/9j/4AAQSkZJRg...",
    }
-
+    Or
-    And throws an error if it's not a b64 image
+    {
-    """
+      "type": "url",
-    regex = r"^data:(?P<media_type>image/.+);base64,(?P<data>.+)$"
+      "url": "https://example.com/image.jpg",
    match = re.match(regex, image_url)
    if match is None:
        raise ValueError(
            "Anthropic only supports base64-encoded images currently."
            " Example: data:image/png;base64,'/9j/4AAQSk'..."
        )
    return {
        "type": "base64",
        "media_type": match.group("media_type"),
        "data": match.group("data"),
    }
    """
    # Base64 encoded image
    base64_regex = r"^data:(?P<media_type>image/.+);base64,(?P<data>.+)$"
    base64_match = re.match(base64_regex, url)
    if base64_match:
        return {
            "type": "base64",
            "media_type": base64_match.group("media_type"),
            "data": base64_match.group("data"),
        }
    # Url
    url_regex = r"^https?://.*$"
    url_match = re.match(url_regex, url)
    if url_match:
        return {
            "type": "url",
            "url": url,
        }
    raise ValueError(
        "Malformed url parameter."
        " Must be either an image URL (https://example.com/image.jpg)"
        " or base64 encoded string (data:image/png;base64,'/9j/4AAQSk'...)"
    )
 def _merge_messages(
@@ -578,20 +594,37 @@ class ChatAnthropic(BaseChatModel):
        See ``ChatAnthropic.with_structured_output()`` for more.
    Image input:
        See `multimodal guides <https://python.langchain.com/docs/how_to/multimodal_inputs/>`_
        for more detail.
        .. code-block:: python
            import base64
            import httpx
            from langchain_anthropic import ChatAnthropic
            from langchain_core.messages import HumanMessage
            image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
            image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
            llm = ChatAnthropic(model="claude-3-5-sonnet-latest")
            message = HumanMessage(
                content=[
                    {"type": "text", "text": "describe the weather in this image"},
                    {
-                        "type": "image_url",
+                        "type": "text",
-                        "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
+                        "text": "Can you highlight the differences between these two images?",
                    },
                    {
                        "type": "image",
                        "source_type": "base64",
                        "data": image_data,
                        "mime_type": "image/jpeg",
                    },
                    {
                        "type": "image",
                        "source_type": "url",
                        "url": image_url,
                    },
                ],
            )
@@ -600,9 +633,12 @@ class ChatAnthropic(BaseChatModel):
        .. code-block:: python
-            "The image depicts a sunny day with a partly cloudy sky. The sky is a brilliant blue color with scattered white clouds drifting across. The lighting and cloud patterns suggest pleasant, mild weather conditions. The scene shows a grassy field or meadow with a wooden boardwalk trail leading through it, indicating an outdoor setting on a nice day well-suited for enjoying nature."
+            "After examining both images carefully, I can see that they are actually identical."
    PDF input:
        See `multimodal guides <https://python.langchain.com/docs/how_to/multimodal_inputs/>`_
        for more detail.
        .. code-block:: python
            from base64 import b64encode
@@ -620,12 +656,10 @@ class ChatAnthropic(BaseChatModel):
                        [
                            "Summarize this document.",
                            {
-                                "type": "document",
+                                "type": "file",
-                                "source": {
+                                "source_type": "base64",
-                                    "type": "base64",
+                                "mime_type": "application/pdf",
-                                    "data": data,
+                                "data": data,
                                    "media_type": "application/pdf",
                                },
                            },
                        ]
                    )
--- a/libs/partners/anthropic/tests/unit_tests/test_chat_models.py
+++ b/libs/partners/anthropic/tests/unit_tests/test_chat_models.py
@@ -13,6 +13,7 @@ from pytest import CaptureFixture, MonkeyPatch
 from langchain_anthropic import ChatAnthropic
 from langchain_anthropic.chat_models import (
    _format_image,
    _format_messages,
    _merge_messages,
    convert_to_anthropic_tool,
@@ -296,6 +297,12 @@ def test__merge_messages_mutation() -> None:
    assert messages == original_messages
 def test__format_image() -> None:
    """Check that ``_format_image`` rejects a string it cannot classify.

    The input has no ``http://``/``https://`` scheme and is not a
    ``data:image/...;base64,`` URI, so a ``ValueError`` is expected.
    """
    # Scheme-less host/path: neither a data URI nor an http(s) URL.
    url = "dummyimage.com/600x400/000/fff"
    with pytest.raises(ValueError):
        _format_image(url)
@pytest.fixture()
 def pydantic() -> type[BaseModel]:
    class dummy_function(BaseModel):
@@ -770,6 +777,56 @@ def test__format_messages_with_citations() -> None:
    assert actual_messages == expected_messages
 def test__format_messages_openai_image_format() -> None:
    """Check that OpenAI-style ``image_url`` parts are converted by ``_format_messages``.

    A single human message carries one text part and two ``image_url`` parts:
    one holding a base64 data URI and one holding an ``https://`` URL. The
    test expects each ``image_url`` part to become an ``image`` block whose
    ``source`` is of type ``base64`` or ``url`` respectively.
    """
    message = HumanMessage(
        content=[
            {
                "type": "text",
                "text": "Can you highlight the differences between these two images?",
            },
            # Image supplied inline as a base64 data URI.
            {
                "type": "image_url",
                "image_url": {"url": "data:image/jpeg;base64,<base64 data>"},
            },
            # Image supplied as a remote URL.
            {
                "type": "image_url",
                "image_url": {"url": "https://<image url>"},
            },
        ],
    )
    actual_system, actual_messages = _format_messages([message])
    # Only a human message was passed in; the test expects no system prompt.
    assert actual_system is None
    expected_messages = [
        {
            "role": "user",
            "content": [
                # The text part is expected to pass through unchanged.
                {
                    "type": "text",
                    "text": (
                        "Can you highlight the differences between these two images?"
                    ),
                },
                # Data URI split into its media type and raw base64 payload.
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/jpeg",
                        "data": "<base64 data>",
                    },
                },
                # Remote URL forwarded as-is under a "url" source.
                {
                    "type": "image",
                    "source": {
                        "type": "url",
                        "url": "https://<image url>",
                    },
                },
            ],
        }
    ]
    assert actual_messages == expected_messages
 def test__format_messages_with_multiple_system() -> None:
    messages = [
        HumanMessage("baz"),