core[minor]: Add msg content formatting util

This commit is contained in:
Bagatur 2024-08-28 16:31:30 -07:00
parent d6c4803ab0
commit 49f7c8cdd8
2 changed files with 931 additions and 25 deletions

View File

@@ -9,8 +9,10 @@ Some examples of what you can do with these functions include:
from __future__ import annotations

import base64
import inspect
import json
import re
from functools import partial
from typing import (
    TYPE_CHECKING,
@@ -37,6 +39,9 @@ from langchain_core.messages.human import HumanMessage, HumanMessageChunk
from langchain_core.messages.modifier import RemoveMessage
from langchain_core.messages.system import SystemMessage, SystemMessageChunk
from langchain_core.messages.tool import ToolMessage, ToolMessageChunk
from langchain_core.messages.tool import (
    tool_call as create_tool_call,
)

if TYPE_CHECKING:
    from langchain_text_splitters import TextSplitter
@@ -252,7 +257,9 @@ def _create_message_from_message_type(
    return message


def _convert_to_message(
    message: MessageLikeRepresentation, *, copy: bool = False
) -> BaseMessage:
    """Instantiate a message from a variety of message formats.

    The message format can be one of the following:
@@ -274,7 +281,10 @@ def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage:
        ValueError: if the message dict does not contain the required keys.
    """
    if isinstance(message, BaseMessage):
        if copy:
            _message = message.__class__(**message.dict())
        else:
            _message = message
    elif isinstance(message, str):
        _message = _create_message_from_message_type("human", message)
    elif isinstance(message, Sequence) and len(message) == 2:
@@ -305,6 +315,8 @@ def _convert_to_message(message: MessageLikeRepresentation) -> BaseMessage:
def convert_to_messages(
    messages: Union[Iterable[MessageLikeRepresentation], PromptValue],
    *,
    copy: bool = False,
) -> List[BaseMessage]:
    """Convert a sequence of messages to a list of messages.
@ -319,35 +331,87 @@ def convert_to_messages(
if isinstance(messages, PromptValue): if isinstance(messages, PromptValue):
return messages.to_messages() return messages.to_messages()
return [_convert_to_message(m) for m in messages] return [_convert_to_message(m, copy=copy) for m in messages]
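
# Illustrative sketch (assumed behavior of the new ``copy`` flag; values are
# placeholders): ``copy=True`` re-instantiates BaseMessage inputs so that downstream
# utilities can mutate ``.content`` without touching the caller's original objects.
#
#     original = HumanMessage("hello")
#     copied = convert_to_messages([original], copy=True)[0]
#     copied is original   # -> False (a fresh object with the same fields)
#     copied.content       # -> "hello"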
def _runnable_support(*args: Callable, supports_single: bool = False) -> Callable:
    if supports_single:

        def runnable_support(func: Callable) -> Callable:
            @overload
            def wrapped(
                messages: Literal[None] = None, **kwargs: Any
            ) -> Runnable[
                Union[MessageLikeRepresentation, Sequence[MessageLikeRepresentation]],
                Union[BaseMessage, List[BaseMessage]],
            ]: ...

            @overload
            def wrapped(
                messages: Sequence[Union[BaseMessage, Dict, Tuple]], **kwargs: Any
            ) -> List[BaseMessage]: ...

            @overload
            def wrapped(
                messages: MessageLikeRepresentation, **kwargs: Any
            ) -> BaseMessage: ...

            def wrapped(
                messages: Union[
                    MessageLikeRepresentation, Sequence[MessageLikeRepresentation], None
                ] = None,
                **kwargs: Any,
            ) -> Union[
                BaseMessage,
                List[BaseMessage],
                Runnable[
                    Union[
                        MessageLikeRepresentation, Sequence[MessageLikeRepresentation]
                    ],
                    Union[BaseMessage, List[BaseMessage]],
                ],
            ]:
                from langchain_core.runnables.base import RunnableLambda

                if messages is not None:
                    return func(messages, **kwargs)
                else:
                    return RunnableLambda(partial(func, **kwargs), name=func.__name__)

            wrapped.__doc__ = func.__doc__
            return wrapped
    else:

        def runnable_support(func: Callable) -> Callable:
            @overload
            def wrapped(
                messages: Literal[None] = None, **kwargs: Any
            ) -> Runnable[Sequence[MessageLikeRepresentation], List[BaseMessage]]: ...

            @overload
            def wrapped(
                messages: Sequence[MessageLikeRepresentation], **kwargs: Any
            ) -> List[BaseMessage]: ...

            def wrapped(
                messages: Union[Sequence[MessageLikeRepresentation], None] = None,
                **kwargs: Any,
            ) -> Union[
                Runnable[Sequence[MessageLikeRepresentation], List[BaseMessage]],
                List[BaseMessage],
            ]:
                from langchain_core.runnables.base import RunnableLambda

                if messages is not None:
                    return func(messages, **kwargs)
                else:
                    return RunnableLambda(partial(func, **kwargs), name=func.__name__)

            wrapped.__doc__ = func.__doc__
            return wrapped

    return runnable_support(*args) if args else cast(Callable, runnable_support)
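
# Illustrative sketch (assumed usage; ``msgs`` is a placeholder): utilities decorated
# with ``@_runnable_support`` run eagerly when given messages, and return a
# RunnableLambda (composable in a chain) when called with keyword arguments only.
#
#     format_content_as(msgs, format="openai", text="string")         # eager call
#     formatter = format_content_as(format="openai", text="string")   # -> Runnable
#     formatter.invoke(msgs)                                           # same result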
@_runnable_support
@@ -845,6 +909,571 @@ def trim_messages(
    )
@_runnable_support(supports_single=True)
def format_content_as(
messages: Union[MessageLikeRepresentation, Iterable[MessageLikeRepresentation]],
*,
format: Literal["openai", "anthropic"],
text: Literal["string", "block"],
) -> Union[BaseMessage, List[BaseMessage]]:
"""Convert message contents into a standard format.
.. versionadded:: 0.2.36
Args:
messages: Message-like object or iterable of objects whose contents are already
in OpenAI, Anthropic, Bedrock Converse, or VertexAI formats.
format: Format to convert message contents to.
text: How to format text contents. If ``text='string'`` then any string
contents are left as strings. If a message has content blocks that are all
of type 'text', these are joined with a newline to make a single string. If
a message has content blocks and at least one isn't of type 'text', then
all blocks are left as dicts. If ``text='block'`` then all contents are
turned into a list of dicts.
Returns:
A single BaseMessage if a single message-like object was passed in, otherwise a
list of BaseMessages.
.. dropdown:: Basic usage
:open:
.. code-block:: python
from langchain_core.messages import format_content_as
messages = [
SystemMessage,
{},
(),
AIMessage(),
ToolMessage(),
]
oai_strings = format_content_as(messages, format="openai", text="string")
anthropic_blocks = format_content_as(messages, format="anthropic", text="block")
.. dropdown:: Chain usage
:open:
.. code-block:: python
from langchain_core.messages import format_content_as
from langchain.chat_models import init_chat_model
formatter = format_content_as(format="openai", text="block")
llm = init_chat_model() | formatter
llm.invoke(
[{"role": "user", "content": "how are you"}],
config={"model": "gpt-4o"},
)
# -> AIMessage([{"type": "text", "text": ""}], ...)
llm.invoke(
[{"role": "user", "content": "whats your name"}],
config={"model": "claude-3-5-sonnet-20240620"})
# -> AIMessage([{"type": "text", "text": ""}], ...)
.. note:: Doesn't support streaming
This util does not support formatting streamed chunks on the fly (i.e.
"transforming" chunks). This means if you pipe the outputs of a model to this
formatter in a chain, the chain will not have token-level streaming when
using ``chain.stream()/.astream()``. You'll still see the
token stream when using ``chain.astream_events()``, but the message chunks will
not yet be formatted.
.. code-block:: python
from langchain_core.messages import format_content_as
from langchain.chat_models import init_chat_model
formatter = format_content_as(format="openai", text="block")
llm = init_chat_model() | formatter
# Will contain a single, completed chunk.
list(llm.stream(
[{"role": "user", "content": "how are you"}],
config={"model": "gpt-4o"},
))
# Will include token-level events, but the streamed chunks will not yet be
# formatted.
async for chunk in llm.astream_events(
[{"role": "user", "content": "how are you"}],
config={"model": "gpt-4o"},
version="v2",
):
...
""" # noqa: E501
if is_single := isinstance(messages, (BaseMessage, dict)):
messages = [messages]
messages = convert_to_messages(messages, copy=True)
if format.lower() == "openai":
formatted = _format_contents_as_openai(messages, text=text)
elif format.lower() == "anthropic":
formatted = _format_contents_as_anthropic(messages, text=text)
else:
raise ValueError(
f"Unrecognized {format=}. Expected one of ('openai', 'anthropic')."
)
if is_single:
return formatted[0]
else:
return formatted
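
# Illustrative sketch (placeholder values) of how the ``text`` argument changes the
# output shape for the same input, following the branches implemented below:
#
#     msg = HumanMessage([{"type": "text", "text": "a"}, {"type": "text", "text": "b"}])
#     format_content_as(msg, format="openai", text="string").content
#     # -> "a\nb"
#     format_content_as(msg, format="openai", text="block").content
#     # -> [{"type": "text", "text": "a"}, {"type": "text", "text": "b"}]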
def _format_contents_as_openai(
messages: Sequence[BaseMessage], *, text: Literal["string", "block"]
) -> List[BaseMessage]:
"""Mutates messages so their contents match OpenAI messages API."""
updated_messages: list = []
for i, message in enumerate(messages):
tool_messages: list = []
if not message.content:
message.content = "" if text == "string" else []
elif isinstance(message.content, str):
if text == "string":
pass
else:
message.content = [{"type": "text", "text": message.content}]
else:
if text == "string" and all(
isinstance(block, str) or block.get("type") == "text"
for block in message.content
):
message.content = "\n".join(
block if isinstance(block, str) else block["text"]
for block in message.content
)
else:
content: List[dict] = []
for j, block in enumerate(message.content):
# OpenAI format
if isinstance(block, str):
content.append({"type": "text", "text": block})
elif block.get("type") == "text":
if missing := [k for k in ("text",) if k not in block]:
raise ValueError(
f"Unrecognized content block at "
f"messages[{i}].content[{j}] has 'type': 'text' "
f"but is missing expected key(s) "
f"{missing}. Full content block:\n\n{block}"
)
content.append({"type": block["type"], "text": block["text"]})
elif block.get("type") == "image_url":
if missing := [k for k in ("image_url",) if k not in block]:
raise ValueError(
f"Unrecognized content block at "
f"messages[{i}].content[{j}] has 'type': 'image_url' "
f"but is missing expected key(s) "
f"{missing}. Full content block:\n\n{block}"
)
content.append(
{"type": "image_url", "image_url": block["image_url"]}
)
# Anthropic and Bedrock converse format
elif (block.get("type") == "image") or "image" in block:
# Anthropic
if source := block.get("source"):
if missing := [
k
for k in ("media_type", "type", "data")
if k not in source
]:
raise ValueError(
f"Unrecognized content block at "
f"messages[{i}].content[{j}] has 'type': 'image' "
f"but 'source' is missing expected key(s) "
f"{missing}. Full content block:\n\n{block}"
)
content.append(
{
"type": "image_url",
"image_url": {
"url": (
f"data:{source['media_type']};"
f"{source['type']},{source['data']}"
)
},
}
)
# Bedrock converse
elif image := block.get("image"):
raise ValueError("1064")
if missing := [
k for k in ("source", "format") if k not in image
]:
raise ValueError(
f"Unrecognized content block at "
f"messages[{i}].content[{j}] has key 'image', "
f"but 'image' is missing expected key(s) "
f"{missing}. Full content block:\n\n{block}"
)
b64_image = _bytes_to_b64_str(image["source"]["bytes"])
content.append(
{
"type": "image_url",
"image_url": {
"url": (
f"data:image/{image['format']};"
f"base64,{b64_image}"
)
},
}
)
else:
raise ValueError(
f"Unrecognized content block at "
f"messages[{i}].content[{j}] has 'type': 'image' "
f"but does not have a 'source' or 'image' key. Full "
f"content block:\n\n{block}"
)
elif block.get("type") == "tool_use":
if missing := [
k for k in ("id", "name", "input") if k not in block
]:
raise ValueError(
f"Unrecognized content block at "
f"messages[{i}].content[{j}] has 'type': 'tool_use', "
f"but is missing expected key(s) "
f"{missing}. Full content block:\n\n{block}"
)
if not any(
tool_call["id"] == block["id"]
for tool_call in cast(AIMessage, message).tool_calls
):
cast(AIMessage, message).tool_calls.append(
create_tool_call(
name=block["name"],
id=block["id"],
args=block["input"],
)
)
elif block.get("type") == "tool_result":
if missing := [
k for k in ("content", "tool_use_id") if k not in block
]:
raise ValueError(
f"Unrecognized content block at "
f"messages[{i}].content[{j}] has 'type': "
f"'tool_result', but is missing expected key(s) "
f"{missing}. Full content block:\n\n{block}"
)
tool_message = ToolMessage(
block["content"],
tool_call_id=block["tool_use_id"],
status="error" if block.get("is_error") else "success",
)
# Recurse to make sure tool message contents are OpenAI format.
tool_messages.extend(
_format_contents_as_openai([tool_message], text=text)
)
elif (block.get("type") == "json") or "json" in block:
if "json" not in block:
raise ValueError(
f"Unrecognized content block at "
f"messages[{i}].content[{j}] has 'type': 'json' "
f"but does not have a 'json' key. Full "
f"content block:\n\n{block}"
)
content.append(
{"type": "text", "text": json.dumps(block["json"])}
)
elif (
block.get("type") == "guard_content"
) or "guard_content" in block:
if (
"guard_content" not in block
or "text" not in block["guard_content"]
):
raise ValueError(
f"Unrecognized content block at "
f"messages[{i}].content[{j}] has 'type': "
f"'guard_content' but does not have a "
f"messages[{i}].content[{j}]['guard_content']['text'] "
f"key. Full content block:\n\n{block}"
)
text = block["guard_content"]["text"]
if isinstance(text, dict):
text = text["text"]
content.append({"type": "text", "text": text})
# VertexAI format
elif block.get("type") == "media":
if missing := [
k for k in ("mime_type", "data") if k not in block
]:
raise ValueError(
f"Unrecognized content block at "
f"messages[{i}].content[{j}] has 'type': "
f"'media' but does not have key(s) {missing}. Full "
f"content block:\n\n{block}"
)
if "image" not in block["mime_type"]:
raise ValueError(
f"OpenAI messages can only support text and image data."
f" Received content block with media of type:"
f" {block['mime_type']}"
)
b64_image = _bytes_to_b64_str(block["data"])
content.append(
{
"type": "image_url",
"image_url": {
"url": (
f"data:{block['mime_type']};base64,{b64_image}"
)
},
}
)
else:
raise ValueError(
f"Unrecognized content block at "
f"messages[{i}].content[{j}] does not match OpenAI, "
f"Anthropic, Bedrock Converse, or VertexAI format. Full "
f"content block:\n\n{block}"
)
message.content = content # type: ignore[assignment]
updated_messages.extend([message, *tool_messages])
return updated_messages
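
# Illustrative sketch (placeholder data) of the image conversion performed above: an
# Anthropic-style base64 image block is rewritten as an OpenAI ``image_url`` block.
#
#     {"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": "<b64>"}}
#     # becomes
#     {"type": "image_url", "image_url": {"url": "data:image/png;base64,<b64>"}}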
_OPTIONAL_ANTHROPIC_KEYS = ("cache_control", "is_error")
def _format_contents_as_anthropic(
messages: Sequence[BaseMessage], *, text: Literal["string", "block"]
) -> List[BaseMessage]:
"""Mutates messages so their contents match Anthropic messages API."""
updated_messages: List = []
for i, message in enumerate(messages):
if isinstance(message, ToolMessage):
tool_result_block = {
"type": "tool_result",
"content": message.content,
"tool_use_id": message.tool_call_id,
"is_error": message.status == "error",
}
if updated_messages and isinstance(updated_messages[-1], HumanMessage):
if isinstance(updated_messages[-1].content, str):
updated_messages[-1].content = [
{"type": "text", "text": updated_messages[-1].content}
]
updated_messages[-1].content.append(tool_result_block)
else:
updated_messages.append(HumanMessage([tool_result_block]))
continue
elif not message.content:
message.content = "" if text == "string" else []
elif isinstance(message.content, str):
if text == "string":
pass
else:
message.content = [{"type": "text", "text": message.content}]
else:
if text == "string" and all(
isinstance(block, str)
or (block.get("type") == "text" and "cache_control" not in block)
for block in message.content
):
message.content = "\n".join(
block if isinstance(block, str) else block["text"]
for block in message.content
)
else:
content = []
for j, block in enumerate(message.content):
    # OpenAI format
    if isinstance(block, str):
        content.append({"type": "text", "text": block})
        continue
    # Carry over optional Anthropic-only keys (e.g. cache_control) when present.
    block_extra = {
        k: block[k] for k in _OPTIONAL_ANTHROPIC_KEYS if k in block
    }
    if block.get("type") == "text":
if missing := [k for k in ("text",) if k not in block]:
raise ValueError(
f"Unrecognized content block at "
f"messages[{i}].content[{j}] has 'type': 'text' "
f"but is missing expected key(s) "
f"{missing}. Full content block:\n\n{block}"
)
content.append(
{"type": "text", "text": block["text"], **block_extra}
)
elif block.get("type") == "image_url":
if missing := [k for k in ("image_url",) if k not in block]:
raise ValueError(
f"Unrecognized content block at "
f"messages[{i}].content[{j}] has 'type': 'image_url' "
f"but is missing expected key(s) "
f"{missing}. Full content block:\n\n{block}"
)
content.append(
{**_openai_image_to_anthropic(block), **block_extra}
)
# Anthropic and Bedrock converse format
elif (block.get("type") == "image") or "image" in block:
# Anthropic
if source := block.get("source"):
if missing := [
k
for k in ("media_type", "type", "data")
if k not in source
]:
raise ValueError(
f"Unrecognized content block at "
f"messages[{i}].content[{j}] has 'type': 'image' "
f"but 'source' is missing expected key(s) "
f"{missing}. Full content block:\n\n{block}"
)
content.append(
{
"type": "image",
"source": block["source"],
**block_extra,
}
)
# Bedrock converse
elif image := block.get("image"):
if missing := [
k for k in ("source", "format") if k not in image
]:
raise ValueError(
f"Unrecognized content block at "
f"messages[{i}].content[{j}] has key 'image', "
f"but 'image' is missing expected key(s) "
f"{missing}. Full content block:\n\n{block}"
)
content.append(
{
**_bedrock_converse_image_to_anthropic(
block["image"]
),
**block_extra,
}
)
else:
raise ValueError(
f"Unrecognized content block at "
f"messages[{i}].content[{j}] has 'type': 'image' "
f"but does not have a 'source' or 'image' key. Full "
f"content block:\n\n{block}"
)
elif block.get("type") == "tool_use":
if missing := [
k for k in ("id", "name", "input") if k not in block
]:
raise ValueError(
f"Unrecognized content block at "
f"messages[{i}].content[{j}] has 'type': 'tool_use', "
f"but is missing expected key(s) "
f"{missing}. Full content block:\n\n{block}"
)
content.append(
{
"type": "tool_use",
"name": block["name"],
"id": block["id"],
"input": block["input"],
**block_extra,
}
)
if not any(
tool_call["id"] == block["id"]
for tool_call in cast(AIMessage, message).tool_calls
):
cast(AIMessage, message).tool_calls.append(
create_tool_call(
name=block["name"],
id=block["id"],
args=block["input"],
)
)
elif block.get("type") == "tool_result":
if missing := [
k for k in ("content", "tool_use_id") if k not in block
]:
raise ValueError(
f"Unrecognized content block at "
f"messages[{i}].content[{j}] has 'type': "
f"'tool_result', but is missing expected key(s) "
f"{missing}. Full content block:\n\n{block}"
)
content.append(
{
"type": "tool_result",
"content": block["content"],
"tool_use_id": block["tool_use_id"],
**block_extra,
}
)
elif (block.get("type") == "json") or "json" in block:
if "json" not in block:
raise ValueError(
f"Unrecognized content block at "
f"messages[{i}].content[{j}] has 'type': 'json' "
f"but does not have a 'json' key. Full "
f"content block:\n\n{block}"
)
content.append(
{
"type": "text",
"text": json.dumps(block["json"]),
**block_extra,
}
)
elif (
block.get("type") == "guard_content"
) or "guard_content" in block:
if (
"guard_content" not in block
or "text" not in block["guard_content"]
):
raise ValueError(
f"Unrecognized content block at "
f"messages[{i}].content[{j}] has 'type': "
f"'guard_content' but does not have a "
f"messages[{i}].content[{j}]['guard_content']['text'] "
f"key. Full content block:\n\n{block}"
)
text = block["guard_content"]["text"]
if isinstance(text, dict):
text = text["text"]
content.append({"type": "text", "text": text, **block_extra})
# VertexAI format
elif block.get("type") == "media":
if missing := [
k for k in ("mime_type", "data") if k not in block
]:
raise ValueError(
f"Unrecognized content block at "
f"messages[{i}].content[{j}] has 'type': "
f"'media' but does not have key(s) {missing}. Full "
f"content block:\n\n{block}"
)
if "image" not in block["mime_type"]:
raise ValueError(
f"Anthropic messages can only support text and image "
f"data. Received content block with media of type: "
f"{block['mime_type']}"
)
content.append(
{**_vertexai_image_to_anthropic(block), **block_extra}
)
else:
raise ValueError(
f"Unrecognized content block at "
f"messages[{i}].content[{j}] does not match OpenAI, "
f"Anthropic, Bedrock Converse, or VertexAI format. Full "
f"content block:\n\n{block}"
)
message.content = content # type: ignore[assignment]
updated_messages.append(message)
return merge_message_runs(updated_messages)
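
# Illustrative sketch (placeholder values): when targeting Anthropic, a ToolMessage is
# folded into a ``tool_result`` block on a human turn, as implemented above.
#
#     ToolMessage("42", tool_call_id="abc")
#     # becomes (or is appended to the preceding HumanMessage as)
#     HumanMessage([{"type": "tool_result", "content": "42", "tool_use_id": "abc", "is_error": False}])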
def _first_max_tokens(
    messages: Sequence[BaseMessage],
    *,
@@ -1012,3 +1641,59 @@ def _is_message_type(
    types_types = tuple(t for t in types if isinstance(t, type))
    return message.type in types_str or isinstance(message, types_types)
def _bytes_to_b64_str(bytes_: bytes) -> str:
return base64.b64encode(bytes_).decode("utf-8")
def _openai_image_to_anthropic(image: dict) -> Dict:
"""
Formats an image of format data:image/jpeg;base64,{b64_string}
to a dict for anthropic api
{
"type": "base64",
"media_type": "image/jpeg",
"data": "/9j/4AAQSkZJRg...",
}
And throws an error if it's not a b64 image
"""
regex = r"^data:(?P<media_type>image/.+);base64,(?P<data>.+)$"
match = re.match(regex, image["image_url"])
if match is None:
raise ValueError(
"Anthropic only supports base64-encoded images currently."
" Example: data:image/png;base64,'/9j/4AAQSk'..."
)
return {
"type": "image",
"source": {
"type": "base64",
"media_type": match.group("media_type"),
"data": match.group("data"),
},
}
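
# Illustrative sketch (placeholder data) of how the helper above parses an OpenAI data
# URL into an Anthropic image source dict:
#
#     _openai_image_to_anthropic({"image_url": "data:image/png;base64,iVBORw0K..."})
#     # -> {
#     #        "type": "image",
#     #        "source": {"type": "base64", "media_type": "image/png", "data": "iVBORw0K..."},
#     #    }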
def _bedrock_converse_image_to_anthropic(image: dict) -> dict:
return {
"type": "image",
"source": {
"media_type": f"image/{image['format']}",
"type": "base64",
"data": _bytes_to_b64_str(image["source"]["bytes"]),
},
}
def _vertexai_image_to_anthropic(image: dict) -> dict:
return {
"type": "image",
"source": {
"media_type": image["mime_type"],
"type": "base64",
"data": _bytes_to_b64_str(image["data"]),
},
}

View File

@@ -13,8 +13,10 @@ from langchain_core.messages import (
    ToolMessage,
)
from langchain_core.messages.utils import (
    _bytes_to_b64_str,
    convert_to_messages,
    filter_messages,
    format_content_as,
    merge_message_runs,
    trim_messages,
)
@@ -556,3 +558,222 @@ def test_convert_to_messages() -> None:


@pytest.mark.xfail(reason="AI message does not support refusal key yet.")
def test_convert_to_messages_openai_refusal() -> None:
    convert_to_messages([{"role": "assistant", "refusal": "9.1"}])
def create_base64_image(format: str = "jpeg") -> str:
return f"data:image/{format};base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAABAAEDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD3+iiigD//2Q==" # noqa: E501
def test_format_content_as_single_message() -> None:
message = HumanMessage(content="Hello")
result = format_content_as(message, format="openai", text="string")
assert isinstance(result, BaseMessage)
assert result.content == "Hello"
def test_format_content_as_multiple_messages() -> None:
messages = [
SystemMessage(content="System message"),
HumanMessage(content="Human message"),
AIMessage(content="AI message"),
]
result = format_content_as(messages, format="openai", text="string")
assert isinstance(result, list)
assert len(result) == 3
assert all(isinstance(msg, BaseMessage) for msg in result)
assert [msg.content for msg in result] == [
"System message",
"Human message",
"AI message",
]
def test_format_content_as_openai_string() -> None:
messages = [
HumanMessage(
content=[
{"type": "text", "text": "Hello"},
{"type": "text", "text": "World"},
]
),
AIMessage(
content=[{"type": "text", "text": "Hi"}, {"type": "text", "text": "there"}]
),
]
result = format_content_as(messages, format="openai", text="string")
assert [msg.content for msg in result] == ["Hello\nWorld", "Hi\nthere"]
def test_format_content_as_openai_block() -> None:
messages = [
HumanMessage(content="Hello"),
AIMessage(content="Hi there"),
]
result = format_content_as(messages, format="openai", text="block")
assert [msg.content for msg in result] == [
[{"type": "text", "text": "Hello"}],
[{"type": "text", "text": "Hi there"}],
]
def test_format_content_as_anthropic_string() -> None:
messages = [
HumanMessage(
content=[
{"type": "text", "text": "Hello"},
{"type": "text", "text": "World"},
]
),
AIMessage(
content=[{"type": "text", "text": "Hi"}, {"type": "text", "text": "there"}]
),
]
result = format_content_as(messages, format="anthropic", text="string")
assert [msg.content for msg in result] == ["Hello\nWorld", "Hi\nthere"]
def test_format_content_as_anthropic_block() -> None:
messages = [
HumanMessage(content="Hello"),
AIMessage(content="Hi there"),
]
result = format_content_as(messages, format="anthropic", text="block")
assert [msg.content for msg in result] == [
[{"type": "text", "text": "Hello"}],
[{"type": "text", "text": "Hi there"}],
]
def test_format_content_as_invalid_format() -> None:
with pytest.raises(ValueError, match="Unrecognized format="):
format_content_as(
[HumanMessage(content="Hello")], format="invalid", text="string"
)
def test_format_content_as_openai_image() -> None:
base64_image = create_base64_image()
messages = [
HumanMessage(
content=[
{"type": "text", "text": "Here's an image:"},
{"type": "image_url", "image_url": {"url": base64_image}},
]
)
]
result = format_content_as(messages, format="openai", text="block")
assert result[0].content[1]["type"] == "image_url"
assert result[0].content[1]["image_url"]["url"] == base64_image
def test_format_content_as_anthropic_image() -> None:
base64_image = create_base64_image()
messages = [
HumanMessage(
content=[
{"type": "text", "text": "Here's an image:"},
{"type": "image_url", "image_url": base64_image},
]
)
]
result = format_content_as(messages, format="anthropic", text="block")
assert result[0].content[1]["type"] == "image"
assert result[0].content[1]["source"]["type"] == "base64"
assert result[0].content[1]["source"]["media_type"] == "image/jpeg"
def test_format_content_as_tool_message() -> None:
tool_message = ToolMessage(content="Tool result", tool_call_id="123")
result = format_content_as([tool_message], format="openai", text="block")
assert isinstance(result[0], ToolMessage)
assert result[0].content == [{"type": "text", "text": "Tool result"}]
assert result[0].tool_call_id == "123"
def test_format_content_as_tool_use() -> None:
messages = [
AIMessage(
content=[
{"type": "tool_use", "id": "123", "name": "calculator", "input": "2+2"}
]
)
]
result = format_content_as(messages, format="openai", text="block")
assert result[0].tool_calls[0]["id"] == "123"
assert result[0].tool_calls[0]["name"] == "calculator"
assert result[0].tool_calls[0]["args"] == "2+2"
def test_format_content_as_json() -> None:
json_data = {"key": "value"}
messages = [HumanMessage(content=[{"type": "json", "json": json_data}])]
result = format_content_as(messages, format="openai", text="block")
assert result[0].content[0]["type"] == "text"
assert json.loads(result[0].content[0]["text"]) == json_data
def test_format_content_as_guard_content() -> None:
messages = [
HumanMessage(
content=[
{
"type": "guard_content",
"guard_content": {"text": "Protected content"},
}
]
)
]
result = format_content_as(messages, format="openai", text="block")
assert result[0].content[0]["type"] == "text"
assert result[0].content[0]["text"] == "Protected content"
def test_format_content_as_vertexai_image() -> None:
messages = [
HumanMessage(
content=[
{"type": "media", "mime_type": "image/jpeg", "data": b"image_bytes"}
]
)
]
result = format_content_as(messages, format="openai", text="block")
assert result[0].content[0]["type"] == "image_url"
assert (
result[0].content[0]["image_url"]["url"]
== f"data:image/jpeg;base64,{_bytes_to_b64_str(b'image_bytes')}"
)
def test_format_content_as_invalid_block() -> None:
messages = [HumanMessage(content=[{"type": "invalid", "foo": "bar"}])]
with pytest.raises(ValueError, match="Unrecognized content block"):
format_content_as(messages, format="openai", text="block")
with pytest.raises(ValueError, match="Unrecognized content block"):
format_content_as(messages, format="anthropic", text="block")
def test_format_content_as_empty_message() -> None:
result = format_content_as(HumanMessage(content=""), format="openai", text="string")
assert result.content == ""
def test_format_content_as_empty_list() -> None:
result = format_content_as([], format="openai", text="string")
assert result == []
def test_format_content_as_mixed_content_types() -> None:
messages = [
HumanMessage(
content=[
"Text message",
{"type": "text", "text": "Structured text"},
{"type": "image_url", "image_url": create_base64_image()},
]
)
]
result = format_content_as(messages, format="openai", text="block")
assert len(result[0].content) == 3
assert isinstance(result[0].content[0], dict)
assert isinstance(result[0].content[1], dict)
assert isinstance(result[0].content[2], dict)