core, openai: support standard multi-modal blocks in convert_to_openai_messages (#30968)

2025-08-17 08:29:28 +00:00 · 2025-04-23 11:20:44 -04:00 · 2025-04-23 11:20:44 -04:00 · 4bc70766b5
commit 4bc70766b5
parent e4877e5ef1
6 changed files with 104 additions and 41 deletions
--- a/libs/core/langchain_core/messages/__init__.py
+++ b/libs/core/langchain_core/messages/__init__.py
@ -33,6 +33,7 @@ if TYPE_CHECKING:
    )
    from langchain_core.messages.chat import ChatMessage, ChatMessageChunk
    from langchain_core.messages.content_blocks import (
        convert_to_openai_data_block,
        convert_to_openai_image_block,
        is_data_content_block,
    )
@ -83,6 +84,7 @@ __all__ = (
    "ToolMessageChunk",
    "RemoveMessage",
    "_message_from_dict",
    "convert_to_openai_data_block",
    "convert_to_openai_image_block",
    "convert_to_messages",
    "get_buffer_string",
@ -124,6 +126,7 @@ _dynamic_imports = {
    "MessageLikeRepresentation": "utils",
    "_message_from_dict": "utils",
    "convert_to_messages": "utils",
    "convert_to_openai_data_block": "content_blocks",
    "convert_to_openai_image_block": "content_blocks",
    "convert_to_openai_messages": "utils",
    "filter_messages": "utils",
--- a/libs/core/langchain_core/messages/content_blocks.py
+++ b/libs/core/langchain_core/messages/content_blocks.py
@ -1,5 +1,6 @@
 """Types for content blocks."""
 import warnings
 from typing import Any, Literal, Union
 from pydantic import TypeAdapter, ValidationError
@ -108,3 +109,47 @@ def convert_to_openai_image_block(content_block: dict[str, Any]) -> dict:
        }
    error_message = "Unsupported source type. Only 'url' and 'base64' are supported."
    raise ValueError(error_message)
 def convert_to_openai_data_block(block: dict) -> dict:
    """Translate a standard data content block into the shape OpenAI expects.

    Supported inputs, selected by ``block["type"]`` and ``block["source_type"]``:

    * ``"image"`` -- delegated to ``convert_to_openai_image_block``.
    * ``"file"`` with ``"base64"`` -- emitted as a ``data:`` URI under
      ``file_data``; the filename is taken from ``block["filename"]`` or,
      failing that, ``block["metadata"]["filename"]``. A warning is issued
      when neither is available.
    * ``"file"`` with ``"id"`` -- emitted as a ``file_id`` reference.
    * ``"audio"`` with ``"base64"`` -- emitted as an ``input_audio`` block
      whose format is the part of the MIME type after the last ``/``.

    Args:
        block: The standard content block to convert.

    Returns:
        The OpenAI-formatted content block.

    Raises:
        ValueError: If the block type, or the source type for a file or
            audio block, is not one of the supported combinations.
    """
    kind = block["type"]

    if kind == "image":
        return convert_to_openai_image_block(block)

    if kind == "file":
        source = block["source_type"]
        if source == "id":
            return {"type": "file", "file": {"file_id": block["id"]}}
        if source != "base64":
            error_msg = "source_type base64 or id is required for file blocks."
            raise ValueError(error_msg)

        data_uri = f"data:{block['mime_type']};base64,{block['data']}"
        payload = {"file_data": data_uri}

        explicit_name = block.get("filename")
        if explicit_name:
            payload["filename"] = explicit_name
        else:
            extra = block.get("metadata")
            if extra and "filename" in extra:
                # Fall back to a filename carried in the block's metadata.
                payload["filename"] = extra["filename"]
            else:
                warnings.warn(
                    "OpenAI may require a filename for file inputs. Specify a filename "
                    "in the content block: {'type': 'file', 'source_type': 'base64', "
                    "'mime_type': 'application/pdf', 'data': '...', "
                    "'filename': 'my-pdf'}",
                    stacklevel=1,
                )
        return {"type": "file", "file": payload}

    if kind == "audio":
        if block["source_type"] != "base64":
            error_msg = "source_type base64 is required for audio blocks."
            raise ValueError(error_msg)
        # e.g. "audio/wav" -> "wav"
        audio_format = block["mime_type"].split("/")[-1]
        return {
            "type": "input_audio",
            "input_audio": {"data": block["data"], "format": audio_format},
        }

    error_msg = f"Block of type {kind} is not supported."
    raise ValueError(error_msg)
--- a/libs/core/langchain_core/messages/utils.py
+++ b/libs/core/langchain_core/messages/utils.py
@ -30,6 +30,7 @@ from typing import (
 from pydantic import Discriminator, Field, Tag
 from langchain_core.exceptions import ErrorCode, create_message
 from langchain_core.messages import convert_to_openai_data_block, is_data_content_block
 from langchain_core.messages.ai import AIMessage, AIMessageChunk
 from langchain_core.messages.base import BaseMessage, BaseMessageChunk
 from langchain_core.messages.chat import ChatMessage, ChatMessageChunk
@ -1067,6 +1068,9 @@ def convert_to_openai_messages(
                            "image_url": block["image_url"],
                        }
                    )
                # Standard multi-modal content block
                elif is_data_content_block(block):
                    content.append(convert_to_openai_data_block(block))
                # Anthropic and Bedrock converse format
                elif (block.get("type") == "image") or "image" in block:
                    # Anthropic
--- a/libs/core/tests/unit_tests/messages/test_imports.py
+++ b/libs/core/tests/unit_tests/messages/test_imports.py
@ -33,6 +33,7 @@ EXPECTED_ALL = [
    "filter_messages",
    "merge_message_runs",
    "trim_messages",
    "convert_to_openai_data_block",
    "convert_to_openai_image_block",
    "convert_to_openai_messages",
 ]
--- a/libs/core/tests/unit_tests/messages/test_utils.py
+++ b/libs/core/tests/unit_tests/messages/test_utils.py
@ -1186,6 +1186,55 @@ def test_convert_to_openai_messages_developer() -> None:
    assert result == [{"role": "developer", "content": "a"}] * 2
 def test_convert_to_openai_messages_multimodal() -> None:
    """Check that a message mixing text and standard data blocks converts.

    One human message carries a text block plus image (url and base64),
    file (base64 without filename, base64 with filename, id) and audio
    (base64) blocks. The conversion must yield a single message whose
    content holds seven blocks.
    """
    encoded = "<base64 string>"

    text_block = {"type": "text", "text": "Text message"}
    image_from_url = {
        "type": "image",
        "source_type": "url",
        "url": "https://example.com/test.png",
    }
    image_inline = {
        "type": "image",
        "source_type": "base64",
        "data": encoded,
        "mime_type": "image/png",
    }
    pdf_unnamed = {
        "type": "file",
        "source_type": "base64",
        "data": encoded,
        "mime_type": "application/pdf",
    }
    pdf_named = {
        "type": "file",
        "source_type": "base64",
        "data": encoded,
        "mime_type": "application/pdf",
        "filename": "test.pdf",
    }
    file_by_id = {
        "type": "file",
        "source_type": "id",
        "id": "file-abc123",
    }
    audio_inline = {
        "type": "audio",
        "source_type": "base64",
        "data": encoded,
        "mime_type": "audio/wav",
    }

    human = HumanMessage(
        content=[
            text_block,
            image_from_url,
            image_inline,
            pdf_unnamed,
            pdf_named,
            file_by_id,
            audio_inline,
        ]
    )
    converted = convert_to_openai_messages([human], text_format="block")

    assert len(converted) == 1
    first = converted[0]
    assert len(first["content"]) == 7
 def test_count_tokens_approximately_empty_messages() -> None:
    # Test with empty message list
    assert count_tokens_approximately([]) == 0
--- a/libs/partners/openai/langchain_openai/chat_models/base.py
+++ b/libs/partners/openai/langchain_openai/chat_models/base.py
@ -61,7 +61,7 @@ from langchain_core.messages import (
    ToolCall,
    ToolMessage,
    ToolMessageChunk,
-    convert_to_openai_image_block,
+    convert_to_openai_data_block,
    is_data_content_block,
 )
 from langchain_core.messages.ai import (
@ -186,45 +186,6 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
        return ChatMessage(content=_dict.get("content", ""), role=role, id=id_)  # type: ignore[arg-type]
 def _format_data_content_block(block: dict) -> dict:
    """Format standard data content block to format expected by OpenAI."""
    if block["type"] == "image":
        formatted_block = convert_to_openai_image_block(block)
    elif block["type"] == "file":
        if block["source_type"] == "base64":
            file = {"file_data": f"data:{block['mime_type']};base64,{block['data']}"}
            if filename := block.get("filename"):
                file["filename"] = filename
            elif (metadata := block.get("metadata")) and ("filename" in metadata):
                file["filename"] = metadata["filename"]
            else:
                warnings.warn(
                    "OpenAI may require a filename for file inputs. Specify a filename "
                    "in the content block: {'type': 'file', 'source_type': 'base64', "
                    "'mime_type': 'application/pdf', 'data': '...', "
                    "'filename': 'my-pdf'}"
                )
            formatted_block = {"type": "file", "file": file}
        elif block["source_type"] == "id":
            formatted_block = {"type": "file", "file": {"file_id": block["id"]}}
        else:
            raise ValueError("source_type base64 or id is required for file blocks.")
    elif block["type"] == "audio":
        if block["source_type"] == "base64":
            format = block["mime_type"].split("/")[-1]
            formatted_block = {
                "type": "input_audio",
                "input_audio": {"data": block["data"], "format": format},
            }
        else:
            raise ValueError("source_type base64 is required for audio blocks.")
    else:
        raise ValueError(f"Block of type {block['type']} is not supported.")
    return formatted_block
 def _format_message_content(content: Any) -> Any:
    """Format message content."""
    if content and isinstance(content, list):
@ -238,7 +199,7 @@ def _format_message_content(content: Any) -> Any:
            ):
                continue
            elif isinstance(block, dict) and is_data_content_block(block):
-                formatted_content.append(_format_data_content_block(block))
+                formatted_content.append(convert_to_openai_data_block(block))
            # Anthropic image blocks
            elif (
                isinstance(block, dict)