From 4bc70766b583837669fec9d3393a45e24d226369 Mon Sep 17 00:00:00 2001
From: ccurme <chester.curme@gmail.com>
Date: Wed, 23 Apr 2025 11:20:44 -0400
Subject: [PATCH] core, openai: support standard multi-modal blocks in
 convert_to_openai_messages (#30968)

---
 libs/core/langchain_core/messages/__init__.py |  3 ++
 .../langchain_core/messages/content_blocks.py | 45 +++++++++++++++++
 libs/core/langchain_core/messages/utils.py    |  4 ++
 .../tests/unit_tests/messages/test_imports.py |  1 +
 .../tests/unit_tests/messages/test_utils.py   | 49 +++++++++++++++++++
 .../langchain_openai/chat_models/base.py      | 43 +---------------
 6 files changed, 104 insertions(+), 41 deletions(-)

diff --git a/libs/core/langchain_core/messages/__init__.py b/libs/core/langchain_core/messages/__init__.py
index d4e22138cef..a36042ba86d 100644
--- a/libs/core/langchain_core/messages/__init__.py
+++ b/libs/core/langchain_core/messages/__init__.py
@@ -33,6 +33,7 @@ if TYPE_CHECKING:
     )
     from langchain_core.messages.chat import ChatMessage, ChatMessageChunk
     from langchain_core.messages.content_blocks import (
+        convert_to_openai_data_block,
         convert_to_openai_image_block,
         is_data_content_block,
     )
@@ -83,6 +84,7 @@ __all__ = (
     "ToolMessageChunk",
     "RemoveMessage",
     "_message_from_dict",
+    "convert_to_openai_data_block",
     "convert_to_openai_image_block",
     "convert_to_messages",
     "get_buffer_string",
@@ -124,6 +126,7 @@ _dynamic_imports = {
     "MessageLikeRepresentation": "utils",
     "_message_from_dict": "utils",
     "convert_to_messages": "utils",
+    "convert_to_openai_data_block": "content_blocks",
     "convert_to_openai_image_block": "content_blocks",
     "convert_to_openai_messages": "utils",
     "filter_messages": "utils",
diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py
index 3e3ee11a7ea..cbd9bfa2f00 100644
--- a/libs/core/langchain_core/messages/content_blocks.py
+++ b/libs/core/langchain_core/messages/content_blocks.py
@@ -1,5 +1,6 @@
 """Types for content blocks."""
 
+import warnings
 from typing import Any, Literal, Union
 
 from pydantic import TypeAdapter, ValidationError
@@ -108,3 +109,47 @@ def convert_to_openai_image_block(content_block: dict[str, Any]) -> dict:
         }
     error_message = "Unsupported source type. Only 'url' and 'base64' are supported."
     raise ValueError(error_message)
+
+
+def convert_to_openai_data_block(block: dict) -> dict:
+    """Format standard data content block to format expected by OpenAI.
+
+    Raises ``ValueError`` for an unsupported block type or source type.
+    """
+    if block["type"] == "image":
+        formatted_block = convert_to_openai_image_block(block)
+    elif block["type"] == "file":
+        if block["source_type"] == "base64":
+            file = {"file_data": f"data:{block['mime_type']};base64,{block['data']}"}
+            if filename := block.get("filename"):
+                file["filename"] = filename
+            elif (metadata := block.get("metadata")) and ("filename" in metadata):
+                file["filename"] = metadata["filename"]
+            else:
+                warnings.warn(
+                    "OpenAI may require a filename for file inputs. Specify a filename "
+                    "in the content block: {'type': 'file', 'source_type': 'base64', "
+                    "'mime_type': 'application/pdf', 'data': '...', "
+                    "'filename': 'my-pdf'}",
+                    stacklevel=2,  # attribute the warning to the caller
+                )
+            formatted_block = {"type": "file", "file": file}
+        elif block["source_type"] == "id":
+            formatted_block = {"type": "file", "file": {"file_id": block["id"]}}
+        else:
+            error_msg = "source_type base64 or id is required for file blocks."
+            raise ValueError(error_msg)
+    elif block["type"] == "audio":
+        if block["source_type"] == "base64":
+            audio_format = block["mime_type"].split("/")[-1]  # "audio/wav" -> "wav"
+            formatted_block = {
+                "type": "input_audio",
+                "input_audio": {"data": block["data"], "format": audio_format},
+            }
+        else:
+            error_msg = "source_type base64 is required for audio blocks."
+            raise ValueError(error_msg)
+    else:
+        error_msg = f"Block of type {block['type']} is not supported."
+        raise ValueError(error_msg)
+    return formatted_block
diff --git a/libs/core/langchain_core/messages/utils.py b/libs/core/langchain_core/messages/utils.py
index 92a658afd23..6582f298abe 100644
--- a/libs/core/langchain_core/messages/utils.py
+++ b/libs/core/langchain_core/messages/utils.py
@@ -30,6 +30,7 @@ from typing import (
 from pydantic import Discriminator, Field, Tag
 
 from langchain_core.exceptions import ErrorCode, create_message
+from langchain_core.messages import convert_to_openai_data_block, is_data_content_block
 from langchain_core.messages.ai import AIMessage, AIMessageChunk
 from langchain_core.messages.base import BaseMessage, BaseMessageChunk
 from langchain_core.messages.chat import ChatMessage, ChatMessageChunk
@@ -1067,6 +1068,9 @@ def convert_to_openai_messages(
                             "image_url": block["image_url"],
                         }
                     )
+                # Standard multi-modal content block
+                elif is_data_content_block(block):
+                    content.append(convert_to_openai_data_block(block))
                 # Anthropic and Bedrock converse format
                 elif (block.get("type") == "image") or "image" in block:
                     # Anthropic
diff --git a/libs/core/tests/unit_tests/messages/test_imports.py b/libs/core/tests/unit_tests/messages/test_imports.py
index 1b97e647e01..ff9fbf92fc7 100644
--- a/libs/core/tests/unit_tests/messages/test_imports.py
+++ b/libs/core/tests/unit_tests/messages/test_imports.py
@@ -33,6 +33,7 @@ EXPECTED_ALL = [
     "filter_messages",
     "merge_message_runs",
     "trim_messages",
+    "convert_to_openai_data_block",
     "convert_to_openai_image_block",
     "convert_to_openai_messages",
 ]
diff --git a/libs/core/tests/unit_tests/messages/test_utils.py b/libs/core/tests/unit_tests/messages/test_utils.py
index 9bc7b35bbfd..9031e1be5da 100644
--- a/libs/core/tests/unit_tests/messages/test_utils.py
+++ b/libs/core/tests/unit_tests/messages/test_utils.py
@@ -1186,6 +1186,55 @@ def test_convert_to_openai_messages_developer() -> None:
     assert result == [{"role": "developer", "content": "a"}] * 2
 
 
+def test_convert_to_openai_messages_multimodal() -> None:
+    """Standard multi-modal data blocks are converted to OpenAI block formats."""
+    messages = [
+        HumanMessage(
+            content=[
+                {"type": "text", "text": "Text message"},
+                {
+                    "type": "image",
+                    "source_type": "url",
+                    "url": "https://example.com/test.png",
+                },
+                {
+                    "type": "image",
+                    "source_type": "base64",
+                    "data": "<base64 string>",
+                    "mime_type": "image/png",
+                },
+                {
+                    "type": "file",
+                    "source_type": "base64",
+                    "data": "<base64 string>",
+                    "mime_type": "application/pdf",
+                },
+                {
+                    "type": "file",
+                    "source_type": "base64",
+                    "data": "<base64 string>",
+                    "mime_type": "application/pdf",
+                    "filename": "test.pdf",
+                },
+                {"type": "file", "source_type": "id", "id": "file-abc123"},
+                {
+                    "type": "audio",
+                    "source_type": "base64",
+                    "data": "<base64 string>",
+                    "mime_type": "audio/wav",
+                },
+            ]
+        )
+    ]
+    result = convert_to_openai_messages(messages, text_format="block")
+    assert len(result) == 1
+    content = result[0]["content"]
+    assert len(content) == 7
+    assert [block["type"] for block in content[3:]] == ["file"] * 3 + ["input_audio"]
+    assert content[5] == {"type": "file", "file": {"file_id": "file-abc123"}}
+    assert content[6]["input_audio"]["format"] == "wav"
+
+
 def test_count_tokens_approximately_empty_messages() -> None:
     # Test with empty message list
     assert count_tokens_approximately([]) == 0
diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py
index 16afa1a1023..3c6a9ba6871 100644
--- a/libs/partners/openai/langchain_openai/chat_models/base.py
+++ b/libs/partners/openai/langchain_openai/chat_models/base.py
@@ -61,7 +61,7 @@ from langchain_core.messages import (
     ToolCall,
     ToolMessage,
     ToolMessageChunk,
-    convert_to_openai_image_block,
+    convert_to_openai_data_block,
     is_data_content_block,
 )
 from langchain_core.messages.ai import (
@@ -186,45 +186,6 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
         return ChatMessage(content=_dict.get("content", ""), role=role, id=id_)  # type: ignore[arg-type]
 
 
-def _format_data_content_block(block: dict) -> dict:
-    """Format standard data content block to format expected by OpenAI."""
-    if block["type"] == "image":
-        formatted_block = convert_to_openai_image_block(block)
-
-    elif block["type"] == "file":
-        if block["source_type"] == "base64":
-            file = {"file_data": f"data:{block['mime_type']};base64,{block['data']}"}
-            if filename := block.get("filename"):
-                file["filename"] = filename
-            elif (metadata := block.get("metadata")) and ("filename" in metadata):
-                file["filename"] = metadata["filename"]
-            else:
-                warnings.warn(
-                    "OpenAI may require a filename for file inputs. Specify a filename "
-                    "in the content block: {'type': 'file', 'source_type': 'base64', "
-                    "'mime_type': 'application/pdf', 'data': '...', "
-                    "'filename': 'my-pdf'}"
-                )
-            formatted_block = {"type": "file", "file": file}
-        elif block["source_type"] == "id":
-            formatted_block = {"type": "file", "file": {"file_id": block["id"]}}
-        else:
-            raise ValueError("source_type base64 or id is required for file blocks.")
-    elif block["type"] == "audio":
-        if block["source_type"] == "base64":
-            format = block["mime_type"].split("/")[-1]
-            formatted_block = {
-                "type": "input_audio",
-                "input_audio": {"data": block["data"], "format": format},
-            }
-        else:
-            raise ValueError("source_type base64 is required for audio blocks.")
-    else:
-        raise ValueError(f"Block of type {block['type']} is not supported.")
-
-    return formatted_block
-
-
 def _format_message_content(content: Any) -> Any:
     """Format message content."""
     if content and isinstance(content, list):
@@ -238,7 +199,7 @@ def _format_message_content(content: Any) -> Any:
             ):
                 continue
             elif isinstance(block, dict) and is_data_content_block(block):
-                formatted_content.append(_format_data_content_block(block))
+                formatted_content.append(convert_to_openai_data_block(block))
             # Anthropic image blocks
             elif (
                 isinstance(block, dict)