From 4bc70766b583837669fec9d3393a45e24d226369 Mon Sep 17 00:00:00 2001 From: ccurme Date: Wed, 23 Apr 2025 11:20:44 -0400 Subject: [PATCH] core, openai: support standard multi-modal blocks in convert_to_openai_messages (#30968) --- libs/core/langchain_core/messages/__init__.py | 3 ++ .../langchain_core/messages/content_blocks.py | 45 +++++++++++++++++ libs/core/langchain_core/messages/utils.py | 4 ++ .../tests/unit_tests/messages/test_imports.py | 1 + .../tests/unit_tests/messages/test_utils.py | 49 +++++++++++++++++++ .../langchain_openai/chat_models/base.py | 43 +--------------- 6 files changed, 104 insertions(+), 41 deletions(-) diff --git a/libs/core/langchain_core/messages/__init__.py b/libs/core/langchain_core/messages/__init__.py index d4e22138cef..a36042ba86d 100644 --- a/libs/core/langchain_core/messages/__init__.py +++ b/libs/core/langchain_core/messages/__init__.py @@ -33,6 +33,7 @@ if TYPE_CHECKING: ) from langchain_core.messages.chat import ChatMessage, ChatMessageChunk from langchain_core.messages.content_blocks import ( + convert_to_openai_data_block, convert_to_openai_image_block, is_data_content_block, ) @@ -83,6 +84,7 @@ __all__ = ( "ToolMessageChunk", "RemoveMessage", "_message_from_dict", + "convert_to_openai_data_block", "convert_to_openai_image_block", "convert_to_messages", "get_buffer_string", @@ -124,6 +126,7 @@ _dynamic_imports = { "MessageLikeRepresentation": "utils", "_message_from_dict": "utils", "convert_to_messages": "utils", + "convert_to_openai_data_block": "content_blocks", "convert_to_openai_image_block": "content_blocks", "convert_to_openai_messages": "utils", "filter_messages": "utils", diff --git a/libs/core/langchain_core/messages/content_blocks.py b/libs/core/langchain_core/messages/content_blocks.py index 3e3ee11a7ea..cbd9bfa2f00 100644 --- a/libs/core/langchain_core/messages/content_blocks.py +++ b/libs/core/langchain_core/messages/content_blocks.py @@ -1,5 +1,6 @@ """Types for content blocks.""" +import warnings from typing import Any, Literal, Union from pydantic import TypeAdapter, ValidationError @@ -108,3 +109,47 @@ def convert_to_openai_image_block(content_block: dict[str, Any]) -> dict: } error_message = "Unsupported source type. Only 'url' and 'base64' are supported." raise ValueError(error_message) + + +def convert_to_openai_data_block(block: dict) -> dict: + """Format standard data content block to format expected by OpenAI.""" + if block["type"] == "image": + formatted_block = convert_to_openai_image_block(block) + + elif block["type"] == "file": + if block["source_type"] == "base64": + file = {"file_data": f"data:{block['mime_type']};base64,{block['data']}"} + if filename := block.get("filename"): + file["filename"] = filename + elif (metadata := block.get("metadata")) and ("filename" in metadata): + file["filename"] = metadata["filename"] + else: + warnings.warn( + "OpenAI may require a filename for file inputs. Specify a filename " + "in the content block: {'type': 'file', 'source_type': 'base64', " + "'mime_type': 'application/pdf', 'data': '...', " + "'filename': 'my-pdf'}", + stacklevel=1, + ) + formatted_block = {"type": "file", "file": file} + elif block["source_type"] == "id": + formatted_block = {"type": "file", "file": {"file_id": block["id"]}} + else: + error_msg = "source_type base64 or id is required for file blocks." + raise ValueError(error_msg) + + elif block["type"] == "audio": + if block["source_type"] == "base64": + format = block["mime_type"].split("/")[-1] + formatted_block = { + "type": "input_audio", + "input_audio": {"data": block["data"], "format": format}, + } + else: + error_msg = "source_type base64 is required for audio blocks." + raise ValueError(error_msg) + else: + error_msg = f"Block of type {block['type']} is not supported." + raise ValueError(error_msg) + + return formatted_block diff --git a/libs/core/langchain_core/messages/utils.py b/libs/core/langchain_core/messages/utils.py index 92a658afd23..6582f298abe 100644 --- a/libs/core/langchain_core/messages/utils.py +++ b/libs/core/langchain_core/messages/utils.py @@ -30,6 +30,7 @@ from typing import ( from pydantic import Discriminator, Field, Tag from langchain_core.exceptions import ErrorCode, create_message +from langchain_core.messages import convert_to_openai_data_block, is_data_content_block from langchain_core.messages.ai import AIMessage, AIMessageChunk from langchain_core.messages.base import BaseMessage, BaseMessageChunk from langchain_core.messages.chat import ChatMessage, ChatMessageChunk @@ -1067,6 +1068,9 @@ def convert_to_openai_messages( "image_url": block["image_url"], } ) + # Standard multi-modal content block + elif is_data_content_block(block): + content.append(convert_to_openai_data_block(block)) # Anthropic and Bedrock converse format elif (block.get("type") == "image") or "image" in block: # Anthropic diff --git a/libs/core/tests/unit_tests/messages/test_imports.py b/libs/core/tests/unit_tests/messages/test_imports.py index 1b97e647e01..ff9fbf92fc7 100644 --- a/libs/core/tests/unit_tests/messages/test_imports.py +++ b/libs/core/tests/unit_tests/messages/test_imports.py @@ -33,6 +33,7 @@ EXPECTED_ALL = [ "filter_messages", "merge_message_runs", "trim_messages", + "convert_to_openai_data_block", "convert_to_openai_image_block", "convert_to_openai_messages", ] diff --git a/libs/core/tests/unit_tests/messages/test_utils.py b/libs/core/tests/unit_tests/messages/test_utils.py index 9bc7b35bbfd..9031e1be5da 100644 --- a/libs/core/tests/unit_tests/messages/test_utils.py +++ b/libs/core/tests/unit_tests/messages/test_utils.py @@ -1186,6 +1186,55 @@ def test_convert_to_openai_messages_developer() -> None: assert result == [{"role": "developer", "content": "a"}] * 2 +def test_convert_to_openai_messages_multimodal() -> None: + messages = [ + HumanMessage( + content=[ + {"type": "text", "text": "Text message"}, + { + "type": "image", + "source_type": "url", + "url": "https://example.com/test.png", + }, + { + "type": "image", + "source_type": "base64", + "data": "", + "mime_type": "image/png", + }, + { + "type": "file", + "source_type": "base64", + "data": "", + "mime_type": "application/pdf", + }, + { + "type": "file", + "source_type": "base64", + "data": "", + "mime_type": "application/pdf", + "filename": "test.pdf", + }, + { + "type": "file", + "source_type": "id", + "id": "file-abc123", + }, + { + "type": "audio", + "source_type": "base64", + "data": "", + "mime_type": "audio/wav", + }, + ] + ) + ] + result = convert_to_openai_messages(messages, text_format="block") + assert len(result) == 1 + message = result[0] + assert len(message["content"]) == 7 + + def test_count_tokens_approximately_empty_messages() -> None: # Test with empty message list assert count_tokens_approximately([]) == 0 diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 16afa1a1023..3c6a9ba6871 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -61,7 +61,7 @@ from langchain_core.messages import ( ToolCall, ToolMessage, ToolMessageChunk, - convert_to_openai_image_block, + convert_to_openai_data_block, is_data_content_block, ) from langchain_core.messages.ai import ( @@ -186,45 +186,6 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage: return ChatMessage(content=_dict.get("content", ""), role=role, id=id_) # type: ignore[arg-type] -def _format_data_content_block(block: dict) -> dict: - """Format standard data content block to format expected by OpenAI.""" - if block["type"] == "image": - formatted_block = convert_to_openai_image_block(block) - - elif block["type"] == "file": - if block["source_type"] == "base64": - file = {"file_data": f"data:{block['mime_type']};base64,{block['data']}"} - if filename := block.get("filename"): - file["filename"] = filename - elif (metadata := block.get("metadata")) and ("filename" in metadata): - file["filename"] = metadata["filename"] - else: - warnings.warn( - "OpenAI may require a filename for file inputs. Specify a filename " - "in the content block: {'type': 'file', 'source_type': 'base64', " - "'mime_type': 'application/pdf', 'data': '...', " - "'filename': 'my-pdf'}" - ) - formatted_block = {"type": "file", "file": file} - elif block["source_type"] == "id": - formatted_block = {"type": "file", "file": {"file_id": block["id"]}} - else: - raise ValueError("source_type base64 or id is required for file blocks.") - elif block["type"] == "audio": - if block["source_type"] == "base64": - format = block["mime_type"].split("/")[-1] - formatted_block = { - "type": "input_audio", - "input_audio": {"data": block["data"], "format": format}, - } - else: - raise ValueError("source_type base64 is required for audio blocks.") - else: - raise ValueError(f"Block of type {block['type']} is not supported.") - - return formatted_block - - def _format_message_content(content: Any) -> Any: """Format message content.""" if content and isinstance(content, list): @@ -238,7 +199,7 @@ def _format_message_content(content: Any) -> Any: ): continue elif isinstance(block, dict) and is_data_content_block(block): - formatted_content.append(_format_data_content_block(block)) + formatted_content.append(convert_to_openai_data_block(block)) # Anthropic image blocks elif ( isinstance(block, dict)