core, openai: support standard multi-modal blocks in convert_to_openai_messages (#30968)

This commit is contained in:
ccurme 2025-04-23 11:20:44 -04:00 committed by GitHub
parent e4877e5ef1
commit 4bc70766b5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 104 additions and 41 deletions

View File

@ -33,6 +33,7 @@ if TYPE_CHECKING:
) )
from langchain_core.messages.chat import ChatMessage, ChatMessageChunk from langchain_core.messages.chat import ChatMessage, ChatMessageChunk
from langchain_core.messages.content_blocks import ( from langchain_core.messages.content_blocks import (
convert_to_openai_data_block,
convert_to_openai_image_block, convert_to_openai_image_block,
is_data_content_block, is_data_content_block,
) )
@ -83,6 +84,7 @@ __all__ = (
"ToolMessageChunk", "ToolMessageChunk",
"RemoveMessage", "RemoveMessage",
"_message_from_dict", "_message_from_dict",
"convert_to_openai_data_block",
"convert_to_openai_image_block", "convert_to_openai_image_block",
"convert_to_messages", "convert_to_messages",
"get_buffer_string", "get_buffer_string",
@ -124,6 +126,7 @@ _dynamic_imports = {
"MessageLikeRepresentation": "utils", "MessageLikeRepresentation": "utils",
"_message_from_dict": "utils", "_message_from_dict": "utils",
"convert_to_messages": "utils", "convert_to_messages": "utils",
"convert_to_openai_data_block": "content_blocks",
"convert_to_openai_image_block": "content_blocks", "convert_to_openai_image_block": "content_blocks",
"convert_to_openai_messages": "utils", "convert_to_openai_messages": "utils",
"filter_messages": "utils", "filter_messages": "utils",

View File

@ -1,5 +1,6 @@
"""Types for content blocks.""" """Types for content blocks."""
import warnings
from typing import Any, Literal, Union from typing import Any, Literal, Union
from pydantic import TypeAdapter, ValidationError from pydantic import TypeAdapter, ValidationError
@ -108,3 +109,47 @@ def convert_to_openai_image_block(content_block: dict[str, Any]) -> dict:
} }
error_message = "Unsupported source type. Only 'url' and 'base64' are supported." error_message = "Unsupported source type. Only 'url' and 'base64' are supported."
raise ValueError(error_message) raise ValueError(error_message)
def convert_to_openai_data_block(block: dict) -> dict:
    """Format standard data content block to format expected by OpenAI.

    Args:
        block: A standard multi-modal content block. Must contain a ``type``
            key (``"image"``, ``"file"``, or ``"audio"``) and a
            ``source_type`` key (``"url"``, ``"base64"``, or ``"id"``,
            depending on type).

    Returns:
        A dict in OpenAI Chat Completions content-part format.

    Raises:
        ValueError: If the block ``type`` or ``source_type`` is unsupported.
    """
    if block["type"] == "image":
        formatted_block = convert_to_openai_image_block(block)
    elif block["type"] == "file":
        if block["source_type"] == "base64":
            # OpenAI expects inline files as a data URL in ``file_data``.
            file = {"file_data": f"data:{block['mime_type']};base64,{block['data']}"}
            # Filename may appear top-level or nested under ``metadata``.
            if filename := block.get("filename"):
                file["filename"] = filename
            elif (metadata := block.get("metadata")) and ("filename" in metadata):
                file["filename"] = metadata["filename"]
            else:
                warnings.warn(
                    "OpenAI may require a filename for file inputs. Specify a filename "
                    "in the content block: {'type': 'file', 'source_type': 'base64', "
                    "'mime_type': 'application/pdf', 'data': '...', "
                    "'filename': 'my-pdf'}",
                    stacklevel=1,
                )
            formatted_block = {"type": "file", "file": file}
        elif block["source_type"] == "id":
            # Reference to a previously uploaded file by its id.
            formatted_block = {"type": "file", "file": {"file_id": block["id"]}}
        else:
            error_msg = "source_type base64 or id is required for file blocks."
            raise ValueError(error_msg)
    elif block["type"] == "audio":
        if block["source_type"] == "base64":
            # OpenAI wants the bare format name (e.g. "wav"), derived here
            # from the MIME subtype — "audio/wav" -> "wav".
            audio_format = block["mime_type"].split("/")[-1]
            formatted_block = {
                "type": "input_audio",
                "input_audio": {"data": block["data"], "format": audio_format},
            }
        else:
            error_msg = "source_type base64 is required for audio blocks."
            raise ValueError(error_msg)
    else:
        error_msg = f"Block of type {block['type']} is not supported."
        raise ValueError(error_msg)
    return formatted_block

View File

@ -30,6 +30,7 @@ from typing import (
from pydantic import Discriminator, Field, Tag from pydantic import Discriminator, Field, Tag
from langchain_core.exceptions import ErrorCode, create_message from langchain_core.exceptions import ErrorCode, create_message
from langchain_core.messages import convert_to_openai_data_block, is_data_content_block
from langchain_core.messages.ai import AIMessage, AIMessageChunk from langchain_core.messages.ai import AIMessage, AIMessageChunk
from langchain_core.messages.base import BaseMessage, BaseMessageChunk from langchain_core.messages.base import BaseMessage, BaseMessageChunk
from langchain_core.messages.chat import ChatMessage, ChatMessageChunk from langchain_core.messages.chat import ChatMessage, ChatMessageChunk
@ -1067,6 +1068,9 @@ def convert_to_openai_messages(
"image_url": block["image_url"], "image_url": block["image_url"],
} }
) )
# Standard multi-modal content block
elif is_data_content_block(block):
content.append(convert_to_openai_data_block(block))
# Anthropic and Bedrock converse format # Anthropic and Bedrock converse format
elif (block.get("type") == "image") or "image" in block: elif (block.get("type") == "image") or "image" in block:
# Anthropic # Anthropic

View File

@ -33,6 +33,7 @@ EXPECTED_ALL = [
"filter_messages", "filter_messages",
"merge_message_runs", "merge_message_runs",
"trim_messages", "trim_messages",
"convert_to_openai_data_block",
"convert_to_openai_image_block", "convert_to_openai_image_block",
"convert_to_openai_messages", "convert_to_openai_messages",
] ]

View File

@ -1186,6 +1186,55 @@ def test_convert_to_openai_messages_developer() -> None:
assert result == [{"role": "developer", "content": "a"}] * 2 assert result == [{"role": "developer", "content": "a"}] * 2
def test_convert_to_openai_messages_multimodal() -> None:
    """Standard multi-modal content blocks survive conversion to OpenAI format."""
    blocks: list[dict] = [
        {"type": "text", "text": "Text message"},
        {
            "type": "image",
            "source_type": "url",
            "url": "https://example.com/test.png",
        },
        {
            "type": "image",
            "source_type": "base64",
            "data": "<base64 string>",
            "mime_type": "image/png",
        },
        {
            "type": "file",
            "source_type": "base64",
            "data": "<base64 string>",
            "mime_type": "application/pdf",
        },
        {
            "type": "file",
            "source_type": "base64",
            "data": "<base64 string>",
            "mime_type": "application/pdf",
            "filename": "test.pdf",
        },
        {
            "type": "file",
            "source_type": "id",
            "id": "file-abc123",
        },
        {
            "type": "audio",
            "source_type": "base64",
            "data": "<base64 string>",
            "mime_type": "audio/wav",
        },
    ]
    result = convert_to_openai_messages(
        [HumanMessage(content=blocks)], text_format="block"
    )
    # One input message in, one OpenAI message out.
    assert len(result) == 1
    message = result[0]
    # Each of the seven blocks maps to exactly one content part.
    assert len(message["content"]) == 7
def test_count_tokens_approximately_empty_messages() -> None: def test_count_tokens_approximately_empty_messages() -> None:
# Test with empty message list # Test with empty message list
assert count_tokens_approximately([]) == 0 assert count_tokens_approximately([]) == 0

View File

@ -61,7 +61,7 @@ from langchain_core.messages import (
ToolCall, ToolCall,
ToolMessage, ToolMessage,
ToolMessageChunk, ToolMessageChunk,
convert_to_openai_image_block, convert_to_openai_data_block,
is_data_content_block, is_data_content_block,
) )
from langchain_core.messages.ai import ( from langchain_core.messages.ai import (
@ -186,45 +186,6 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
return ChatMessage(content=_dict.get("content", ""), role=role, id=id_) # type: ignore[arg-type] return ChatMessage(content=_dict.get("content", ""), role=role, id=id_) # type: ignore[arg-type]
def _format_data_content_block(block: dict) -> dict:
"""Format standard data content block to format expected by OpenAI."""
if block["type"] == "image":
formatted_block = convert_to_openai_image_block(block)
elif block["type"] == "file":
if block["source_type"] == "base64":
file = {"file_data": f"data:{block['mime_type']};base64,{block['data']}"}
if filename := block.get("filename"):
file["filename"] = filename
elif (metadata := block.get("metadata")) and ("filename" in metadata):
file["filename"] = metadata["filename"]
else:
warnings.warn(
"OpenAI may require a filename for file inputs. Specify a filename "
"in the content block: {'type': 'file', 'source_type': 'base64', "
"'mime_type': 'application/pdf', 'data': '...', "
"'filename': 'my-pdf'}"
)
formatted_block = {"type": "file", "file": file}
elif block["source_type"] == "id":
formatted_block = {"type": "file", "file": {"file_id": block["id"]}}
else:
raise ValueError("source_type base64 or id is required for file blocks.")
elif block["type"] == "audio":
if block["source_type"] == "base64":
format = block["mime_type"].split("/")[-1]
formatted_block = {
"type": "input_audio",
"input_audio": {"data": block["data"], "format": format},
}
else:
raise ValueError("source_type base64 is required for audio blocks.")
else:
raise ValueError(f"Block of type {block['type']} is not supported.")
return formatted_block
def _format_message_content(content: Any) -> Any: def _format_message_content(content: Any) -> Any:
"""Format message content.""" """Format message content."""
if content and isinstance(content, list): if content and isinstance(content, list):
@ -238,7 +199,7 @@ def _format_message_content(content: Any) -> Any:
): ):
continue continue
elif isinstance(block, dict) and is_data_content_block(block): elif isinstance(block, dict) and is_data_content_block(block):
formatted_content.append(_format_data_content_block(block)) formatted_content.append(convert_to_openai_data_block(block))
# Anthropic image blocks # Anthropic image blocks
elif ( elif (
isinstance(block, dict) isinstance(block, dict)