mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-01 19:03:25 +00:00
core, openai: support standard multi-modal blocks in convert_to_openai_messages (#30968)
This commit is contained in:
parent
e4877e5ef1
commit
4bc70766b5
@ -33,6 +33,7 @@ if TYPE_CHECKING:
|
|||||||
)
|
)
|
||||||
from langchain_core.messages.chat import ChatMessage, ChatMessageChunk
|
from langchain_core.messages.chat import ChatMessage, ChatMessageChunk
|
||||||
from langchain_core.messages.content_blocks import (
|
from langchain_core.messages.content_blocks import (
|
||||||
|
convert_to_openai_data_block,
|
||||||
convert_to_openai_image_block,
|
convert_to_openai_image_block,
|
||||||
is_data_content_block,
|
is_data_content_block,
|
||||||
)
|
)
|
||||||
@ -83,6 +84,7 @@ __all__ = (
|
|||||||
"ToolMessageChunk",
|
"ToolMessageChunk",
|
||||||
"RemoveMessage",
|
"RemoveMessage",
|
||||||
"_message_from_dict",
|
"_message_from_dict",
|
||||||
|
"convert_to_openai_data_block",
|
||||||
"convert_to_openai_image_block",
|
"convert_to_openai_image_block",
|
||||||
"convert_to_messages",
|
"convert_to_messages",
|
||||||
"get_buffer_string",
|
"get_buffer_string",
|
||||||
@ -124,6 +126,7 @@ _dynamic_imports = {
|
|||||||
"MessageLikeRepresentation": "utils",
|
"MessageLikeRepresentation": "utils",
|
||||||
"_message_from_dict": "utils",
|
"_message_from_dict": "utils",
|
||||||
"convert_to_messages": "utils",
|
"convert_to_messages": "utils",
|
||||||
|
"convert_to_openai_data_block": "content_blocks",
|
||||||
"convert_to_openai_image_block": "content_blocks",
|
"convert_to_openai_image_block": "content_blocks",
|
||||||
"convert_to_openai_messages": "utils",
|
"convert_to_openai_messages": "utils",
|
||||||
"filter_messages": "utils",
|
"filter_messages": "utils",
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
"""Types for content blocks."""
|
"""Types for content blocks."""
|
||||||
|
|
||||||
|
import warnings
|
||||||
from typing import Any, Literal, Union
|
from typing import Any, Literal, Union
|
||||||
|
|
||||||
from pydantic import TypeAdapter, ValidationError
|
from pydantic import TypeAdapter, ValidationError
|
||||||
@ -108,3 +109,47 @@ def convert_to_openai_image_block(content_block: dict[str, Any]) -> dict:
|
|||||||
}
|
}
|
||||||
error_message = "Unsupported source type. Only 'url' and 'base64' are supported."
|
error_message = "Unsupported source type. Only 'url' and 'base64' are supported."
|
||||||
raise ValueError(error_message)
|
raise ValueError(error_message)
|
||||||
|
|
||||||
|
|
||||||
|
def _openai_audio_format(mime_type: str) -> str:
    """Map an audio MIME type to the ``format`` value of an ``input_audio`` block."""
    # Drop MIME parameters (e.g. "audio/wav; codecs=1") before taking the subtype;
    # MIME types are case-insensitive, so normalize to lower case.
    subtype = mime_type.split(";", 1)[0].strip().split("/")[-1].lower()
    # Subtypes whose name differs from the audio format they denote. For example,
    # the standard MIME type for .mp3 files is "audio/mpeg", not "audio/mp3".
    aliases = {
        "mpeg": "mp3",
        "mpeg3": "mp3",
        "x-mp3": "mp3",
        "x-mpeg": "mp3",
        "wave": "wav",
        "x-wav": "wav",
        "x-pn-wav": "wav",
        "vnd.wave": "wav",
    }
    # Unknown subtypes are passed through unchanged (apart from normalization).
    return aliases.get(subtype, subtype)


def convert_to_openai_data_block(block: dict) -> dict:
    """Format standard data content block to format expected by OpenAI.

    Args:
        block: A standard data content block. ``block["type"]`` selects the
            conversion: ``"image"`` is delegated to
            ``convert_to_openai_image_block``; ``"file"`` accepts
            ``source_type`` ``"base64"`` (reads ``mime_type`` and ``data``, plus
            an optional ``filename`` taken from the block or its ``metadata``)
            or ``"id"`` (reads ``id``); ``"audio"`` accepts ``source_type``
            ``"base64"`` (reads ``mime_type`` and ``data``).

    Returns:
        The block as a dict in OpenAI's content-block layout.

    Raises:
        ValueError: If the block type, or the ``source_type`` for that block
            type, is not supported.
    """
    block_type = block["type"]

    if block_type == "image":
        return convert_to_openai_image_block(block)

    if block_type == "file":
        source_type = block["source_type"]
        if source_type == "base64":
            # Inline file contents are sent as a data URI.
            file = {"file_data": f"data:{block['mime_type']};base64,{block['data']}"}
            if filename := block.get("filename"):
                file["filename"] = filename
            elif (metadata := block.get("metadata")) and ("filename" in metadata):
                file["filename"] = metadata["filename"]
            else:
                # No filename available: warn rather than fail, and send the
                # block without one.
                warnings.warn(
                    "OpenAI may require a filename for file inputs. Specify a filename "
                    "in the content block: {'type': 'file', 'source_type': 'base64', "
                    "'mime_type': 'application/pdf', 'data': '...', "
                    "'filename': 'my-pdf'}",
                    stacklevel=1,
                )
            return {"type": "file", "file": file}
        if source_type == "id":
            return {"type": "file", "file": {"file_id": block["id"]}}
        error_msg = "source_type base64 or id is required for file blocks."
        raise ValueError(error_msg)

    if block_type == "audio":
        if block["source_type"] != "base64":
            error_msg = "source_type base64 is required for audio blocks."
            raise ValueError(error_msg)
        return {
            "type": "input_audio",
            "input_audio": {
                "data": block["data"],
                # "audio/mpeg" must become "mp3", "audio/x-wav" must become "wav".
                "format": _openai_audio_format(block["mime_type"]),
            },
        }

    error_msg = f"Block of type {block['type']} is not supported."
    raise ValueError(error_msg)
|
||||||
|
@ -30,6 +30,7 @@ from typing import (
|
|||||||
from pydantic import Discriminator, Field, Tag
|
from pydantic import Discriminator, Field, Tag
|
||||||
|
|
||||||
from langchain_core.exceptions import ErrorCode, create_message
|
from langchain_core.exceptions import ErrorCode, create_message
|
||||||
|
from langchain_core.messages import convert_to_openai_data_block, is_data_content_block
|
||||||
from langchain_core.messages.ai import AIMessage, AIMessageChunk
|
from langchain_core.messages.ai import AIMessage, AIMessageChunk
|
||||||
from langchain_core.messages.base import BaseMessage, BaseMessageChunk
|
from langchain_core.messages.base import BaseMessage, BaseMessageChunk
|
||||||
from langchain_core.messages.chat import ChatMessage, ChatMessageChunk
|
from langchain_core.messages.chat import ChatMessage, ChatMessageChunk
|
||||||
@ -1067,6 +1068,9 @@ def convert_to_openai_messages(
|
|||||||
"image_url": block["image_url"],
|
"image_url": block["image_url"],
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
# Standard multi-modal content block
|
||||||
|
elif is_data_content_block(block):
|
||||||
|
content.append(convert_to_openai_data_block(block))
|
||||||
# Anthropic and Bedrock converse format
|
# Anthropic and Bedrock converse format
|
||||||
elif (block.get("type") == "image") or "image" in block:
|
elif (block.get("type") == "image") or "image" in block:
|
||||||
# Anthropic
|
# Anthropic
|
||||||
|
@ -33,6 +33,7 @@ EXPECTED_ALL = [
|
|||||||
"filter_messages",
|
"filter_messages",
|
||||||
"merge_message_runs",
|
"merge_message_runs",
|
||||||
"trim_messages",
|
"trim_messages",
|
||||||
|
"convert_to_openai_data_block",
|
||||||
"convert_to_openai_image_block",
|
"convert_to_openai_image_block",
|
||||||
"convert_to_openai_messages",
|
"convert_to_openai_messages",
|
||||||
]
|
]
|
||||||
|
@ -1186,6 +1186,55 @@ def test_convert_to_openai_messages_developer() -> None:
|
|||||||
assert result == [{"role": "developer", "content": "a"}] * 2
|
assert result == [{"role": "developer", "content": "a"}] * 2
|
||||||
|
|
||||||
|
|
||||||
|
def test_convert_to_openai_messages_multimodal() -> None:
    """Standard multi-modal data blocks are converted into OpenAI content blocks."""
    messages = [
        HumanMessage(
            content=[
                {"type": "text", "text": "Text message"},
                {
                    "type": "image",
                    "source_type": "url",
                    "url": "https://example.com/test.png",
                },
                {
                    "type": "image",
                    "source_type": "base64",
                    "data": "<base64 string>",
                    "mime_type": "image/png",
                },
                {
                    "type": "file",
                    "source_type": "base64",
                    "data": "<base64 string>",
                    "mime_type": "application/pdf",
                },
                {
                    "type": "file",
                    "source_type": "base64",
                    "data": "<base64 string>",
                    "mime_type": "application/pdf",
                    "filename": "test.pdf",
                },
                {
                    "type": "file",
                    "source_type": "id",
                    "id": "file-abc123",
                },
                {
                    "type": "audio",
                    "source_type": "base64",
                    "data": "<base64 string>",
                    "mime_type": "audio/wav",
                },
            ]
        )
    ]
    result = convert_to_openai_messages(messages, text_format="block")
    assert len(result) == 1
    message = result[0]
    assert len(message["content"]) == 7

    # Beyond the block count, pin the converted shape of the file and audio
    # blocks (the last four inputs) so a regression in the conversion itself
    # cannot pass unnoticed.
    file_without_name, file_with_name, file_by_id, audio = message["content"][3:]
    assert file_without_name == {
        "type": "file",
        "file": {"file_data": "data:application/pdf;base64,<base64 string>"},
    }
    assert file_with_name == {
        "type": "file",
        "file": {
            "file_data": "data:application/pdf;base64,<base64 string>",
            "filename": "test.pdf",
        },
    }
    assert file_by_id == {"type": "file", "file": {"file_id": "file-abc123"}}
    assert audio == {
        "type": "input_audio",
        "input_audio": {"data": "<base64 string>", "format": "wav"},
    }
|
||||||
|
|
||||||
|
|
||||||
def test_count_tokens_approximately_empty_messages() -> None:
|
def test_count_tokens_approximately_empty_messages() -> None:
|
||||||
# Test with empty message list
|
# Test with empty message list
|
||||||
assert count_tokens_approximately([]) == 0
|
assert count_tokens_approximately([]) == 0
|
||||||
|
@ -61,7 +61,7 @@ from langchain_core.messages import (
|
|||||||
ToolCall,
|
ToolCall,
|
||||||
ToolMessage,
|
ToolMessage,
|
||||||
ToolMessageChunk,
|
ToolMessageChunk,
|
||||||
convert_to_openai_image_block,
|
convert_to_openai_data_block,
|
||||||
is_data_content_block,
|
is_data_content_block,
|
||||||
)
|
)
|
||||||
from langchain_core.messages.ai import (
|
from langchain_core.messages.ai import (
|
||||||
@ -186,45 +186,6 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
|
|||||||
return ChatMessage(content=_dict.get("content", ""), role=role, id=id_) # type: ignore[arg-type]
|
return ChatMessage(content=_dict.get("content", ""), role=role, id=id_) # type: ignore[arg-type]
|
||||||
|
|
||||||
|
|
||||||
def _format_data_content_block(block: dict) -> dict:
|
|
||||||
"""Format standard data content block to format expected by OpenAI."""
|
|
||||||
if block["type"] == "image":
|
|
||||||
formatted_block = convert_to_openai_image_block(block)
|
|
||||||
|
|
||||||
elif block["type"] == "file":
|
|
||||||
if block["source_type"] == "base64":
|
|
||||||
file = {"file_data": f"data:{block['mime_type']};base64,{block['data']}"}
|
|
||||||
if filename := block.get("filename"):
|
|
||||||
file["filename"] = filename
|
|
||||||
elif (metadata := block.get("metadata")) and ("filename" in metadata):
|
|
||||||
file["filename"] = metadata["filename"]
|
|
||||||
else:
|
|
||||||
warnings.warn(
|
|
||||||
"OpenAI may require a filename for file inputs. Specify a filename "
|
|
||||||
"in the content block: {'type': 'file', 'source_type': 'base64', "
|
|
||||||
"'mime_type': 'application/pdf', 'data': '...', "
|
|
||||||
"'filename': 'my-pdf'}"
|
|
||||||
)
|
|
||||||
formatted_block = {"type": "file", "file": file}
|
|
||||||
elif block["source_type"] == "id":
|
|
||||||
formatted_block = {"type": "file", "file": {"file_id": block["id"]}}
|
|
||||||
else:
|
|
||||||
raise ValueError("source_type base64 or id is required for file blocks.")
|
|
||||||
elif block["type"] == "audio":
|
|
||||||
if block["source_type"] == "base64":
|
|
||||||
format = block["mime_type"].split("/")[-1]
|
|
||||||
formatted_block = {
|
|
||||||
"type": "input_audio",
|
|
||||||
"input_audio": {"data": block["data"], "format": format},
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
raise ValueError("source_type base64 is required for audio blocks.")
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Block of type {block['type']} is not supported.")
|
|
||||||
|
|
||||||
return formatted_block
|
|
||||||
|
|
||||||
|
|
||||||
def _format_message_content(content: Any) -> Any:
|
def _format_message_content(content: Any) -> Any:
|
||||||
"""Format message content."""
|
"""Format message content."""
|
||||||
if content and isinstance(content, list):
|
if content and isinstance(content, list):
|
||||||
@ -238,7 +199,7 @@ def _format_message_content(content: Any) -> Any:
|
|||||||
):
|
):
|
||||||
continue
|
continue
|
||||||
elif isinstance(block, dict) and is_data_content_block(block):
|
elif isinstance(block, dict) and is_data_content_block(block):
|
||||||
formatted_content.append(_format_data_content_block(block))
|
formatted_content.append(convert_to_openai_data_block(block))
|
||||||
# Anthropic image blocks
|
# Anthropic image blocks
|
||||||
elif (
|
elif (
|
||||||
isinstance(block, dict)
|
isinstance(block, dict)
|
||||||
|
Loading…
Reference in New Issue
Block a user