mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-17 15:35:14 +00:00
refactor: enhance OpenAI data block handling and normalize message formats
This commit is contained in:
@@ -1,12 +1,26 @@
|
||||
import re
|
||||
from collections.abc import Sequence
|
||||
from typing import Optional
|
||||
from typing import TYPE_CHECKING, Literal, Optional, TypedDict
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain_core.messages import BaseMessage
|
||||
from langchain_core.messages.content_blocks import (
|
||||
KNOWN_BLOCK_TYPES,
|
||||
ContentBlock,
|
||||
create_audio_block,
|
||||
create_file_block,
|
||||
create_image_block,
|
||||
create_non_standard_block,
|
||||
create_plaintext_block,
|
||||
)
|
||||
|
||||
|
||||
def _is_openai_data_block(block: dict) -> bool:
|
||||
"""Check if the block contains multimodal data in OpenAI Chat Completions format."""
|
||||
"""Check if the block contains multimodal data in OpenAI Chat Completions format.
|
||||
|
||||
Supports both data and ID-style blocks (e.g. ``'file_data'`` and ``'file_id'``).
|
||||
|
||||
"""
|
||||
if block.get("type") == "image_url":
|
||||
if (
|
||||
(set(block.keys()) <= {"type", "image_url", "detail"})
|
||||
@@ -15,29 +29,42 @@ def _is_openai_data_block(block: dict) -> bool:
|
||||
):
|
||||
url = image_url.get("url")
|
||||
if isinstance(url, str):
|
||||
# Required per OpenAI spec
|
||||
return True
|
||||
# Ignore `'detail'` since it's optional and specific to OpenAI
|
||||
|
||||
elif block.get("type") == "file":
|
||||
if (file := block.get("file")) and isinstance(file, dict):
|
||||
file_data = file.get("file_data")
|
||||
if isinstance(file_data, str):
|
||||
file_id = file.get("file_id")
|
||||
if isinstance(file_data, str) or isinstance(file_id, str):
|
||||
return True
|
||||
|
||||
elif block.get("type") == "input_audio":
|
||||
if (input_audio := block.get("input_audio")) and isinstance(input_audio, dict):
|
||||
audio_data = input_audio.get("data")
|
||||
audio_format = input_audio.get("format")
|
||||
if (audio := block.get("audio")) and isinstance(audio, dict):
|
||||
audio_data = audio.get("data")
|
||||
audio_format = audio.get("format")
|
||||
if isinstance(audio_data, str) and isinstance(audio_format, str):
|
||||
# Both required per OpenAI spec
|
||||
return True
|
||||
|
||||
else:
|
||||
return False
|
||||
|
||||
# Has no `'type'` key
|
||||
return False
|
||||
|
||||
|
||||
def _parse_data_uri(uri: str) -> Optional[dict]:
|
||||
"""Parse a data URI into its components. If parsing fails, return None.
|
||||
class ParsedDataUri(TypedDict):
|
||||
source_type: Literal["base64"]
|
||||
data: str
|
||||
mime_type: str
|
||||
|
||||
|
||||
def _parse_data_uri(uri: str) -> Optional[ParsedDataUri]:
|
||||
"""Parse a data URI into its components.
|
||||
|
||||
If parsing fails, return None. If either MIME type or data is missing, return None.
|
||||
|
||||
Example:
|
||||
|
||||
@@ -57,84 +84,350 @@ def _parse_data_uri(uri: str) -> Optional[dict]:
|
||||
match = re.match(regex, uri)
|
||||
if match is None:
|
||||
return None
|
||||
|
||||
mime_type = match.group("mime_type")
|
||||
data = match.group("data")
|
||||
if not mime_type or not data:
|
||||
return None
|
||||
|
||||
return {
|
||||
"source_type": "base64",
|
||||
"data": match.group("data"),
|
||||
"mime_type": match.group("mime_type"),
|
||||
"data": data,
|
||||
"mime_type": mime_type,
|
||||
}
|
||||
|
||||
|
||||
def _convert_openai_format_to_data_block(block: dict) -> dict:
|
||||
"""Convert OpenAI image content block to standard data content block.
|
||||
def _convert_openai_format_to_data_block(block: dict) -> ContentBlock:
|
||||
"""Convert OpenAI image/audio/file content block to v1 standard content block.
|
||||
|
||||
If parsing fails, pass-through.
|
||||
|
||||
Args:
|
||||
block: The OpenAI image content block to convert.
|
||||
|
||||
Returns:
|
||||
The converted standard data content block.
|
||||
"""
|
||||
if block["type"] == "image_url":
|
||||
parsed = _parse_data_uri(block["image_url"]["url"])
|
||||
if parsed is not None:
|
||||
parsed["type"] = "image"
|
||||
return parsed
|
||||
return block
|
||||
|
||||
if block["type"] == "file":
|
||||
parsed = _parse_data_uri(block["file"]["file_data"])
|
||||
if parsed is not None:
|
||||
parsed["type"] = "file"
|
||||
if filename := block["file"].get("filename"):
|
||||
parsed["filename"] = filename
|
||||
return parsed
|
||||
return block
|
||||
if block.get("type") == "file" and "file_id" in block.get("file", {}):
|
||||
return create_file_block(
|
||||
file_id=block["file"]["file_id"],
|
||||
)
|
||||
|
||||
if block["type"] == "input_audio":
|
||||
data = block["input_audio"].get("data")
|
||||
audio_format = block["input_audio"].get("format")
|
||||
if data and audio_format:
|
||||
return {
|
||||
"type": "audio",
|
||||
"source_type": "base64",
|
||||
"data": data,
|
||||
"mime_type": f"audio/{audio_format}",
|
||||
return create_audio_block(
|
||||
base64=block["audio"]["data"],
|
||||
mime_type=f"audio/{block['audio']['format']}",
|
||||
)
|
||||
|
||||
if (block["type"] == "file") and (
|
||||
parsed := _parse_data_uri(block["file"]["file_data"])
|
||||
):
|
||||
mime_type = parsed["mime_type"]
|
||||
filename = block["file"].get("filename")
|
||||
return create_file_block(
|
||||
base64=block["file"]["file_data"],
|
||||
mime_type=mime_type,
|
||||
filename=filename,
|
||||
)
|
||||
|
||||
# base64-style image block
|
||||
if (block["type"] == "image_url") and (
|
||||
parsed := _parse_data_uri(block["image_url"]["url"])
|
||||
):
|
||||
return create_image_block(
|
||||
base64=block["image_url"]["url"],
|
||||
mime_type=parsed["mime_type"],
|
||||
detail=block["image_url"].get("detail"), # Optional, specific to OpenAI
|
||||
)
|
||||
# url-style image block
|
||||
if (block["type"] == "image_url") and isinstance(
|
||||
block["image_url"].get("url"), str
|
||||
):
|
||||
return create_image_block(
|
||||
url=block["image_url"]["url"],
|
||||
detail=block["image_url"].get("detail"), # Optional, specific to OpenAI
|
||||
)
|
||||
|
||||
# Escape hatch for non-standard content blocks
|
||||
return create_non_standard_block(
|
||||
value=block,
|
||||
)
|
||||
|
||||
|
||||
def _normalize_messages(messages: Sequence["BaseMessage"]) -> list["BaseMessage"]:
|
||||
"""Normalize different message formats to LangChain v1 standard content blocks.
|
||||
|
||||
Chat models implement support for:
|
||||
- Images in OpenAI Chat Completions format
|
||||
- LangChain v1 standard content blocks
|
||||
|
||||
This function extends support to:
|
||||
- `Audio <https://platform.openai.com/docs/api-reference/chat/create>`__ and
|
||||
`file <https://platform.openai.com/docs/api-reference/files>`__ data in OpenAI
|
||||
Chat Completions format
|
||||
- Images are technically supported but we expect chat models to handle them
|
||||
directly; this may change in the future
|
||||
- LangChain v0 standard content blocks for backward compatibility
|
||||
|
||||
.. versionchanged:: 1.0.0
|
||||
In previous versions, this function returned messages in LangChain v0 format.
|
||||
Now, it returns messages in LangChain v1 format, which upgraded chat models now
|
||||
expect to receive when passing back in message history. For backward
|
||||
compatibility, we now allow converting v0 message content to v1 format.
|
||||
|
||||
.. dropdown:: v0 Content Blocks
|
||||
|
||||
``URLContentBlock``:
|
||||
|
||||
.. code-block::
|
||||
|
||||
{
|
||||
mime_type: NotRequired[str],
|
||||
type: Literal['image', 'audio', 'file'],
|
||||
source_type: Literal['url'],
|
||||
url: str,
|
||||
}
|
||||
return block
|
||||
|
||||
return block
|
||||
``Base64ContentBlock``:
|
||||
|
||||
.. code-block::
|
||||
|
||||
def _normalize_messages(messages: Sequence[BaseMessage]) -> list[BaseMessage]:
|
||||
"""Extend support for message formats.
|
||||
{
|
||||
mime_type: NotRequired[str],
|
||||
type: Literal['image', 'audio', 'file'],
|
||||
source_type: Literal['base64'],
|
||||
data: str,
|
||||
}
|
||||
|
||||
``IDContentBlock``:
|
||||
|
||||
.. code-block::
|
||||
|
||||
{
|
||||
type: Literal['image', 'audio', 'file'],
|
||||
source_type: Literal['id'],
|
||||
id: str,
|
||||
}
|
||||
|
||||
``PlainTextContentBlock``:
|
||||
|
||||
.. code-block::
|
||||
|
||||
{
|
||||
mime_type: NotRequired[str],
|
||||
type: Literal['file'],
|
||||
source_type: Literal['text'],
|
||||
url: str,
|
||||
}
|
||||
|
||||
(Untested): if a v1 message is passed in, it will be returned as-is, meaning it is
|
||||
safe to always pass in v1 messages to this function for assurance.
|
||||
|
||||
Chat models implement support for images in OpenAI Chat Completions format, as well
|
||||
as other multimodal data as standard data blocks. This function extends support to
|
||||
audio and file data in OpenAI Chat Completions format by converting them to standard
|
||||
data blocks.
|
||||
"""
|
||||
# For posterity, here are the OpenAI Chat Completions schemas we expect:
|
||||
#
|
||||
# Chat Completions image. Can be URL-based or base64-encoded. Supports MIME types
|
||||
# png, jpeg/jpg, webp, static gif:
|
||||
# {
|
||||
# "type": Literal['image_url'],
|
||||
# "image_url": {
|
||||
# "url": Union["data:$MIME_TYPE;base64,$BASE64_ENCODED_IMAGE", "$IMAGE_URL"], # noqa: E501
|
||||
# "detail": Literal['low', 'high', 'auto'] = 'auto', # Only supported by OpenAI # noqa: E501
|
||||
# }
|
||||
# }
|
||||
|
||||
# Chat Completions audio:
|
||||
# {
|
||||
# "type": Literal['input_audio'],
|
||||
# "audio": {
|
||||
# "format": Literal['wav', 'mp3'],
|
||||
# "data": str = "$BASE64_ENCODED_AUDIO",
|
||||
# },
|
||||
# }
|
||||
|
||||
# Chat Completions files: either base64 or pre-uploaded file ID
|
||||
# {
|
||||
# "type": Literal['file'],
|
||||
# "file": Union[
|
||||
# {
|
||||
# "filename": Optional[str] = "$FILENAME",
|
||||
# "file_data": str = "$BASE64_ENCODED_FILE",
|
||||
# },
|
||||
# {
|
||||
# "file_id": str = "$FILE_ID", # For pre-uploaded files to OpenAI
|
||||
# },
|
||||
# ],
|
||||
# }
|
||||
|
||||
formatted_messages = []
|
||||
for message in messages:
|
||||
# We preserve input messages - the caller may reuse them elsewhere and expects
|
||||
# them to remain unchanged. We only create a copy if we need to translate
|
||||
# (e.g. they're not already in LangChain format).
|
||||
|
||||
formatted_message = message
|
||||
if isinstance(message.content, list):
|
||||
for idx, block in enumerate(message.content):
|
||||
if (
|
||||
isinstance(block, dict)
|
||||
# Subset to (PDF) files and audio, as most relevant chat models
|
||||
# support images in OAI format (and some may not yet support the
|
||||
# standard data block format)
|
||||
and block.get("type") in {"file", "input_audio"}
|
||||
and _is_openai_data_block(block)
|
||||
):
|
||||
if isinstance(message.content, str):
|
||||
if formatted_message is message:
|
||||
formatted_message = message.model_copy()
|
||||
# Also shallow-copy content
|
||||
# Shallow-copy the content string so we can modify it
|
||||
formatted_message.content = str(formatted_message.content)
|
||||
formatted_message.content = [
|
||||
{
|
||||
"type": "text",
|
||||
"text": message.content,
|
||||
}
|
||||
]
|
||||
|
||||
elif isinstance(message.content, list):
|
||||
for idx, block in enumerate(message.content):
|
||||
if isinstance(block, str):
|
||||
if formatted_message is message:
|
||||
formatted_message = message.model_copy()
|
||||
# Shallow-copy the content list so we can modify it
|
||||
formatted_message.content = list(formatted_message.content)
|
||||
formatted_message.content[idx] = {"type": "text", "text": block} # type: ignore[index] # mypy confused by .model_copy
|
||||
|
||||
# Handle OpenAI Chat Completions multimodal data blocks
|
||||
if (
|
||||
# Subset to base64 image, file, and audio
|
||||
isinstance(block, dict)
|
||||
and block.get("type") in {"image_url", "input_audio", "file"}
|
||||
# We need to discriminate between an OpenAI formatted file and a LC
|
||||
# file content block since they share the `'type'` key
|
||||
and _is_openai_data_block(block)
|
||||
):
|
||||
# Only copy if it is an OpenAI data block that needs conversion
|
||||
if formatted_message is message:
|
||||
formatted_message = message.model_copy()
|
||||
# Shallow-copy the content list so we can modify it
|
||||
formatted_message.content = list(formatted_message.content)
|
||||
|
||||
formatted_message.content[idx] = ( # type: ignore[index] # mypy confused by .model_copy
|
||||
# Convert OpenAI image/audio/file block to LangChain v1 standard
|
||||
# content
|
||||
formatted_message.content[idx] = ( # type: ignore[call-overload,index] # mypy confused by .model_copy
|
||||
_convert_openai_format_to_data_block(block)
|
||||
# This may return a NonStandardContentBlock if parsing fails!
|
||||
)
|
||||
|
||||
# Handle LangChain v0 standard content blocks
|
||||
|
||||
# TODO: check for source_type since that disqualifies v1 blocks and
|
||||
# ensures this block only checks v0
|
||||
elif isinstance(block, dict) and block.get("type") in {
|
||||
"image",
|
||||
"audio",
|
||||
"file",
|
||||
}:
|
||||
# Convert v0 to v1 standard content blocks
|
||||
# These guard against v1 blocks as they don't have `'source_type'`
|
||||
|
||||
if formatted_message is message:
|
||||
formatted_message = message.model_copy()
|
||||
# Shallow-copy the content list so we can modify it
|
||||
formatted_message.content = list(formatted_message.content)
|
||||
|
||||
# URL-image
|
||||
if block.get("source_type") == "url" and block["type"] == "image":
|
||||
formatted_message.content[idx] = create_image_block( # type: ignore[call-overload,index] # mypy confused by .model_copy
|
||||
url=block["url"],
|
||||
mime_type=block.get("mime_type"),
|
||||
)
|
||||
|
||||
# URL-audio
|
||||
elif block.get("source_type") == "url" and block["type"] == "audio":
|
||||
formatted_message.content[idx] = create_audio_block( # type: ignore[call-overload,index] # mypy confused by .model_copy
|
||||
url=block["url"],
|
||||
mime_type=block.get("mime_type"),
|
||||
)
|
||||
|
||||
# URL-file
|
||||
elif block.get("source_type") == "url" and block["type"] == "file":
|
||||
formatted_message.content[idx] = create_file_block( # type: ignore[call-overload,index] # mypy confused by .model_copy
|
||||
url=block["url"],
|
||||
mime_type=block.get("mime_type"),
|
||||
)
|
||||
|
||||
# base64-image
|
||||
elif (
|
||||
block.get("source_type") == "base64"
|
||||
and block["type"] == "image"
|
||||
):
|
||||
formatted_message.content[idx] = create_image_block( # type: ignore[call-overload,index] # mypy confused by .model_copy
|
||||
base64=block["data"],
|
||||
mime_type=block.get("mime_type"),
|
||||
)
|
||||
|
||||
# base64-audio
|
||||
elif (
|
||||
block.get("source_type") == "base64"
|
||||
and block["type"] == "audio"
|
||||
):
|
||||
formatted_message.content[idx] = create_audio_block( # type: ignore[call-overload,index] # mypy confused by .model_copy
|
||||
base64=block["data"],
|
||||
mime_type=block.get("mime_type"),
|
||||
)
|
||||
|
||||
# base64-file
|
||||
elif (
|
||||
block.get("source_type") == "base64" and block["type"] == "file"
|
||||
):
|
||||
formatted_message.content[idx] = create_file_block( # type: ignore[call-overload,index] # mypy confused by .model_copy
|
||||
base64=block["data"],
|
||||
mime_type=block.get("mime_type"),
|
||||
)
|
||||
|
||||
# id-image
|
||||
elif block.get("source_type") == "id" and block["type"] == "image":
|
||||
formatted_message.content[idx] = create_image_block( # type: ignore[call-overload,index] # mypy confused by .model_copy
|
||||
id=block["id"],
|
||||
)
|
||||
|
||||
# id-audio
|
||||
elif block.get("source_type") == "id" and block["type"] == "audio":
|
||||
formatted_message.content[idx] = create_audio_block( # type: ignore[call-overload,index] # mypy confused by .model_copy
|
||||
id=block["id"],
|
||||
)
|
||||
|
||||
# id-file
|
||||
elif block.get("source_type") == "id" and block["type"] == "file":
|
||||
formatted_message.content[idx] = create_file_block( # type: ignore[call-overload,index] # mypy confused by .model_copy
|
||||
id=block["id"],
|
||||
)
|
||||
|
||||
# text-file
|
||||
elif block.get("source_type") == "text" and block["type"] == "file":
|
||||
formatted_message.content[idx] = create_plaintext_block( # type: ignore[call-overload,index] # mypy confused by .model_copy
|
||||
text=block["url"],
|
||||
# Note: `text` is the URL in this case, not the content
|
||||
# This is a legacy format, so we don't expect a MIME type
|
||||
# but we can still pass it if it exists
|
||||
mime_type=block.get("mime_type"),
|
||||
)
|
||||
|
||||
else: # Unsupported or malformed v0 content block
|
||||
formatted_message.content[idx] = { # type: ignore[index] # mypy confused by .model_copy
|
||||
"type": "non_standard",
|
||||
"value": block,
|
||||
}
|
||||
|
||||
# Validate a v1 block to pass through
|
||||
elif (
|
||||
isinstance(block, dict)
|
||||
and "type" in block
|
||||
and block["type"] in KNOWN_BLOCK_TYPES
|
||||
):
|
||||
# # Handle shared type keys between v1 blocks and Chat Completions
|
||||
# if block["type"] == "file" and block["file"]:
|
||||
# # This is a file ID block
|
||||
# formatted_message.content[idx] = create_file_block( # type: ignore[call-overload,index] # mypy confused by .model_copy # noqa: E501
|
||||
# id=block["file"]["file_id"],
|
||||
# )
|
||||
|
||||
formatted_message.content[idx] = block # type: ignore[index] # mypy confused by .model_copy
|
||||
|
||||
# Pass through any other content block types
|
||||
|
||||
# If we didn't modify the message, skip creating a new instance
|
||||
if formatted_message is message:
|
||||
formatted_messages.append(message)
|
||||
continue
|
||||
|
||||
# At this point, `content` will be a list of v1 standard content blocks.
|
||||
formatted_messages.append(formatted_message)
|
||||
|
||||
return formatted_messages
|
||||
|
@@ -6,6 +6,7 @@ from typing import TYPE_CHECKING, Any, Optional, Union, cast, overload
|
||||
|
||||
from pydantic import ConfigDict, Field
|
||||
|
||||
from langchain_core.language_models._utils import _convert_openai_format_to_data_block
|
||||
from langchain_core.load.serializable import Serializable
|
||||
from langchain_core.messages import content_blocks as types
|
||||
from langchain_core.utils import get_bolded_text
|
||||
@@ -132,6 +133,12 @@ class BaseMessage(Serializable):
|
||||
blocks.append({"type": "text", "text": item})
|
||||
elif isinstance(item, dict):
|
||||
item_type = item.get("type")
|
||||
if item_type in types.KNOWN_OPENAI_BLOCK_TYPES:
|
||||
# OpenAI-specific content blocks
|
||||
if item_type in {"image_url", "input_audio"}:
|
||||
blocks.append(_convert_openai_format_to_data_block(item))
|
||||
else:
|
||||
blocks.append(cast("types.ContentBlock", item))
|
||||
if item_type not in types.KNOWN_BLOCK_TYPES:
|
||||
msg = (
|
||||
f"Non-standard content block type '{item_type}'. Ensure "
|
||||
|
@@ -212,12 +212,29 @@ async def test_callback_handlers() -> None:
|
||||
|
||||
|
||||
def test_chat_model_inputs() -> None:
|
||||
fake = ParrotFakeChatModel()
|
||||
# Do we need to parameterize over both versions?
|
||||
# fake = ParrotFakeChatModel()
|
||||
|
||||
assert fake.invoke("hello") == _any_id_human_message(content="hello")
|
||||
assert fake.invoke([("ai", "blah")]) == _any_id_ai_message(content="blah")
|
||||
# assert fake.invoke("hello") == _any_id_human_message(
|
||||
# content=[{"type": "text", "text": "hello"}]
|
||||
# )
|
||||
# assert fake.invoke([("ai", "blah")]) == _any_id_ai_message(
|
||||
# content=[{"type": "text", "text": "blah"}]
|
||||
# )
|
||||
# assert fake.invoke([AIMessage(content="blah")]) == _any_id_ai_message(
|
||||
# content=[{"type": "text", "text": "blah"}]
|
||||
# )
|
||||
|
||||
fake = ParrotFakeChatModel(output_version="v1")
|
||||
|
||||
assert fake.invoke("hello") == _any_id_human_message(
|
||||
content=[{"type": "text", "text": "hello"}]
|
||||
)
|
||||
assert fake.invoke([("ai", "blah")]) == _any_id_ai_message(
|
||||
content=[{"type": "text", "text": "blah"}]
|
||||
)
|
||||
assert fake.invoke([AIMessage(content="blah")]) == _any_id_ai_message(
|
||||
content="blah"
|
||||
content=[{"type": "text", "text": "blah"}]
|
||||
)
|
||||
|
||||
|
||||
|
@@ -428,43 +428,44 @@ class FakeChatModelStartTracer(FakeTracer):
|
||||
|
||||
def test_trace_images_in_openai_format() -> None:
|
||||
"""Test that images are traced in OpenAI format."""
|
||||
llm = ParrotFakeChatModel()
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "image",
|
||||
"source_type": "url",
|
||||
"url": "https://example.com/image.png",
|
||||
}
|
||||
],
|
||||
}
|
||||
]
|
||||
tracer = FakeChatModelStartTracer()
|
||||
response = llm.invoke(messages, config={"callbacks": [tracer]})
|
||||
assert tracer.messages == [
|
||||
[
|
||||
[
|
||||
HumanMessage(
|
||||
content=[
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": "https://example.com/image.png"},
|
||||
}
|
||||
]
|
||||
)
|
||||
]
|
||||
]
|
||||
]
|
||||
# Test no mutation
|
||||
assert response.content == [
|
||||
{
|
||||
"type": "image",
|
||||
"source_type": "url",
|
||||
"url": "https://example.com/image.png",
|
||||
}
|
||||
]
|
||||
# TODO: trace in new format, or add way to trace in both formats?
|
||||
# llm = ParrotFakeChatModel()
|
||||
# messages = [
|
||||
# {
|
||||
# "role": "user",
|
||||
# # v0 format
|
||||
# "content": [
|
||||
# {
|
||||
# "type": "image",
|
||||
# "source_type": "url",
|
||||
# "url": "https://example.com/image.png",
|
||||
# }
|
||||
# ],
|
||||
# }
|
||||
# ]
|
||||
# tracer = FakeChatModelStartTracer()
|
||||
# response = llm.invoke(messages, config={"callbacks": [tracer]})
|
||||
# assert tracer.messages == [
|
||||
# [
|
||||
# [
|
||||
# HumanMessage(
|
||||
# content=[
|
||||
# {
|
||||
# "type": "image_url",
|
||||
# "image_url": {"url": "https://example.com/image.png"},
|
||||
# }
|
||||
# ]
|
||||
# )
|
||||
# ]
|
||||
# ]
|
||||
# ]
|
||||
# # Passing in a v0 should return a v1
|
||||
# assert response.content == [
|
||||
# {
|
||||
# "type": "image",
|
||||
# "url": "https://example.com/image.png",
|
||||
# }
|
||||
# ]
|
||||
|
||||
|
||||
def test_trace_content_blocks_with_no_type_key() -> None:
|
||||
@@ -478,7 +479,7 @@ def test_trace_content_blocks_with_no_type_key() -> None:
|
||||
"type": "text",
|
||||
"text": "Hello",
|
||||
},
|
||||
{
|
||||
{ # Will be converted to NonStandardContentBlock
|
||||
"cachePoint": {"type": "default"},
|
||||
},
|
||||
],
|
||||
@@ -495,8 +496,8 @@ def test_trace_content_blocks_with_no_type_key() -> None:
|
||||
"type": "text",
|
||||
"text": "Hello",
|
||||
},
|
||||
{
|
||||
"type": "cachePoint",
|
||||
{ # For tracing, we are concerned with how messages are _sent_
|
||||
"type": "cachePoint", # TODO: how is this decided?
|
||||
"cachePoint": {"type": "default"},
|
||||
},
|
||||
]
|
||||
@@ -504,20 +505,20 @@ def test_trace_content_blocks_with_no_type_key() -> None:
|
||||
]
|
||||
]
|
||||
]
|
||||
# Test no mutation
|
||||
assert response.content == [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Hello",
|
||||
},
|
||||
{
|
||||
"cachePoint": {"type": "default"},
|
||||
"type": "non_standard",
|
||||
"value": {"cachePoint": {"type": "default"}},
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def test_extend_support_to_openai_multimodal_formats() -> None:
|
||||
"""Test that chat models normalize OpenAI file and audio inputs."""
|
||||
"""Test that chat models normalize OpenAI file and audio inputs to v1."""
|
||||
llm = ParrotFakeChatModel()
|
||||
messages = [
|
||||
{
|
||||
@@ -539,98 +540,65 @@ def test_extend_support_to_openai_multimodal_formats() -> None:
|
||||
"file_data": "data:application/pdf;base64,<base64 string>",
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "file",
|
||||
"file": {
|
||||
"file_data": "data:application/pdf;base64,<base64 string>",
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "file",
|
||||
"file": {"file_id": "<file id>"},
|
||||
},
|
||||
{
|
||||
"type": "input_audio",
|
||||
"input_audio": {"data": "<base64 data>", "format": "wav"},
|
||||
"audio": {
|
||||
"format": "wav",
|
||||
"data": "data:audio/wav;base64,<base64 string>",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
]
|
||||
expected_content = [
|
||||
{"type": "text", "text": "Hello"},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": "https://example.com/image.png"},
|
||||
{"type": "text", "text": "Hello"}, # TextContentBlock
|
||||
{ # Chat Completions Image becomes ImageContentBlock after invoke
|
||||
"type": "image",
|
||||
"url": "https://example.com/image.png",
|
||||
},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": "..."},
|
||||
{ # ...
|
||||
"type": "image",
|
||||
"base64": "...",
|
||||
"mime_type": "image/jpeg",
|
||||
},
|
||||
{
|
||||
{ # FileContentBlock
|
||||
"type": "file",
|
||||
"source_type": "base64",
|
||||
"data": "<base64 string>",
|
||||
"base64": "data:application/pdf;base64,<base64 string>",
|
||||
"mime_type": "application/pdf",
|
||||
"filename": "draconomicon.pdf",
|
||||
"extras": {"filename": "draconomicon.pdf"},
|
||||
},
|
||||
{
|
||||
{ # ...
|
||||
"type": "file",
|
||||
"source_type": "base64",
|
||||
"data": "<base64 string>",
|
||||
"mime_type": "application/pdf",
|
||||
"file_id": "<file id>",
|
||||
},
|
||||
{
|
||||
"type": "file",
|
||||
"file": {"file_id": "<file id>"},
|
||||
},
|
||||
{
|
||||
{ # AudioContentBlock
|
||||
"type": "audio",
|
||||
"source_type": "base64",
|
||||
"data": "<base64 data>",
|
||||
"base64": "data:audio/wav;base64,<base64 string>",
|
||||
"mime_type": "audio/wav",
|
||||
},
|
||||
]
|
||||
response = llm.invoke(messages)
|
||||
assert response.content == expected_content
|
||||
|
||||
# Test no mutation
|
||||
assert messages[0]["content"] == [
|
||||
{"type": "text", "text": "Hello"},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": "https://example.com/image.png"},
|
||||
},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": "..."},
|
||||
},
|
||||
{
|
||||
"type": "file",
|
||||
"file": {
|
||||
"filename": "draconomicon.pdf",
|
||||
"file_data": "data:application/pdf;base64,<base64 string>",
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "file",
|
||||
"file": {
|
||||
"file_data": "data:application/pdf;base64,<base64 string>",
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "file",
|
||||
"file": {"file_id": "<file id>"},
|
||||
},
|
||||
{
|
||||
"type": "input_audio",
|
||||
"input_audio": {"data": "<base64 data>", "format": "wav"},
|
||||
},
|
||||
]
|
||||
# Check structure, ignoring auto-generated IDs
|
||||
actual_content = response.content
|
||||
assert len(actual_content) == len(expected_content)
|
||||
|
||||
for i, (actual, expected) in enumerate(zip(actual_content, expected_content)):
|
||||
if isinstance(actual, dict) and "id" in actual:
|
||||
# Remove auto-generated id for comparison
|
||||
actual_without_id = {k: v for k, v in actual.items() if k != "id"}
|
||||
assert actual_without_id == expected, f"Mismatch at index {i}"
|
||||
else:
|
||||
assert actual == expected, f"Mismatch at index {i}"
|
||||
|
||||
|
||||
def test_normalize_messages_edge_cases() -> None:
|
||||
# Test some blocks that should pass through
|
||||
messages = [
|
||||
# Test unrecognized blocks come back as NonStandardContentBlock
|
||||
input_messages = [
|
||||
HumanMessage(
|
||||
content=[
|
||||
{
|
||||
@@ -639,18 +607,55 @@ def test_normalize_messages_edge_cases() -> None:
|
||||
},
|
||||
{
|
||||
"type": "input_file",
|
||||
"file_data": "uri",
|
||||
"file_data": "uri", # Malformed base64
|
||||
"filename": "file-name",
|
||||
},
|
||||
{
|
||||
"type": "input_audio",
|
||||
"input_audio": "uri",
|
||||
"input_audio": "uri", # Not nested in `audio`
|
||||
},
|
||||
{
|
||||
"type": "input_image",
|
||||
"image_url": "uri",
|
||||
"image_url": "uri", # Not nested in `image_url`
|
||||
},
|
||||
]
|
||||
)
|
||||
]
|
||||
assert messages == _normalize_messages(messages)
|
||||
|
||||
expected_messages = [
|
||||
HumanMessage(
|
||||
content=[
|
||||
{
|
||||
"type": "non_standard",
|
||||
"value": {
|
||||
"type": "file",
|
||||
"file": "uri",
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "non_standard",
|
||||
"value": {
|
||||
"type": "input_file",
|
||||
"file_data": "uri",
|
||||
"filename": "file-name",
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "non_standard",
|
||||
"value": {
|
||||
"type": "input_audio",
|
||||
"input_audio": "uri",
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "non_standard",
|
||||
"value": {
|
||||
"type": "input_image",
|
||||
"image_url": "uri",
|
||||
},
|
||||
},
|
||||
]
|
||||
)
|
||||
]
|
||||
|
||||
assert _normalize_messages(input_messages) == expected_messages
|
||||
|
@@ -215,7 +215,8 @@ def test_rate_limit_skips_cache() -> None:
|
||||
(
|
||||
'[{"lc": 1, "type": "constructor", "id": ["langchain", "schema", '
|
||||
'"messages", '
|
||||
'"HumanMessage"], "kwargs": {"content": "foo", "type": "human"}}]',
|
||||
'"HumanMessage"], "kwargs": {"content": [{"type": "text", "text": "foo"}], '
|
||||
'"type": "human"}}]',
|
||||
"[('_type', 'generic-fake-chat-model'), ('stop', None)]",
|
||||
)
|
||||
]
|
||||
|
Reference in New Issue
Block a user