mirror of
https://github.com/hwchase17/langchain.git
synced 2026-05-01 13:23:37 +00:00
fix(fireworks): translate canonical multimodal content blocks for chat completions (#37090)
## Summary
`langchain_fireworks._convert_message_to_dict` ships LangChain canonical
v0/v1 multimodal content blocks (e.g. `{"type": "image", "base64": ...,
"mime_type": ...}`) on the wire unchanged. Fireworks' OpenAI-compatible
chat completions API rejects the unknown `base64`/`mime_type` keys and
the list shape on roles that expect a string, returning HTTP 422 — so
any image upload, including via tools that return image content blocks,
fails for Kimi K2.6 and other Fireworks vision models.
This change mirrors
`langchain_openai.chat_models.base._format_message_content`:
- Walk `content` blocks.
- Drop block types the chat-completions wire doesn't carry (`tool_use`,
`thinking`, `reasoning_content`, `function_call`,
`code_interpreter_call`).
- Detect v0/v1 multimodal data blocks via
`langchain_core.messages.is_data_content_block`, and translate them via
`convert_to_openai_data_block(..., api="chat/completions")`.
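- Convert legacy Anthropic-shape image blocks (`{"type": "image",
"source": {...}}`) to OpenAI `image_url` blocks.
- Strings and non-list content pass through unchanged.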
Applied in the `ChatMessage`, `HumanMessage`, `AIMessage`,
`SystemMessage`, and `ToolMessage` paths of `_convert_message_to_dict`.
For `AIMessage`, v1 output is still routed through
`_convert_from_v1_to_chat_completions` first; the resulting content is
then passed through the same formatter, so canonical data blocks on
assistant messages are translated rather than forwarded raw.
## Why this approach
Fireworks is OpenAI-compatible. The canonical → OpenAI translator
already exists in `langchain_core.messages.block_translators.openai` and
is the same one `langchain-openai` uses. Reusing it (rather than
inventing a Fireworks-specific translator) gives:
- v0 (`source_type`-based) and v1 (`base64`/`url`-based) data block
coverage for free.
- Consistent behavior with `langchain-openai` for image, file, and any
future canonical data block.
- A small, focused diff (≈30 lines of new code, plus tests).
## Test plan
- [x] `make test` passes (64/64 unit tests, including 9 new ones for the
new helper and translation paths).
- [x] `make lint` passes (ruff check, ruff format, mypy, lint_imports).
- [ ] End-to-end: image upload to a Kimi K2.6 (Fireworks) agent
translates to `{"type": "image_url", "image_url": {"url":
"data:image/png;base64,..."}}` on the wire and the model returns a
coherent description (validated locally against
`langchain-fireworks==1.0.0` site-packages with the same patch).
---------
Co-authored-by: murugand23 <murugand23@users.noreply.github.com>
Co-authored-by: Mason Daugherty <github@mdrxy.com>
This commit is contained in:
@@ -57,6 +57,10 @@ from langchain_core.messages import (
|
||||
ToolCall,
|
||||
ToolMessage,
|
||||
ToolMessageChunk,
|
||||
is_data_content_block,
|
||||
)
|
||||
from langchain_core.messages.block_translators.openai import (
|
||||
convert_to_openai_data_block,
|
||||
)
|
||||
from langchain_core.messages.tool import (
|
||||
ToolCallChunk,
|
||||
@@ -166,6 +170,70 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
|
||||
return ChatMessage(content=_dict.get("content", ""), role=role or "")
|
||||
|
||||
|
||||
def _format_message_content(content: Any) -> Any:
|
||||
"""Format message content for the Fireworks chat completions wire format.
|
||||
|
||||
Adapted from `langchain_openai.chat_models.base._format_message_content`,
|
||||
scoped to the chat completions API: drops content block types the wire
|
||||
format does not carry, translates canonical v0/v1 multimodal data blocks
|
||||
via `convert_to_openai_data_block(block, api="chat/completions")`, and
|
||||
converts legacy Anthropic-shape image blocks (`{"type": "image",
|
||||
"source": {...}}`) to OpenAI `image_url` blocks. String and non-list
|
||||
content are returned unchanged.
|
||||
|
||||
Args:
|
||||
content: The message content. Strings and non-list values are
|
||||
returned as-is; lists are walked block by block.
|
||||
|
||||
Returns:
|
||||
The formatted content, ready to be placed on the chat completions
|
||||
wire. List inputs return a new list with translations applied; other
|
||||
inputs are returned unchanged.
|
||||
"""
|
||||
if not isinstance(content, list):
|
||||
return content
|
||||
formatted: list[Any] = []
|
||||
for block in content:
|
||||
if isinstance(block, dict) and "type" in block:
|
||||
btype = block["type"]
|
||||
if btype in (
|
||||
"tool_use",
|
||||
"thinking",
|
||||
"reasoning_content",
|
||||
"function_call",
|
||||
"code_interpreter_call",
|
||||
):
|
||||
continue
|
||||
if is_data_content_block(block):
|
||||
formatted.append(
|
||||
convert_to_openai_data_block(block, api="chat/completions")
|
||||
)
|
||||
continue
|
||||
if (
|
||||
btype == "image"
|
||||
and (source := block.get("source"))
|
||||
and isinstance(source, dict)
|
||||
):
|
||||
if (
|
||||
source.get("type") == "base64"
|
||||
and (media_type := source.get("media_type"))
|
||||
and (data := source.get("data"))
|
||||
):
|
||||
formatted.append(
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": f"data:{media_type};base64,{data}"},
|
||||
}
|
||||
)
|
||||
continue
|
||||
if source.get("type") == "url" and (url := source.get("url")):
|
||||
formatted.append({"type": "image_url", "image_url": {"url": url}})
|
||||
continue
|
||||
continue
|
||||
formatted.append(block)
|
||||
return formatted
|
||||
|
||||
|
||||
def _convert_message_to_dict(message: BaseMessage) -> dict:
|
||||
"""Convert a LangChain message to a dictionary.
|
||||
|
||||
@@ -178,14 +246,23 @@ def _convert_message_to_dict(message: BaseMessage) -> dict:
|
||||
"""
|
||||
message_dict: dict[str, Any]
|
||||
if isinstance(message, ChatMessage):
|
||||
message_dict = {"role": message.role, "content": message.content}
|
||||
message_dict = {
|
||||
"role": message.role,
|
||||
"content": _format_message_content(message.content),
|
||||
}
|
||||
elif isinstance(message, HumanMessage):
|
||||
message_dict = {"role": "user", "content": message.content}
|
||||
message_dict = {
|
||||
"role": "user",
|
||||
"content": _format_message_content(message.content),
|
||||
}
|
||||
elif isinstance(message, AIMessage):
|
||||
# Translate v1 content
|
||||
if message.response_metadata.get("output_version") == "v1":
|
||||
message = _convert_from_v1_to_chat_completions(message)
|
||||
message_dict = {"role": "assistant", "content": message.content}
|
||||
message_dict = {
|
||||
"role": "assistant",
|
||||
"content": _format_message_content(message.content),
|
||||
}
|
||||
if "function_call" in message.additional_kwargs:
|
||||
message_dict["function_call"] = message.additional_kwargs["function_call"]
|
||||
# If function call only, content is None not empty string
|
||||
@@ -206,7 +283,10 @@ def _convert_message_to_dict(message: BaseMessage) -> dict:
|
||||
else:
|
||||
pass
|
||||
elif isinstance(message, SystemMessage):
|
||||
message_dict = {"role": "system", "content": message.content}
|
||||
message_dict = {
|
||||
"role": "system",
|
||||
"content": _format_message_content(message.content),
|
||||
}
|
||||
elif isinstance(message, FunctionMessage):
|
||||
message_dict = {
|
||||
"role": "function",
|
||||
@@ -216,7 +296,7 @@ def _convert_message_to_dict(message: BaseMessage) -> dict:
|
||||
elif isinstance(message, ToolMessage):
|
||||
message_dict = {
|
||||
"role": "tool",
|
||||
"content": message.content,
|
||||
"content": _format_message_content(message.content),
|
||||
"tool_call_id": message.tool_call_id,
|
||||
}
|
||||
else:
|
||||
|
||||
@@ -14,7 +14,14 @@ from fireworks.client.error import ( # type: ignore[import-untyped]
|
||||
RateLimitError,
|
||||
ServiceUnavailableError,
|
||||
)
|
||||
from langchain_core.messages import AIMessage, AIMessageChunk, HumanMessage
|
||||
from langchain_core.messages import (
|
||||
AIMessage,
|
||||
AIMessageChunk,
|
||||
ChatMessage,
|
||||
HumanMessage,
|
||||
SystemMessage,
|
||||
ToolMessage,
|
||||
)
|
||||
|
||||
from langchain_fireworks import ChatFireworks
|
||||
from langchain_fireworks.chat_models import (
|
||||
@@ -22,6 +29,8 @@ from langchain_fireworks.chat_models import (
|
||||
_completion_with_retry,
|
||||
_convert_chunk_to_message_chunk,
|
||||
_convert_dict_to_message,
|
||||
_convert_message_to_dict,
|
||||
_format_message_content,
|
||||
_usage_to_metadata,
|
||||
)
|
||||
|
||||
@@ -92,6 +101,318 @@ def test_convert_dict_to_message_without_reasoning_content() -> None:
|
||||
assert "reasoning_content" not in message.additional_kwargs
|
||||
|
||||
|
||||
def test_format_message_content_passthrough_string() -> None:
    """A bare string is handed back exactly as given."""
    text = "hello"
    assert _format_message_content(text) == text
|
||||
|
||||
|
||||
def test_format_message_content_translates_v1_image_block() -> None:
    """A v1 base64 image block becomes an `image_url` block with a data URI."""
    payload = [{"type": "image", "base64": "abc", "mime_type": "image/png"}]
    expected = [
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
    ]

    assert _format_message_content(payload) == expected
|
||||
|
||||
|
||||
def test_format_message_content_translates_v0_base64_image_block() -> None:
    """A v0 image block using `source_type="base64"` is translated."""
    v0_block = {
        "type": "image",
        "source_type": "base64",
        "data": "qqq",
        "mime_type": "image/png",
    }
    expected = [
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,qqq"}},
    ]

    assert _format_message_content([v0_block]) == expected
|
||||
|
||||
|
||||
def test_format_message_content_passes_through_existing_image_url() -> None:
    """Blocks that are already OpenAI `image_url` blocks are left alone."""
    payload = [
        {"type": "image_url", "image_url": {"url": "https://example.com/y.png"}},
    ]

    result = _format_message_content(payload)

    assert result == payload
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "btype",
    [
        "tool_use",
        "thinking",
        "reasoning_content",
        "function_call",
        "code_interpreter_call",
    ],
)
def test_format_message_content_drops_unsupported_block_types(btype: str) -> None:
    """Each block type the chat completions wire cannot carry is removed."""
    text_block = {"type": "text", "text": "visible"}
    unsupported_block = {"type": btype, "foo": "bar"}

    result = _format_message_content([text_block, unsupported_block])

    assert result == [{"type": "text", "text": "visible"}]
|
||||
|
||||
|
||||
def test_format_message_content_preserves_order_around_dropped_blocks() -> None:
    """Removing interleaved blocks does not reorder the blocks that remain."""
    payload = [
        {"type": "text", "text": "before"},
        {"type": "thinking", "thinking": "..."},
        {"type": "image", "base64": "abc", "mime_type": "image/png"},
        {"type": "tool_use", "name": "t", "input": {}},
        {"type": "text", "text": "after"},
    ]
    expected = [
        {"type": "text", "text": "before"},
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
        {"type": "text", "text": "after"},
    ]

    assert _format_message_content(payload) == expected
|
||||
|
||||
|
||||
def test_format_message_content_translates_v1_url_image_block() -> None:
    """A v1 image block carrying a top-level `url` maps to `image_url`."""
    payload = [{"type": "image", "url": "https://example.com/img.png"}]
    expected = [
        {"type": "image_url", "image_url": {"url": "https://example.com/img.png"}},
    ]

    assert _format_message_content(payload) == expected
|
||||
|
||||
|
||||
def test_format_message_content_translates_v0_url_image_block() -> None:
    """A v0 image block using `source_type="url"` is translated."""
    v0_block = {
        "type": "image",
        "source_type": "url",
        "url": "https://example.com/v0.png",
    }
    expected = [
        {"type": "image_url", "image_url": {"url": "https://example.com/v0.png"}},
    ]

    assert _format_message_content([v0_block]) == expected
|
||||
|
||||
|
||||
def test_format_message_content_translates_anthropic_source_base64_image() -> None:
    """An Anthropic-style image with a base64 `source` becomes a data URI."""
    anthropic_source = {
        "type": "base64",
        "media_type": "image/png",
        "data": "abc",
    }
    payload = [{"type": "image", "source": anthropic_source}]
    expected = [
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
    ]

    assert _format_message_content(payload) == expected
|
||||
|
||||
|
||||
def test_format_message_content_translates_anthropic_source_url_image() -> None:
    """An Anthropic-style image with a url `source` becomes `image_url`."""
    anthropic_source = {"type": "url", "url": "https://example.com/anthropic.png"}
    payload = [{"type": "image", "source": anthropic_source}]
    expected = [
        {
            "type": "image_url",
            "image_url": {"url": "https://example.com/anthropic.png"},
        },
    ]

    assert _format_message_content(payload) == expected
|
||||
|
||||
|
||||
def test_format_message_content_translates_v1_audio_block() -> None:
    """A v1 audio block is rewritten into the OpenAI `input_audio` shape."""
    payload = [{"type": "audio", "base64": "aGVsbG8=", "mime_type": "audio/wav"}]
    expected = [
        {"type": "input_audio", "input_audio": {"data": "aGVsbG8=", "format": "wav"}},
    ]

    assert _format_message_content(payload) == expected
|
||||
|
||||
|
||||
def test_format_message_content_translates_v1_file_block_base64() -> None:
    """A v1 base64 file block with a filename maps to the `file_data` shape."""
    file_block = {
        "type": "file",
        "base64": "JVBERi0=",
        "mime_type": "application/pdf",
        "filename": "x.pdf",
    }
    expected_file = {
        "file_data": "data:application/pdf;base64,JVBERi0=",
        "filename": "x.pdf",
    }

    result = _format_message_content([file_block])

    assert result == [{"type": "file", "file": expected_file}]
|
||||
|
||||
|
||||
def test_convert_message_to_dict_translates_tool_message_image() -> None:
    """A canonical image block on a ToolMessage is sent as OpenAI `image_url`.

    Covers the case where a tool (e.g. a file reader) returns image content
    blocks and that message is then serialized into a Fireworks chat
    completions request.
    """
    image_block = {"type": "image", "base64": "abc", "mime_type": "image/png"}
    message = ToolMessage(content=[image_block], tool_call_id="call_1")

    payload = _convert_message_to_dict(message)

    expected_content = [
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
    ]
    assert payload == {
        "role": "tool",
        "content": expected_content,
        "tool_call_id": "call_1",
    }
|
||||
|
||||
|
||||
def test_convert_message_to_dict_translates_human_mixed_content() -> None:
    """Text and image blocks on a HumanMessage are both serialized correctly."""
    message = HumanMessage(
        content=[
            {"type": "text", "text": "what is this?"},
            {"type": "image", "base64": "xyz", "mime_type": "image/jpeg"},
        ]
    )
    expected_content = [
        {"type": "text", "text": "what is this?"},
        {
            "type": "image_url",
            "image_url": {"url": "data:image/jpeg;base64,xyz"},
        },
    ]

    payload = _convert_message_to_dict(message)

    assert payload == {"role": "user", "content": expected_content}
|
||||
|
||||
|
||||
def test_convert_message_to_dict_chat_message_uses_translator() -> None:
    """Content on a ChatMessage goes through the formatter as well."""
    image_block = {"type": "image", "base64": "zz", "mime_type": "image/gif"}
    message = ChatMessage(role="custom", content=[image_block])

    payload = _convert_message_to_dict(message)

    expected_content = [
        {"type": "image_url", "image_url": {"url": "data:image/gif;base64,zz"}},
    ]
    assert payload == {"role": "custom", "content": expected_content}
|
||||
|
||||
|
||||
def test_convert_message_to_dict_string_content_unchanged() -> None:
    """Plain string content survives conversion on common message types."""
    cases = [
        (HumanMessage(content="hi"), "hi"),
        (SystemMessage(content="sys"), "sys"),
        (ToolMessage(content="r1", tool_call_id="t"), "r1"),
    ]
    for message, expected in cases:
        assert _convert_message_to_dict(message)["content"] == expected
|
||||
|
||||
|
||||
def test_convert_message_to_dict_translates_system_list_content() -> None:
    """List content on a SystemMessage is passed through the formatter."""
    message = SystemMessage(
        content=[
            {"type": "text", "text": "rules"},
            {"type": "thinking", "thinking": "drop me"},
        ]
    )

    payload = _convert_message_to_dict(message)

    expected_content = [{"type": "text", "text": "rules"}]
    assert payload == {"role": "system", "content": expected_content}
|
||||
|
||||
|
||||
def test_convert_message_to_dict_translates_ai_message_image_content() -> None:
    """A canonical image block on an AIMessage is translated before sending."""
    message = AIMessage(
        content=[
            {"type": "text", "text": "see attached"},
            {"type": "image", "base64": "abc", "mime_type": "image/png"},
        ]
    )
    expected_content = [
        {"type": "text", "text": "see attached"},
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
    ]

    payload = _convert_message_to_dict(message)

    assert payload == {"role": "assistant", "content": expected_content}
|
||||
|
||||
|
||||
def test_convert_message_to_dict_propagates_translator_value_error() -> None:
    """A `ValueError` from the translator reaches the caller.

    A file block that only carries a URL is expected to make the translator
    raise for chat completions, so the unsupported block is never serialized.
    """
    file_url_block = {"type": "file", "url": "https://example.com/doc.pdf"}
    message = HumanMessage(content=[file_url_block])

    with pytest.raises(ValueError, match="file URLs"):
        _convert_message_to_dict(message)
|
||||
|
||||
|
||||
def _make_llm(max_retries: int | None = 2) -> ChatFireworks:
|
||||
return ChatFireworks(
|
||||
model="accounts/fireworks/models/test",
|
||||
|
||||
Reference in New Issue
Block a user