feat(mistralai): add image input support for human messages (#37112)

Closes #37007

---

`ChatMistralAI` was POSTing `HumanMessage` content lists verbatim, so
canonical `ImageContentBlock` dicts (`{"type": "image", "url"/"base64":
...}`) reached the Mistral API unchanged and were rejected — the API
expects OpenAI-shape `{"type": "image_url", "image_url": {"url":
"..."}}`. Multimodal inputs failed for both URL and base64 images.

## Changes
- Introduce `_format_message_content` in
`langchain_mistralai.chat_models`, which delegates to
`is_data_content_block` and
`convert_to_openai_data_block(api="chat/completions")` from
`langchain-core`. This reuses the same translator that `langchain-openai`
and `langchain-fireworks` (#37090) use, so v0 `source_type` blocks, v1
`url`/`base64` blocks, and `file_id` references are all handled by one
canonical path.
- Route `HumanMessage` content through `_format_message_content` in
`_convert_message_to_mistral_chat_message`. Strings, already-translated
`image_url` blocks, and Mistral-specific blocks (`document_url`,
`input_audio`) pass through unchanged; the API surfaces an error for
anything it doesn't recognize.

---------

Co-authored-by: Akash Choudhary <achoudhary@lenovo.com>
Co-authored-by: Mason Daugherty <github@mdrxy.com>
This commit is contained in:
Akash Choudhary
2026-05-02 02:36:39 +05:30
committed by GitHub
parent 4389b4c774
commit d1a3c3d0a5
2 changed files with 213 additions and 1 deletions

View File

@@ -44,6 +44,10 @@ from langchain_core.messages import (
SystemMessageChunk, SystemMessageChunk,
ToolCall, ToolCall,
ToolMessage, ToolMessage,
is_data_content_block,
)
from langchain_core.messages.block_translators.openai import (
convert_to_openai_data_block,
) )
from langchain_core.messages.tool import tool_call_chunk from langchain_core.messages.tool import tool_call_chunk
from langchain_core.output_parsers import ( from langchain_core.output_parsers import (
@@ -369,13 +373,46 @@ def _clean_block(block: dict) -> dict:
return new_block return new_block
def _format_message_content(content: Any) -> Any:
"""Format message content for the Mistral chat completions wire format.
Walks list content and translates LangChain canonical v0/v1 multimodal
data blocks (e.g. `ImageContentBlock` with `url`, `base64`, or
`file_id`) into the OpenAI-compatible shape that Mistral accepts:
`{"type": "image_url", "image_url": {"url": "..."}}`. Strings and any
other dict blocks are returned unchanged so that already-translated wire
blocks (e.g. `text`, `image_url`) and Mistral-specific blocks
(`document_url`, `input_audio`) pass through; the API surfaces an error
for anything it doesn't understand.
Args:
content: The message content. Strings and non-list values pass
through unchanged; lists are walked block by block.
Returns:
The formatted content. List inputs return a new list with canonical
data-block translations applied; other inputs are returned as-is.
"""
if not isinstance(content, list):
return content
formatted: list[Any] = []
for block in content:
if isinstance(block, dict) and is_data_content_block(block):
formatted.append(
convert_to_openai_data_block(block, api="chat/completions")
)
continue
formatted.append(block)
return formatted
def _convert_message_to_mistral_chat_message( def _convert_message_to_mistral_chat_message(
message: BaseMessage, message: BaseMessage,
) -> dict: ) -> dict:
if isinstance(message, ChatMessage): if isinstance(message, ChatMessage):
return {"role": message.role, "content": message.content} return {"role": message.role, "content": message.content}
if isinstance(message, HumanMessage): if isinstance(message, HumanMessage):
return {"role": "user", "content": message.content} return {"role": "user", "content": _format_message_content(message.content)}
if isinstance(message, AIMessage): if isinstance(message, AIMessage):
message_dict: dict[str, Any] = {"role": "assistant"} message_dict: dict[str, Any] = {"role": "assistant"}
tool_calls: list = [] tool_calls: list = []

View File

@@ -24,6 +24,7 @@ from langchain_mistralai.chat_models import ( # type: ignore[import]
_convert_message_to_mistral_chat_message, _convert_message_to_mistral_chat_message,
_convert_mistral_chat_message_to_message, _convert_mistral_chat_message_to_message,
_convert_tool_call_id_to_mistral_compatible, _convert_tool_call_id_to_mistral_compatible,
_format_message_content,
_is_valid_mistral_tool_call_id, _is_valid_mistral_tool_call_id,
) )
@@ -111,6 +112,180 @@ def test_convert_message_to_mistral_chat_message(
assert result == expected assert result == expected
@pytest.mark.parametrize(
    ("content", "expected"),
    [
        ("hello", "hello"),
        ("", ""),
        (None, None),
        ([], []),
    ],
)
def test_format_message_content_passthrough_non_list(
    content: Any, expected: Any
) -> None:
    """Trivial content comes back equal to what was passed in.

    Covers a non-empty string, the empty string, `None`, and an empty list.
    """
    formatted = _format_message_content(content)
    assert formatted == expected
@pytest.mark.parametrize(
    ("block", "expected"),
    [
        (
            {"type": "image", "url": "https://example.com/img.png"},
            {
                "type": "image_url",
                "image_url": {"url": "https://example.com/img.png"},
            },
        ),
        (
            {"type": "image", "base64": "abc123", "mime_type": "image/jpeg"},
            {
                "type": "image_url",
                "image_url": {"url": "data:image/jpeg;base64,abc123"},
            },
        ),
        (
            {
                "type": "image",
                "source_type": "url",
                "url": "https://example.com/v0.png",
            },
            {
                "type": "image_url",
                "image_url": {"url": "https://example.com/v0.png"},
            },
        ),
        (
            {
                "type": "image",
                "source_type": "base64",
                "data": "v0data",
                "mime_type": "image/png",
            },
            {
                "type": "image_url",
                "image_url": {"url": "data:image/png;base64,v0data"},
            },
        ),
    ],
)
def test_format_message_content_translates_image_blocks(
    block: dict, expected: dict
) -> None:
    """Canonical image blocks are rewritten into the `image_url` wire shape.

    Each case wraps one block in a list; URL and base64 variants are covered
    both with and without the `source_type` key.
    """
    translated = _format_message_content([block])
    assert translated == [expected]
@pytest.mark.parametrize(
    "block",
    [
        {"type": "text", "text": "hello"},
        {"type": "image_url", "image_url": {"url": "https://example.com/img.png"}},
        {"type": "image_url", "image_url": "https://example.com/img.png"},
    ],
)
def test_format_message_content_passthrough_known_blocks(block: dict) -> None:
    """Text blocks and blocks already in `image_url` form are left as they are."""
    wrapped = [block]
    assert _format_message_content(wrapped) == [block]
@pytest.mark.parametrize(
    "block_type",
    ["tool_use", "thinking", "reasoning_content", "document_url", "input_audio"],
)
def test_format_message_content_passes_unknown_blocks_through(block_type: str) -> None:
    """Blocks of these non-canonical types come back equal to the input.

    The formatter is expected to apply no client-side filtering to them.
    """
    text_block = {"type": "text", "text": "kept"}
    opaque_block = {"type": block_type, "data": "anything"}
    blocks = [text_block, opaque_block]
    assert _format_message_content(blocks) == blocks
def test_format_message_content_preserves_order_for_mixed_blocks() -> None:
    """Interleaved text, image, and bare-string items keep their positions."""
    mixed: list[Any] = [
        {"type": "text", "text": "first"},
        {"type": "image", "url": "https://example.com/a.png"},
        {"type": "text", "text": "between"},
        {"type": "image", "base64": "xyz", "mime_type": "image/png"},
        "trailing string",
    ]
    # Same positions as `mixed`; only the two image blocks change shape.
    want = [
        {"type": "text", "text": "first"},
        {"type": "image_url", "image_url": {"url": "https://example.com/a.png"}},
        {"type": "text", "text": "between"},
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,xyz"}},
        "trailing string",
    ]
    got = _format_message_content(mixed)
    assert got == want
def test_format_message_content_image_missing_mime_type_raises() -> None:
    """A base64 image block lacking `mime_type` makes formatting raise `ValueError`."""
    incomplete_block = {"type": "image", "base64": "abc"}
    with pytest.raises(ValueError, match="mime_type"):
        _format_message_content([incomplete_block])
@pytest.mark.parametrize(
    ("message", "expected"),
    [
        (
            HumanMessage(
                content=[
                    {"type": "text", "text": "What is in this image?"},
                    {"type": "image", "url": "https://example.com/img.png"},
                ]
            ),
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What is in this image?"},
                    {
                        "type": "image_url",
                        "image_url": {"url": "https://example.com/img.png"},
                    },
                ],
            },
        ),
        (
            HumanMessage(
                content=[
                    {"type": "text", "text": "Describe this image."},
                    {
                        "type": "image",
                        "base64": "abc123",
                        "mime_type": "image/png",
                    },
                ]
            ),
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe this image."},
                    {
                        "type": "image_url",
                        "image_url": {"url": "data:image/png;base64,abc123"},
                    },
                ],
            },
        ),
    ],
)
def test_convert_human_message_with_images(
    message: BaseMessage, expected: dict
) -> None:
    """Human messages with image blocks convert to a `user` dict with `image_url`.

    One case uses a URL image and one a base64 image; the accompanying text
    block is expected to be unchanged in both.
    """
    assert _convert_message_to_mistral_chat_message(message) == expected
def test_convert_human_message_with_string_content_unchanged() -> None:
    """A `HumanMessage` with string content converts with that string as-is."""
    plain_message = HumanMessage(content="hi")
    converted = _convert_message_to_mistral_chat_message(plain_message)
    assert converted == {"role": "user", "content": "hi"}
def _make_completion_response_from_token(token: str) -> dict: def _make_completion_response_from_token(token: str) -> dict:
return { return {
"id": "abc123", "id": "abc123",