fix(ollama): avoid leading newline in multimodal text content (#37481)

Fixes #37480 

---

Avoid prepending a leading newline when converting multimodal message
text content in `ChatOllama`. Previously, the first text segment in
list-format message content was always prefixed with `\n`, which could
break formatting-sensitive vision/OCR models.

Also adds a regression test to ensure multimodal message content does
not start with an unintended leading newline.

Verified by running:

- make format
- make lint
- make test
This commit is contained in:
Sanchay Singh
2026-05-18 03:04:31 +05:30
committed by GitHub
parent 2c179365f4
commit cdb4a8aeb0
2 changed files with 33 additions and 2 deletions

View File

@@ -1024,9 +1024,13 @@ class ChatOllama(BaseChatModel):
else: # List
for content_part in message.content:
if isinstance(content_part, str):
content += f"\n{content_part}"
if content:
content += "\n"
content += content_part
elif content_part.get("type") == "text":
content += f"\n{content_part['text']}"
if content:
content += "\n"
content += content_part["text"]
elif content_part.get("type") == "tool_use":
continue
elif content_part.get("type") == "image_url":

View File

@@ -1137,3 +1137,30 @@ def test_non_ai_message_reasoning_content_ignored() -> None:
]
ollama_messages = llm._convert_messages_to_ollama_messages(messages)
assert "thinking" not in ollama_messages[0]
def test_multimodal_message_content_has_no_leading_newline() -> None:
    """Check that list-format content converts without a leading newline.

    Builds a human message made of one text part followed by one inline
    image part, runs it through the Ollama message conversion, and expects
    the resulting `content` to equal the text part exactly.
    """
    prompt = "Extract all text from this image."
    # Inline PNG supplied as a base64 data URL.
    image_data_url = (
        "data:image/png;base64,"
        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
    )
    human_message = HumanMessage(
        content=[
            {"type": "text", "text": prompt},
            {"type": "image_url", "image_url": {"url": image_data_url}},
        ]
    )

    llm = ChatOllama(model="any")
    converted = llm._convert_messages_to_ollama_messages([human_message])

    # Exact equality: any stray "\n" prefix on the text would fail here.
    assert converted[0]["content"] == prompt