feat(ollama): warn on empty load responses (#32161)

## Problem

When using `ChatOllama` with `create_react_agent`, agents would
sometimes terminate prematurely when Ollama returned a
`done_reason: 'load'` response with no content, causing them to return
an empty `AIMessage` instead of actual generated text.

```python
from langchain_ollama import ChatOllama
from langgraph.prebuilt import create_react_agent
from langchain_core.messages import HumanMessage

llm = ChatOllama(model='qwen2.5:7b', temperature=0)
agent = create_react_agent(model=llm, tools=[])

result = agent.invoke(
    {"messages": [HumanMessage("Hello")]},
    {"configurable": {"thread_id": "1"}},
)
# Before fix: final message was AIMessage(content='', response_metadata={'done_reason': 'load'})
# Expected: an AIMessage with actual generated content
```

## Root Cause

The `_iterate_over_stream` and `_aiterate_over_stream` methods treated
any response with `done: True` as final, regardless of `done_reason`.
When Ollama returns `done_reason: 'load'` with empty content, it only
indicates that the model was loaded; no generation actually occurred,
so the response should not be treated as a completed generation.
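
For reference, a load-only chunk from the Ollama stream has roughly the shape below (an illustrative sketch; the field values mirror the unit-test fixtures further down in this diff):

```python
# Illustrative shape of a load-only streaming chunk from Ollama.
# Values are made up for illustration and mirror the test fixtures below.
load_only_chunk = {
    "model": "qwen2.5:7b",
    "created_at": "2025-01-01T00:00:00.000000000Z",
    "done": True,
    "done_reason": "load",  # the model was loaded; nothing was generated
    "message": {"role": "assistant", "content": ""},  # empty content
}
```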

## Solution

Modified the streaming logic to skip a response when all of the following hold:
- `done: True`
- `done_reason: 'load'`
- the content is empty or whitespace-only

This ensures agents only receive actual generated content while
preserving backward compatibility for load responses that do contain
content.
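
In essence, both stream iterators now apply the predicate sketched below before processing a chunk (a simplified sketch of the change shown in the diff; `_should_skip_chunk` is a hypothetical helper name, the actual code inlines this check):

```python
from typing import Any, Mapping


def _should_skip_chunk(stream_resp: Mapping[str, Any]) -> bool:
    """Return True for done_reason='load' chunks that carry no generated content."""
    content = (
        stream_resp["message"]["content"]
        if "message" in stream_resp and "content" in stream_resp["message"]
        else ""
    )
    return (
        stream_resp.get("done") is True
        and stream_resp.get("done_reason") == "load"
        and not content.strip()
    )
```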

## Changes

- **`_iterate_over_stream`**: Skip empty load responses instead of
yielding them
- **`_aiterate_over_stream`**: Apply same fix to async streaming
- **Tests**: Added test cases covering empty, whitespace-only, and
content-bearing load responses, as well as a load-then-content sequence

## Testing

All scenarios now work correctly:
- Empty load responses are skipped (fixes original issue)
- Load responses with actual content are preserved (backward compatibility)
- Normal stop responses work unchanged
- Streaming behavior preserved
- `create_react_agent` integration fixed
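
Outside the test suite, the new warning can be surfaced in an application by enabling WARNING-level logging; a minimal sketch, assuming the module logger is named after its import path, as the `logging.getLogger(__name__)` call in the diff suggests:

```python
import logging

# The module creates its logger via logging.getLogger(__name__), so its name should
# be "langchain_ollama.chat_models" (an assumption based on the diff below).
logging.basicConfig(level=logging.WARNING)
logging.getLogger("langchain_ollama.chat_models").setLevel(logging.WARNING)

# Any empty done_reason='load' chunk encountered during invoke()/stream() is now
# logged as a warning and skipped instead of ending the run with an empty AIMessage.
```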

Fixes #31482.


---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: mdrxy <61371264+mdrxy@users.noreply.github.com>
Co-authored-by: Mason Daugherty <github@mdrxy.com>
3 changed files with 179 additions and 16 deletions

**`langchain_ollama/chat_models.py`**

@@ -4,6 +4,7 @@ from __future__ import annotations
import ast
import json
import logging
from collections.abc import AsyncIterator, Iterator, Mapping, Sequence
from operator import itemgetter
from typing import (
@@ -58,6 +59,8 @@ from typing_extensions import Self, is_typeddict
from ._utils import validate_model
log = logging.getLogger(__name__)
def _get_usage_metadata_from_generation_info(
generation_info: Optional[Mapping[str, Any]],
@@ -837,6 +840,28 @@ class ChatOllama(BaseChatModel):
reasoning = kwargs.get("reasoning", self.reasoning)
for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
if not isinstance(stream_resp, str):
content = (
stream_resp["message"]["content"]
if "message" in stream_resp and "content" in stream_resp["message"]
else ""
)
# Warn and skip responses with done_reason: 'load' and empty content
# These indicate the model was loaded but no actual generation occurred
is_load_response_with_empty_content = (
stream_resp.get("done") is True
and stream_resp.get("done_reason") == "load"
and not content.strip()
)
if is_load_response_with_empty_content:
log.warning(
"Ollama returned empty response with done_reason='load'."
"This typically indicates the model was loaded but no content "
"was generated. Skipping this response."
)
continue
if stream_resp.get("done") is True:
generation_info = dict(stream_resp)
if "model" in generation_info:
@@ -845,12 +870,6 @@ class ChatOllama(BaseChatModel):
else:
generation_info = None
content = (
stream_resp["message"]["content"]
if "message" in stream_resp and "content" in stream_resp["message"]
else ""
)
additional_kwargs = {}
if (
reasoning
@@ -897,6 +916,28 @@ class ChatOllama(BaseChatModel):
reasoning = kwargs.get("reasoning", self.reasoning)
async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
if not isinstance(stream_resp, str):
content = (
stream_resp["message"]["content"]
if "message" in stream_resp and "content" in stream_resp["message"]
else ""
)
# Warn and skip responses with done_reason: 'load' and empty content
# These indicate the model was loaded but no actual generation occurred
is_load_response_with_empty_content = (
stream_resp.get("done") is True
and stream_resp.get("done_reason") == "load"
and not content.strip()
)
if is_load_response_with_empty_content:
log.warning(
"Ollama returned empty response with done_reason='load'. "
"This typically indicates the model was loaded but no content "
"was generated. Skipping this response."
)
continue
if stream_resp.get("done") is True:
generation_info = dict(stream_resp)
if "model" in generation_info:
@@ -905,12 +946,6 @@ class ChatOllama(BaseChatModel):
else:
generation_info = None
content = (
stream_resp["message"]["content"]
if "message" in stream_resp and "content" in stream_resp["message"]
else ""
)
additional_kwargs = {}
if (
reasoning

**Chat model unit tests**

@@ -1,15 +1,16 @@
"""Test chat model integration."""
import json
import logging
from collections.abc import Generator
from contextlib import contextmanager
from typing import Any
from unittest.mock import patch
from unittest.mock import MagicMock, patch
import pytest
from httpx import Client, Request, Response
from langchain_core.exceptions import OutputParserException
from langchain_core.messages import ChatMessage
from langchain_core.messages import ChatMessage, HumanMessage
from langchain_tests.unit_tests import ChatModelUnitTests
from langchain_ollama.chat_models import (
@@ -140,3 +141,130 @@ def test_parse_json_string_skip_returns_input_on_failure() -> None:
skip=True,
)
assert result == malformed_string
def test_load_response_with_empty_content_is_skipped(
caplog: pytest.LogCaptureFixture,
) -> None:
"""Test that load responses with empty content log a warning and are skipped."""
load_only_response = [
{
"model": "test-model",
"created_at": "2025-01-01T00:00:00.000000000Z",
"done": True,
"done_reason": "load",
"message": {"role": "assistant", "content": ""},
}
]
with patch("langchain_ollama.chat_models.Client") as mock_client_class:
mock_client = MagicMock()
mock_client_class.return_value = mock_client
mock_client.chat.return_value = load_only_response
llm = ChatOllama(model="test-model")
with (
caplog.at_level(logging.WARNING),
pytest.raises(ValueError, match="No data received from Ollama stream"),
):
llm.invoke([HumanMessage("Hello")])
assert "Ollama returned empty response with done_reason='load'" in caplog.text
def test_load_response_with_whitespace_content_is_skipped(
caplog: pytest.LogCaptureFixture,
) -> None:
"""Test load responses w/ only whitespace content log a warning and are skipped."""
load_whitespace_response = [
{
"model": "test-model",
"created_at": "2025-01-01T00:00:00.000000000Z",
"done": True,
"done_reason": "load",
"message": {"role": "assistant", "content": " \n \t "},
}
]
with patch("langchain_ollama.chat_models.Client") as mock_client_class:
mock_client = MagicMock()
mock_client_class.return_value = mock_client
mock_client.chat.return_value = load_whitespace_response
llm = ChatOllama(model="test-model")
with (
caplog.at_level(logging.WARNING),
pytest.raises(ValueError, match="No data received from Ollama stream"),
):
llm.invoke([HumanMessage("Hello")])
assert "Ollama returned empty response with done_reason='load'" in caplog.text
def test_load_followed_by_content_response(
caplog: pytest.LogCaptureFixture,
) -> None:
"""Test load responses log a warning and are skipped when followed by content."""
load_then_content_response = [
{
"model": "test-model",
"created_at": "2025-01-01T00:00:00.000000000Z",
"done": True,
"done_reason": "load",
"message": {"role": "assistant", "content": ""},
},
{
"model": "test-model",
"created_at": "2025-01-01T00:00:01.000000000Z",
"done": True,
"done_reason": "stop",
"message": {
"role": "assistant",
"content": "Hello! How can I help you today?",
},
},
]
with patch("langchain_ollama.chat_models.Client") as mock_client_class:
mock_client = MagicMock()
mock_client_class.return_value = mock_client
mock_client.chat.return_value = load_then_content_response
llm = ChatOllama(model="test-model")
with caplog.at_level(logging.WARNING):
result = llm.invoke([HumanMessage("Hello")])
assert "Ollama returned empty response with done_reason='load'" in caplog.text
assert result.content == "Hello! How can I help you today?"
assert result.response_metadata.get("done_reason") == "stop"
def test_load_response_with_actual_content_is_not_skipped(
caplog: pytest.LogCaptureFixture,
) -> None:
"""Test load responses with actual content are NOT skipped and log no warning."""
load_with_content_response = [
{
"model": "test-model",
"created_at": "2025-01-01T00:00:00.000000000Z",
"done": True,
"done_reason": "load",
"message": {"role": "assistant", "content": "This is actual content"},
}
]
with patch("langchain_ollama.chat_models.Client") as mock_client_class:
mock_client = MagicMock()
mock_client_class.return_value = mock_client
mock_client.chat.return_value = load_with_content_response
llm = ChatOllama(model="test-model")
with caplog.at_level(logging.WARNING):
result = llm.invoke([HumanMessage("Hello")])
assert result.content == "This is actual content"
assert result.response_metadata.get("done_reason") == "load"
assert not caplog.text

**Dependency lock file**

@@ -305,7 +305,7 @@ wheels = [
[[package]]
name = "langchain-core"
version = "0.3.69"
version = "0.3.70"
source = { editable = "../../core" }
dependencies = [
{ name = "jsonpatch" },
@@ -363,7 +363,7 @@ typing = [
[[package]]
name = "langchain-ollama"
version = "0.3.4"
version = "0.3.5"
source = { editable = "." }
dependencies = [
{ name = "langchain-core" },