From d40fd5a3cee30dc6e9b3290217e568055554e04f Mon Sep 17 00:00:00 2001
From: Copilot <198982749+Copilot@users.noreply.github.com>
Date: Tue, 22 Jul 2025 13:21:11 -0400
Subject: [PATCH] feat(ollama): warn on empty `load` responses (#32161)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Problem

When using `ChatOllama` with `create_react_agent`, agents would sometimes terminate prematurely with empty responses when Ollama returned `done_reason: 'load'` responses with no content. This caused agents to return empty `AIMessage` objects instead of actual generated text.

```python
from langchain_ollama import ChatOllama
from langgraph.prebuilt import create_react_agent
from langchain_core.messages import HumanMessage

llm = ChatOllama(model='qwen2.5:7b', temperature=0)
agent = create_react_agent(model=llm, tools=[])

result = agent.invoke(HumanMessage('Hello'), {"configurable": {"thread_id": "1"}})
# Before fix: AIMessage(content='', response_metadata={'done_reason': 'load'})
# Expected: AIMessage with actual generated content
```

## Root Cause

The `_iterate_over_stream` and `_aiterate_over_stream` methods treated any response with `done: True` as final, regardless of `done_reason`. When Ollama returns `done_reason: 'load'` with empty content, it indicates that the model was loaded but no actual generation occurred; such a response should not be treated as a complete generation.

## Solution

Modified the streaming logic to skip a response when all of the following hold:

- `done: True`
- `done_reason: 'load'`
- Content is empty or contains only whitespace

This ensures agents only receive actual generated content while preserving backward compatibility for load responses that do contain content. A minimal sketch of the guard condition follows this description.

## Changes

- **`_iterate_over_stream`**: Skip empty load responses instead of yielding them
- **`_aiterate_over_stream`**: Apply the same fix to async streaming
- **Tests**: Added test cases covering the edge cases above

## Testing

All scenarios now work correctly:

- ✅ Empty load responses are skipped (fixes the original issue)
- ✅ Load responses with actual content are preserved (backward compatibility)
- ✅ Normal stop responses work unchanged
- ✅ Streaming behavior is preserved
- ✅ `create_react_agent` integration is fixed

Fixes #31482.
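For reference, here is a minimal, self-contained sketch of the skip condition described in the Solution section. The helper name `_should_skip_chunk` and the sample chunk dicts are illustrative only and are not part of the patch; the actual change lives inside `_iterate_over_stream` / `_aiterate_over_stream` in the diff below.

```python
# Illustrative only -- not part of the patch. `stream_resp` stands in for a raw
# chunk dict as produced by the Ollama client during streaming.


def _should_skip_chunk(stream_resp: dict) -> bool:
    """Return True for final 'load' chunks that carry no generated content."""
    content = (
        stream_resp["message"]["content"]
        if "message" in stream_resp and "content" in stream_resp["message"]
        else ""
    )
    return (
        stream_resp.get("done") is True
        and stream_resp.get("done_reason") == "load"
        and not content.strip()
    )


# Whitespace-only content counts as empty, so this chunk is skipped...
assert _should_skip_chunk(
    {"done": True, "done_reason": "load", "message": {"role": "assistant", "content": " \n"}}
)
# ...while a normal 'stop' chunk (or a 'load' chunk with real content) is kept.
assert not _should_skip_chunk(
    {"done": True, "done_reason": "stop", "message": {"role": "assistant", "content": "Hi"}}
)
```

Skipping (rather than raising) means a stream that later produces a real chunk still yields its content unchanged, which is what `test_load_followed_by_content_response` in the diff verifies.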
---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: mdrxy <61371264+mdrxy@users.noreply.github.com>
Co-authored-by: Mason Daugherty
---
 .../ollama/langchain_ollama/chat_models.py    |  59 ++++++--
 .../tests/unit_tests/test_chat_models.py      | 132 +++++++++++++++++-
 libs/partners/ollama/uv.lock                  |   4 +-
 3 files changed, 179 insertions(+), 16 deletions(-)

diff --git a/libs/partners/ollama/langchain_ollama/chat_models.py b/libs/partners/ollama/langchain_ollama/chat_models.py
index 91b2e1aa404..dcecf0fb8d2 100644
--- a/libs/partners/ollama/langchain_ollama/chat_models.py
+++ b/libs/partners/ollama/langchain_ollama/chat_models.py
@@ -4,6 +4,7 @@ from __future__ import annotations
 
 import ast
 import json
+import logging
 from collections.abc import AsyncIterator, Iterator, Mapping, Sequence
 from operator import itemgetter
 from typing import (
@@ -58,6 +59,8 @@ from typing_extensions import Self, is_typeddict
 
 from ._utils import validate_model
 
+log = logging.getLogger(__name__)
+
 
 def _get_usage_metadata_from_generation_info(
     generation_info: Optional[Mapping[str, Any]],
@@ -837,6 +840,28 @@ class ChatOllama(BaseChatModel):
         reasoning = kwargs.get("reasoning", self.reasoning)
         for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                content = (
+                    stream_resp["message"]["content"]
+                    if "message" in stream_resp and "content" in stream_resp["message"]
+                    else ""
+                )
+
+                # Warn and skip responses with done_reason: 'load' and empty content
+                # These indicate the model was loaded but no actual generation occurred
+                is_load_response_with_empty_content = (
+                    stream_resp.get("done") is True
+                    and stream_resp.get("done_reason") == "load"
+                    and not content.strip()
+                )
+
+                if is_load_response_with_empty_content:
+                    log.warning(
+                        "Ollama returned empty response with done_reason='load'. "
+                        "This typically indicates the model was loaded but no content "
+                        "was generated. Skipping this response."
+                    )
+                    continue
+
                 if stream_resp.get("done") is True:
                     generation_info = dict(stream_resp)
                     if "model" in generation_info:
@@ -845,12 +870,6 @@ class ChatOllama(BaseChatModel):
                 else:
                     generation_info = None
 
-                content = (
-                    stream_resp["message"]["content"]
-                    if "message" in stream_resp and "content" in stream_resp["message"]
-                    else ""
-                )
-
                 additional_kwargs = {}
                 if (
                     reasoning
@@ -897,6 +916,28 @@ class ChatOllama(BaseChatModel):
         reasoning = kwargs.get("reasoning", self.reasoning)
         async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                content = (
+                    stream_resp["message"]["content"]
+                    if "message" in stream_resp and "content" in stream_resp["message"]
+                    else ""
+                )
+
+                # Warn and skip responses with done_reason: 'load' and empty content
+                # These indicate the model was loaded but no actual generation occurred
+                is_load_response_with_empty_content = (
+                    stream_resp.get("done") is True
+                    and stream_resp.get("done_reason") == "load"
+                    and not content.strip()
+                )
+
+                if is_load_response_with_empty_content:
+                    log.warning(
+                        "Ollama returned empty response with done_reason='load'. "
+                        "This typically indicates the model was loaded but no content "
+                        "was generated. Skipping this response."
+                    )
+                    continue
+
                 if stream_resp.get("done") is True:
                     generation_info = dict(stream_resp)
                     if "model" in generation_info:
@@ -905,12 +946,6 @@ class ChatOllama(BaseChatModel):
                 else:
                     generation_info = None
 
-                content = (
-                    stream_resp["message"]["content"]
-                    if "message" in stream_resp and "content" in stream_resp["message"]
-                    else ""
-                )
-
                 additional_kwargs = {}
                 if (
                     reasoning
diff --git a/libs/partners/ollama/tests/unit_tests/test_chat_models.py b/libs/partners/ollama/tests/unit_tests/test_chat_models.py
index 3a856a48833..b32ad638cb8 100644
--- a/libs/partners/ollama/tests/unit_tests/test_chat_models.py
+++ b/libs/partners/ollama/tests/unit_tests/test_chat_models.py
@@ -1,15 +1,16 @@
 """Test chat model integration."""
 
 import json
+import logging
 from collections.abc import Generator
 from contextlib import contextmanager
 from typing import Any
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
 
 import pytest
 from httpx import Client, Request, Response
 from langchain_core.exceptions import OutputParserException
-from langchain_core.messages import ChatMessage
+from langchain_core.messages import ChatMessage, HumanMessage
 from langchain_tests.unit_tests import ChatModelUnitTests
 
 from langchain_ollama.chat_models import (
@@ -140,3 +141,130 @@ def test_parse_json_string_skip_returns_input_on_failure() -> None:
         skip=True,
     )
     assert result == malformed_string
+
+
+def test_load_response_with_empty_content_is_skipped(
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Test that load responses with empty content log a warning and are skipped."""
+    load_only_response = [
+        {
+            "model": "test-model",
+            "created_at": "2025-01-01T00:00:00.000000000Z",
+            "done": True,
+            "done_reason": "load",
+            "message": {"role": "assistant", "content": ""},
+        }
+    ]
+
+    with patch("langchain_ollama.chat_models.Client") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.chat.return_value = load_only_response
+
+        llm = ChatOllama(model="test-model")
+
+        with (
+            caplog.at_level(logging.WARNING),
+            pytest.raises(ValueError, match="No data received from Ollama stream"),
+        ):
+            llm.invoke([HumanMessage("Hello")])
+
+        assert "Ollama returned empty response with done_reason='load'" in caplog.text
+
+
+def test_load_response_with_whitespace_content_is_skipped(
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Test load responses w/ only whitespace content log a warning and are skipped."""
+    load_whitespace_response = [
+        {
+            "model": "test-model",
+            "created_at": "2025-01-01T00:00:00.000000000Z",
+            "done": True,
+            "done_reason": "load",
+            "message": {"role": "assistant", "content": " \n \t "},
+        }
+    ]
+
+    with patch("langchain_ollama.chat_models.Client") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.chat.return_value = load_whitespace_response
+
+        llm = ChatOllama(model="test-model")
+
+        with (
+            caplog.at_level(logging.WARNING),
+            pytest.raises(ValueError, match="No data received from Ollama stream"),
+        ):
+            llm.invoke([HumanMessage("Hello")])
+        assert "Ollama returned empty response with done_reason='load'" in caplog.text
+
+
+def test_load_followed_by_content_response(
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Test load responses log a warning and are skipped when followed by content."""
+    load_then_content_response = [
+        {
+            "model": "test-model",
+            "created_at": "2025-01-01T00:00:00.000000000Z",
+            "done": True,
+            "done_reason": "load",
+            "message": {"role": "assistant", "content": ""},
+        },
+        {
+            "model": "test-model",
+            "created_at": "2025-01-01T00:00:01.000000000Z",
+            "done": True,
+            "done_reason": "stop",
+            "message": {
+                "role": "assistant",
+                "content": "Hello! How can I help you today?",
+            },
+        },
+    ]
+
+    with patch("langchain_ollama.chat_models.Client") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.chat.return_value = load_then_content_response
+
+        llm = ChatOllama(model="test-model")
+
+        with caplog.at_level(logging.WARNING):
+            result = llm.invoke([HumanMessage("Hello")])
+
+        assert "Ollama returned empty response with done_reason='load'" in caplog.text
+        assert result.content == "Hello! How can I help you today?"
+        assert result.response_metadata.get("done_reason") == "stop"
+
+
+def test_load_response_with_actual_content_is_not_skipped(
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Test load responses with actual content are NOT skipped and log no warning."""
+    load_with_content_response = [
+        {
+            "model": "test-model",
+            "created_at": "2025-01-01T00:00:00.000000000Z",
+            "done": True,
+            "done_reason": "load",
+            "message": {"role": "assistant", "content": "This is actual content"},
+        }
+    ]
+
+    with patch("langchain_ollama.chat_models.Client") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.chat.return_value = load_with_content_response
+
+        llm = ChatOllama(model="test-model")
+
+        with caplog.at_level(logging.WARNING):
+            result = llm.invoke([HumanMessage("Hello")])
+
+        assert result.content == "This is actual content"
+        assert result.response_metadata.get("done_reason") == "load"
+        assert not caplog.text
diff --git a/libs/partners/ollama/uv.lock b/libs/partners/ollama/uv.lock
index 9dc8665b9c1..a7bf8c85d14 100644
--- a/libs/partners/ollama/uv.lock
+++ b/libs/partners/ollama/uv.lock
@@ -305,7 +305,7 @@ wheels = [
 
 [[package]]
 name = "langchain-core"
-version = "0.3.69"
+version = "0.3.70"
 source = { editable = "../../core" }
 dependencies = [
     { name = "jsonpatch" },
@@ -363,7 +363,7 @@ typing = [
 
 [[package]]
 name = "langchain-ollama"
-version = "0.3.4"
+version = "0.3.5"
 source = { editable = "." }
 dependencies = [
     { name = "langchain-core" },