From d40fd5a3cee30dc6e9b3290217e568055554e04f Mon Sep 17 00:00:00 2001
From: Copilot <198982749+Copilot@users.noreply.github.com>
Date: Tue, 22 Jul 2025 13:21:11 -0400
Subject: [PATCH] feat(ollama): warn on empty `load` responses (#32161)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Problem

When using `ChatOllama` with `create_react_agent`, agents would sometimes terminate prematurely with empty responses when Ollama returned `done_reason: 'load'` responses with no content. This caused agents to return empty `AIMessage` objects instead of actual generated text.

```python
from langchain_ollama import ChatOllama
from langgraph.prebuilt import create_react_agent
from langchain_core.messages import HumanMessage

llm = ChatOllama(model='qwen2.5:7b', temperature=0)
agent = create_react_agent(model=llm, tools=[])

result = agent.invoke(HumanMessage('Hello'), {"configurable": {"thread_id": "1"}})
# Before fix: AIMessage(content='', response_metadata={'done_reason': 'load'})
# Expected: AIMessage with actual generated content
```

## Root Cause

The `_iterate_over_stream` and `_aiterate_over_stream` methods treated any response with `done: True` as final, regardless of `done_reason`. When Ollama returns `done_reason: 'load'` with empty content, it indicates that the model was loaded but no actual generation occurred; such a response should not be treated as a complete generation.

## Solution

Modified the streaming logic to skip a response when all of the following hold:

- `done: True`
- `done_reason: 'load'`
- Content is empty or contains only whitespace

This ensures agents only receive actual generated content while preserving backward compatibility for load responses that do contain content. A minimal sketch of the guard condition follows this description.

## Changes

- **`_iterate_over_stream`**: Skip empty load responses instead of yielding them
- **`_aiterate_over_stream`**: Apply the same fix to async streaming
- **Tests**: Added test cases covering the edge cases above

## Testing

All scenarios now work correctly:

- ✅ Empty load responses are skipped (fixes the original issue)
- ✅ Load responses with actual content are preserved (backward compatibility)
- ✅ Normal stop responses work unchanged
- ✅ Streaming behavior is preserved
- ✅ `create_react_agent` integration is fixed

Fixes #31482.
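For reference, here is a minimal, self-contained sketch of the skip condition described in the Solution section. The helper name `_should_skip_chunk` and the sample chunk dicts are illustrative only and are not part of the patch; the actual change lives inside `_iterate_over_stream` / `_aiterate_over_stream` in the diff below.

```python
# Illustrative only -- not part of the patch. `stream_resp` stands in for a raw
# chunk dict as produced by the Ollama client during streaming.


def _should_skip_chunk(stream_resp: dict) -> bool:
    """Return True for final 'load' chunks that carry no generated content."""
    content = (
        stream_resp["message"]["content"]
        if "message" in stream_resp and "content" in stream_resp["message"]
        else ""
    )
    return (
        stream_resp.get("done") is True
        and stream_resp.get("done_reason") == "load"
        and not content.strip()
    )


# Whitespace-only content counts as empty, so this chunk is skipped...
assert _should_skip_chunk(
    {"done": True, "done_reason": "load", "message": {"role": "assistant", "content": " \n"}}
)
# ...while a normal 'stop' chunk (or a 'load' chunk with real content) is kept.
assert not _should_skip_chunk(
    {"done": True, "done_reason": "stop", "message": {"role": "assistant", "content": "Hi"}}
)
```

Skipping (rather than raising) means a stream that later produces a real chunk still yields its content unchanged, which is what `test_load_followed_by_content_response` in the diff verifies.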
---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: mdrxy <61371264+mdrxy@users.noreply.github.com>
Co-authored-by: Mason Daugherty
---
 .../ollama/langchain_ollama/chat_models.py    |  59 ++++++--
 .../tests/unit_tests/test_chat_models.py      | 132 +++++++++++++++++-
 libs/partners/ollama/uv.lock                  |   4 +-
 3 files changed, 179 insertions(+), 16 deletions(-)

diff --git a/libs/partners/ollama/langchain_ollama/chat_models.py b/libs/partners/ollama/langchain_ollama/chat_models.py
index 91b2e1aa404..dcecf0fb8d2 100644
--- a/libs/partners/ollama/langchain_ollama/chat_models.py
+++ b/libs/partners/ollama/langchain_ollama/chat_models.py
@@ -4,6 +4,7 @@ from __future__ import annotations
 
 import ast
 import json
+import logging
 from collections.abc import AsyncIterator, Iterator, Mapping, Sequence
 from operator import itemgetter
 from typing import (
@@ -58,6 +59,8 @@ from typing_extensions import Self, is_typeddict
 
 from ._utils import validate_model
 
+log = logging.getLogger(__name__)
+
 
 def _get_usage_metadata_from_generation_info(
     generation_info: Optional[Mapping[str, Any]],
@@ -837,6 +840,28 @@ class ChatOllama(BaseChatModel):
         reasoning = kwargs.get("reasoning", self.reasoning)
         for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                content = (
+                    stream_resp["message"]["content"]
+                    if "message" in stream_resp and "content" in stream_resp["message"]
+                    else ""
+                )
+
+                # Warn and skip responses with done_reason: 'load' and empty content
+                # These indicate the model was loaded but no actual generation occurred
+                is_load_response_with_empty_content = (
+                    stream_resp.get("done") is True
+                    and stream_resp.get("done_reason") == "load"
+                    and not content.strip()
+                )
+
+                if is_load_response_with_empty_content:
+                    log.warning(
+                        "Ollama returned empty response with done_reason='load'. "
+                        "This typically indicates the model was loaded but no content "
+                        "was generated. Skipping this response."
+                    )
+                    continue
+
                 if stream_resp.get("done") is True:
                     generation_info = dict(stream_resp)
                     if "model" in generation_info:
@@ -845,12 +870,6 @@ class ChatOllama(BaseChatModel):
                 else:
                     generation_info = None
 
-                content = (
-                    stream_resp["message"]["content"]
-                    if "message" in stream_resp and "content" in stream_resp["message"]
-                    else ""
-                )
-
                 additional_kwargs = {}
                 if (
                     reasoning
@@ -897,6 +916,28 @@ class ChatOllama(BaseChatModel):
         reasoning = kwargs.get("reasoning", self.reasoning)
         async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                content = (
+                    stream_resp["message"]["content"]
+                    if "message" in stream_resp and "content" in stream_resp["message"]
+                    else ""
+                )
+
+                # Warn and skip responses with done_reason: 'load' and empty content
+                # These indicate the model was loaded but no actual generation occurred
+                is_load_response_with_empty_content = (
+                    stream_resp.get("done") is True
+                    and stream_resp.get("done_reason") == "load"
+                    and not content.strip()
+                )
+
+                if is_load_response_with_empty_content:
+                    log.warning(
+                        "Ollama returned empty response with done_reason='load'. "
+                        "This typically indicates the model was loaded but no content "
+                        "was generated. Skipping this response."
+                    )
+                    continue
+
                 if stream_resp.get("done") is True:
                     generation_info = dict(stream_resp)
                     if "model" in generation_info:
@@ -905,12 +946,6 @@ class ChatOllama(BaseChatModel):
                 else:
                     generation_info = None
 
-                content = (
-                    stream_resp["message"]["content"]
-                    if "message" in stream_resp and "content" in stream_resp["message"]
-                    else ""
-                )
-
                 additional_kwargs = {}
                 if (
                     reasoning
diff --git a/libs/partners/ollama/tests/unit_tests/test_chat_models.py b/libs/partners/ollama/tests/unit_tests/test_chat_models.py
index 3a856a48833..b32ad638cb8 100644
--- a/libs/partners/ollama/tests/unit_tests/test_chat_models.py
+++ b/libs/partners/ollama/tests/unit_tests/test_chat_models.py
@@ -1,15 +1,16 @@
 """Test chat model integration."""
 
 import json
+import logging
 from collections.abc import Generator
 from contextlib import contextmanager
 from typing import Any
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
 
 import pytest
 from httpx import Client, Request, Response
 from langchain_core.exceptions import OutputParserException
-from langchain_core.messages import ChatMessage
+from langchain_core.messages import ChatMessage, HumanMessage
 from langchain_tests.unit_tests import ChatModelUnitTests
 
 from langchain_ollama.chat_models import (
@@ -140,3 +141,130 @@ def test_parse_json_string_skip_returns_input_on_failure() -> None:
         skip=True,
     )
     assert result == malformed_string
+
+
+def test_load_response_with_empty_content_is_skipped(
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Test that load responses with empty content log a warning and are skipped."""
+    load_only_response = [
+        {
+            "model": "test-model",
+            "created_at": "2025-01-01T00:00:00.000000000Z",
+            "done": True,
+            "done_reason": "load",
+            "message": {"role": "assistant", "content": ""},
+        }
+    ]
+
+    with patch("langchain_ollama.chat_models.Client") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.chat.return_value = load_only_response
+
+        llm = ChatOllama(model="test-model")
+
+        with (
+            caplog.at_level(logging.WARNING),
+            pytest.raises(ValueError, match="No data received from Ollama stream"),
+        ):
+            llm.invoke([HumanMessage("Hello")])
+
+        assert "Ollama returned empty response with done_reason='load'" in caplog.text
+
+
+def test_load_response_with_whitespace_content_is_skipped(
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Test load responses w/ only whitespace content log a warning and are skipped."""
+    load_whitespace_response = [
+        {
+            "model": "test-model",
+            "created_at": "2025-01-01T00:00:00.000000000Z",
+            "done": True,
+            "done_reason": "load",
+            "message": {"role": "assistant", "content": " \n \t "},
+        }
+    ]
+
+    with patch("langchain_ollama.chat_models.Client") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.chat.return_value = load_whitespace_response
+
+        llm = ChatOllama(model="test-model")
+
+        with (
+            caplog.at_level(logging.WARNING),
+            pytest.raises(ValueError, match="No data received from Ollama stream"),
+        ):
+            llm.invoke([HumanMessage("Hello")])
+        assert "Ollama returned empty response with done_reason='load'" in caplog.text
+
+
+def test_load_followed_by_content_response(
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Test load responses log a warning and are skipped when followed by content."""
+    load_then_content_response = [
+        {
+            "model": "test-model",
+            "created_at": "2025-01-01T00:00:00.000000000Z",
+            "done": True,
+            "done_reason": "load",
+            "message": {"role": "assistant", "content": ""},
+        },
+        {
+            "model": "test-model",
+            "created_at": "2025-01-01T00:00:01.000000000Z",
+            "done": True,
+            "done_reason": "stop",
+            "message": {
+                "role": "assistant",
+                "content": "Hello! How can I help you today?",
+            },
+        },
+    ]
+
+    with patch("langchain_ollama.chat_models.Client") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.chat.return_value = load_then_content_response
+
+        llm = ChatOllama(model="test-model")
+
+        with caplog.at_level(logging.WARNING):
+            result = llm.invoke([HumanMessage("Hello")])
+
+        assert "Ollama returned empty response with done_reason='load'" in caplog.text
+        assert result.content == "Hello! How can I help you today?"
+        assert result.response_metadata.get("done_reason") == "stop"
+
+
+def test_load_response_with_actual_content_is_not_skipped(
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Test load responses with actual content are NOT skipped and log no warning."""
+    load_with_content_response = [
+        {
+            "model": "test-model",
+            "created_at": "2025-01-01T00:00:00.000000000Z",
+            "done": True,
+            "done_reason": "load",
+            "message": {"role": "assistant", "content": "This is actual content"},
+        }
+    ]
+
+    with patch("langchain_ollama.chat_models.Client") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.chat.return_value = load_with_content_response
+
+        llm = ChatOllama(model="test-model")
+
+        with caplog.at_level(logging.WARNING):
+            result = llm.invoke([HumanMessage("Hello")])
+
+        assert result.content == "This is actual content"
+        assert result.response_metadata.get("done_reason") == "load"
+        assert not caplog.text
diff --git a/libs/partners/ollama/uv.lock b/libs/partners/ollama/uv.lock
index 9dc8665b9c1..a7bf8c85d14 100644
--- a/libs/partners/ollama/uv.lock
+++ b/libs/partners/ollama/uv.lock
@@ -305,7 +305,7 @@ wheels = [
 
 [[package]]
 name = "langchain-core"
-version = "0.3.69"
+version = "0.3.70"
 source = { editable = "../../core" }
 dependencies = [
     { name = "jsonpatch" },
@@ -363,7 +363,7 @@ typing = [
 
 [[package]]
 name = "langchain-ollama"
-version = "0.3.4"
+version = "0.3.5"
 source = { editable = "." }
 dependencies = [
     { name = "langchain-core" },