Mirror of https://github.com/hwchase17/langchain.git (synced 2025-08-04 02:33:05 +00:00)

# feat(ollama): warn on empty load responses (#32161)
## Problem

When using `ChatOllama` with `create_react_agent`, agents would sometimes terminate prematurely with empty responses when Ollama returned `done_reason: 'load'` responses with no content. This caused agents to return empty `AIMessage` objects instead of actual generated text.

```python
from langchain_ollama import ChatOllama
from langgraph.prebuilt import create_react_agent
from langchain_core.messages import HumanMessage

llm = ChatOllama(model='qwen2.5:7b', temperature=0)
agent = create_react_agent(model=llm, tools=[])
result = agent.invoke(
    {"messages": [HumanMessage('Hello')]},
    {"configurable": {"thread_id": "1"}},
)

# Before fix: AIMessage(content='', response_metadata={'done_reason': 'load'})
# Expected: AIMessage with actual generated content
```

## Root Cause

The `_iterate_over_stream` and `_aiterate_over_stream` methods treated any response with `done: True` as final, regardless of `done_reason`. When Ollama returns `done_reason: 'load'` with empty content, it indicates that the model was loaded but no actual generation occurred, so it should not be treated as a complete response.

## Solution

Modified the streaming logic to skip a response when all of the following hold:

- `done: True`
- `done_reason: 'load'`
- the content is empty or contains only whitespace

This ensures agents only receive actual generated content, while preserving backward compatibility for load responses that do contain content.

## Changes

- **`_iterate_over_stream`**: skip empty load responses instead of yielding them
- **`_aiterate_over_stream`**: apply the same fix to async streaming
- **Tests**: add cases covering empty, whitespace-only, and content-bearing load responses

## Testing

All scenarios now work correctly:

- ✅ Empty load responses are skipped (fixes the original issue)
- ✅ Load responses with actual content are preserved (backward compatibility)
- ✅ Normal stop responses work unchanged
- ✅ Streaming behavior is preserved
- ✅ `create_react_agent` integration is fixed

Fixes #31482.

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: mdrxy <61371264+mdrxy@users.noreply.github.com>
Co-authored-by: Mason Daugherty <github@mdrxy.com>
Parent: 116b758498
Commit: d40fd5a3ce
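Because the fix logs a warning rather than raising, the simplest way to confirm it is firing is to enable `WARNING`-level logging. The snippet below is a minimal sketch, assuming a locally running Ollama server; `qwen2.5:7b` is only a placeholder model name, and it relies solely on the standard-library `logging` module plus the public `ChatOllama` API.

```python
import logging

from langchain_core.messages import HumanMessage
from langchain_ollama import ChatOllama

# Emit WARNING-level records (default basicConfig format is
# "LEVEL:logger_name:message") so the new skip warning becomes visible.
logging.basicConfig(level=logging.WARNING)

llm = ChatOllama(model="qwen2.5:7b")  # placeholder model name
result = llm.invoke([HumanMessage("Hello")])

# If Ollama first answers with done_reason='load' and empty content, that chunk
# is now skipped and a warning similar to the following is logged:
#   WARNING:langchain_ollama.chat_models:Ollama returned empty response with
#   done_reason='load'. ...
print(result.content)
```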
**`langchain_ollama/chat_models.py`**

```diff
@@ -4,6 +4,7 @@ from __future__ import annotations
 
 import ast
 import json
+import logging
 from collections.abc import AsyncIterator, Iterator, Mapping, Sequence
 from operator import itemgetter
 from typing import (
@@ -58,6 +59,8 @@ from typing_extensions import Self, is_typeddict
 
 from ._utils import validate_model
 
+log = logging.getLogger(__name__)
+
 
 def _get_usage_metadata_from_generation_info(
     generation_info: Optional[Mapping[str, Any]],
```

Sync streaming (`_iterate_over_stream`):

```diff
@@ -837,6 +840,28 @@ class ChatOllama(BaseChatModel):
         reasoning = kwargs.get("reasoning", self.reasoning)
         for stream_resp in self._create_chat_stream(messages, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                content = (
+                    stream_resp["message"]["content"]
+                    if "message" in stream_resp and "content" in stream_resp["message"]
+                    else ""
+                )
+
+                # Warn and skip responses with done_reason: 'load' and empty content
+                # These indicate the model was loaded but no actual generation occurred
+                is_load_response_with_empty_content = (
+                    stream_resp.get("done") is True
+                    and stream_resp.get("done_reason") == "load"
+                    and not content.strip()
+                )
+
+                if is_load_response_with_empty_content:
+                    log.warning(
+                        "Ollama returned empty response with done_reason='load'."
+                        "This typically indicates the model was loaded but no content "
+                        "was generated. Skipping this response."
+                    )
+                    continue
+
                 if stream_resp.get("done") is True:
                     generation_info = dict(stream_resp)
                     if "model" in generation_info:
@@ -845,12 +870,6 @@ class ChatOllama(BaseChatModel):
                 else:
                     generation_info = None
 
-                content = (
-                    stream_resp["message"]["content"]
-                    if "message" in stream_resp and "content" in stream_resp["message"]
-                    else ""
-                )
-
                 additional_kwargs = {}
                 if (
                     reasoning
```

Async streaming (`_aiterate_over_stream`):

```diff
@@ -897,6 +916,28 @@ class ChatOllama(BaseChatModel):
         reasoning = kwargs.get("reasoning", self.reasoning)
         async for stream_resp in self._acreate_chat_stream(messages, stop, **kwargs):
             if not isinstance(stream_resp, str):
+                content = (
+                    stream_resp["message"]["content"]
+                    if "message" in stream_resp and "content" in stream_resp["message"]
+                    else ""
+                )
+
+                # Warn and skip responses with done_reason: 'load' and empty content
+                # These indicate the model was loaded but no actual generation occurred
+                is_load_response_with_empty_content = (
+                    stream_resp.get("done") is True
+                    and stream_resp.get("done_reason") == "load"
+                    and not content.strip()
+                )
+
+                if is_load_response_with_empty_content:
+                    log.warning(
+                        "Ollama returned empty response with done_reason='load'. "
+                        "This typically indicates the model was loaded but no content "
+                        "was generated. Skipping this response."
+                    )
+                    continue
+
                 if stream_resp.get("done") is True:
                     generation_info = dict(stream_resp)
                     if "model" in generation_info:
@@ -905,12 +946,6 @@ class ChatOllama(BaseChatModel):
                 else:
                     generation_info = None
 
-                content = (
-                    stream_resp["message"]["content"]
-                    if "message" in stream_resp and "content" in stream_resp["message"]
-                    else ""
-                )
-
                 additional_kwargs = {}
                 if (
                     reasoning
```
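For callers that consume raw Ollama streaming dictionaries directly (outside `ChatOllama`) and want the same guard, the logic above can be restated as a small standalone predicate. This is only an illustrative sketch: `_is_empty_load_chunk` is a hypothetical helper name, not part of langchain-ollama's API, and the sample chunks are hand-written stand-ins for real Ollama responses.

```python
from collections.abc import Mapping
from typing import Any


def _is_empty_load_chunk(chunk: Mapping[str, Any]) -> bool:
    """Return True for a terminal done_reason='load' chunk with no generated text.

    Hypothetical helper mirroring the guard added to the streaming methods above.
    """
    message = chunk.get("message") or {}
    content = message.get("content") or ""
    return (
        chunk.get("done") is True
        and chunk.get("done_reason") == "load"
        and not content.strip()
    )


# Hand-written example chunks (stand-ins for real Ollama stream responses).
chunks = [
    {"done": True, "done_reason": "load", "message": {"role": "assistant", "content": ""}},
    {"done": True, "done_reason": "stop", "message": {"role": "assistant", "content": "Hi!"}},
]
print([c["message"]["content"] for c in chunks if not _is_empty_load_chunk(c)])  # -> ['Hi!']
```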
**`ChatOllama` unit tests**

```diff
@@ -1,15 +1,16 @@
 """Test chat model integration."""
 
 import json
+import logging
 from collections.abc import Generator
 from contextlib import contextmanager
 from typing import Any
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
 
 import pytest
 from httpx import Client, Request, Response
 from langchain_core.exceptions import OutputParserException
-from langchain_core.messages import ChatMessage
+from langchain_core.messages import ChatMessage, HumanMessage
 from langchain_tests.unit_tests import ChatModelUnitTests
 
 from langchain_ollama.chat_models import (
@@ -140,3 +141,130 @@ def test_parse_json_string_skip_returns_input_on_failure() -> None:
         skip=True,
     )
     assert result == malformed_string
+
+
+def test_load_response_with_empty_content_is_skipped(
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Test that load responses with empty content log a warning and are skipped."""
+    load_only_response = [
+        {
+            "model": "test-model",
+            "created_at": "2025-01-01T00:00:00.000000000Z",
+            "done": True,
+            "done_reason": "load",
+            "message": {"role": "assistant", "content": ""},
+        }
+    ]
+
+    with patch("langchain_ollama.chat_models.Client") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.chat.return_value = load_only_response
+
+        llm = ChatOllama(model="test-model")
+
+        with (
+            caplog.at_level(logging.WARNING),
+            pytest.raises(ValueError, match="No data received from Ollama stream"),
+        ):
+            llm.invoke([HumanMessage("Hello")])
+
+        assert "Ollama returned empty response with done_reason='load'" in caplog.text
+
+
+def test_load_response_with_whitespace_content_is_skipped(
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Test load responses w/ only whitespace content log a warning and are skipped."""
+    load_whitespace_response = [
+        {
+            "model": "test-model",
+            "created_at": "2025-01-01T00:00:00.000000000Z",
+            "done": True,
+            "done_reason": "load",
+            "message": {"role": "assistant", "content": " \n \t "},
+        }
+    ]
+
+    with patch("langchain_ollama.chat_models.Client") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.chat.return_value = load_whitespace_response
+
+        llm = ChatOllama(model="test-model")
+
+        with (
+            caplog.at_level(logging.WARNING),
+            pytest.raises(ValueError, match="No data received from Ollama stream"),
+        ):
+            llm.invoke([HumanMessage("Hello")])
+        assert "Ollama returned empty response with done_reason='load'" in caplog.text
+
+
+def test_load_followed_by_content_response(
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Test load responses log a warning and are skipped when followed by content."""
+    load_then_content_response = [
+        {
+            "model": "test-model",
+            "created_at": "2025-01-01T00:00:00.000000000Z",
+            "done": True,
+            "done_reason": "load",
+            "message": {"role": "assistant", "content": ""},
+        },
+        {
+            "model": "test-model",
+            "created_at": "2025-01-01T00:00:01.000000000Z",
+            "done": True,
+            "done_reason": "stop",
+            "message": {
+                "role": "assistant",
+                "content": "Hello! How can I help you today?",
+            },
+        },
+    ]
+
+    with patch("langchain_ollama.chat_models.Client") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.chat.return_value = load_then_content_response
+
+        llm = ChatOllama(model="test-model")
+
+        with caplog.at_level(logging.WARNING):
+            result = llm.invoke([HumanMessage("Hello")])
+
+        assert "Ollama returned empty response with done_reason='load'" in caplog.text
+        assert result.content == "Hello! How can I help you today?"
+        assert result.response_metadata.get("done_reason") == "stop"
+
+
+def test_load_response_with_actual_content_is_not_skipped(
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Test load responses with actual content are NOT skipped and log no warning."""
+    load_with_content_response = [
+        {
+            "model": "test-model",
+            "created_at": "2025-01-01T00:00:00.000000000Z",
+            "done": True,
+            "done_reason": "load",
+            "message": {"role": "assistant", "content": "This is actual content"},
+        }
+    ]
+
+    with patch("langchain_ollama.chat_models.Client") as mock_client_class:
+        mock_client = MagicMock()
+        mock_client_class.return_value = mock_client
+        mock_client.chat.return_value = load_with_content_response
+
+        llm = ChatOllama(model="test-model")
+
+        with caplog.at_level(logging.WARNING):
+            result = llm.invoke([HumanMessage("Hello")])
+
+        assert result.content == "This is actual content"
+        assert result.response_metadata.get("done_reason") == "load"
+        assert not caplog.text
```
**`uv.lock`**

```diff
@@ -305,7 +305,7 @@ wheels = [
 
 [[package]]
 name = "langchain-core"
-version = "0.3.69"
+version = "0.3.70"
 source = { editable = "../../core" }
 dependencies = [
     { name = "jsonpatch" },
@@ -363,7 +363,7 @@ typing = [
 
 [[package]]
 name = "langchain-ollama"
-version = "0.3.4"
+version = "0.3.5"
 source = { editable = "." }
 dependencies = [
     { name = "langchain-core" },
```