mirror of
https://github.com/hwchase17/langchain.git
synced 2026-04-10 22:43:18 +00:00
fix(ollama): serialize reasoning_content back to ollama thinking (#36573)
Closes #36177. --- Ollama's deserialization path already captures `"thinking"` content as `additional_kwargs["reasoning_content"]` on `AIMessage`, but the reverse direction — serializing back to the Ollama wire format — was missing. This means multi-turn conversations with reasoning models like `deepseek-r1` would silently drop the chain-of-thought, breaking agents that need prior reasoning preserved across turns.
This commit is contained in:
@@ -970,6 +970,10 @@ class ChatOllama(BaseChatModel):
|
||||
msg_["tool_calls"] = tool_calls
|
||||
if tool_call_id:
|
||||
msg_["tool_call_id"] = tool_call_id
|
||||
if isinstance(message, AIMessage):
|
||||
thinking = message.additional_kwargs.get("reasoning_content")
|
||||
if thinking is not None:
|
||||
msg_["thinking"] = thinking
|
||||
ollama_messages.append(msg_)
|
||||
|
||||
return ollama_messages
|
||||
|
||||
@@ -1,7 +1,12 @@
|
||||
"""Ollama integration tests for reasoning chat models."""
|
||||
|
||||
import pytest
|
||||
from langchain_core.messages import AIMessageChunk, BaseMessageChunk, HumanMessage
|
||||
from langchain_core.messages import (
|
||||
AIMessage,
|
||||
AIMessageChunk,
|
||||
BaseMessageChunk,
|
||||
HumanMessage,
|
||||
)
|
||||
|
||||
from langchain_ollama import ChatOllama
|
||||
|
||||
@@ -224,3 +229,42 @@ def test_reasoning_modes_behavior(model: str) -> None:
|
||||
assert len(result_enabled.additional_kwargs["reasoning_content"]) > 0
|
||||
assert "<think>" not in result_enabled.additional_kwargs["reasoning_content"]
|
||||
assert "</think>" not in result_enabled.additional_kwargs["reasoning_content"]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model", [REASONING_MODEL_NAME])
@pytest.mark.parametrize("use_async", [False, True])
async def test_reasoning_content_round_trip(model: str, use_async: bool) -> None:
    """Verify multi-turn conversation with reasoning_content round-trips without error.

    Serialization correctness is covered by the unit test
    `test_reasoning_content_serialized_as_thinking`. This test verifies the
    end-to-end flow against a real Ollama instance.

    Related: https://github.com/langchain-ai/langchain/issues/36177.
    """
    # NOTE(review): num_ctx=2**12 presumably enlarges the context window so the
    # echoed turn-1 reasoning plus the follow-up question fit — confirm against
    # the default context size of the reasoning model used.
    llm = ChatOllama(model=model, num_ctx=2**12, reasoning=True)

    # Turn 1: get a response with reasoning
    turn1_msg = HumanMessage(content=SAMPLE)
    if use_async:
        turn1_result = await llm.ainvoke([turn1_msg])
    else:
        turn1_result = llm.invoke([turn1_msg])

    # With reasoning=True the deserializer must have captured the model's
    # chain-of-thought on the first reply.
    assert "reasoning_content" in turn1_result.additional_kwargs

    # Turn 2: feed the AIMessage back alongside a follow-up question
    # (rebuilt by hand so only content + reasoning_content are carried over).
    turn1_ai = AIMessage(
        content=str(turn1_result.content),
        additional_kwargs={
            "reasoning_content": turn1_result.additional_kwargs["reasoning_content"],
        },
    )
    turn2_messages = [turn1_msg, turn1_ai, HumanMessage(content="Now what is 4^4?")]
    if use_async:
        turn2_result = await llm.ainvoke(turn2_messages)
    else:
        turn2_result = llm.invoke(turn2_messages)

    # The second call succeeding shows the serialized `thinking` field was
    # accepted by the server; the new reply carries its own reasoning.
    assert turn2_result.content
    assert "reasoning_content" in turn2_result.additional_kwargs
|
||||
|
||||
@@ -8,7 +8,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from langchain_core.exceptions import OutputParserException
|
||||
from langchain_core.messages import ChatMessage, HumanMessage
|
||||
from langchain_core.messages import AIMessage, BaseMessage, ChatMessage, HumanMessage
|
||||
from langchain_tests.unit_tests import ChatModelUnitTests
|
||||
|
||||
from langchain_ollama.chat_models import (
|
||||
@@ -482,7 +482,9 @@ def test_logprobs_params_passed_to_client() -> None:
|
||||
assert call_kwargs["top_logprobs"] == 3
|
||||
|
||||
# Case 3: auto-enabled logprobs propagates to client
|
||||
llm = ChatOllama(model=MODEL_NAME, top_logprobs=3)
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore", UserWarning)
|
||||
llm = ChatOllama(model=MODEL_NAME, top_logprobs=3)
|
||||
llm.invoke([HumanMessage("Hello")])
|
||||
|
||||
call_kwargs = mock_client.chat.call_args[1]
|
||||
@@ -810,3 +812,104 @@ def test_chat_ollama_ignores_strict_arg() -> None:
|
||||
# Check that 'strict' was NOT passed to the client
|
||||
call_kwargs = mock_client.chat.call_args[1]
|
||||
assert "strict" not in call_kwargs
|
||||
|
||||
|
||||
def test_reasoning_content_serialized_as_thinking() -> None:
    """Ensure `reasoning_content` is written back as Ollama's `thinking` field.

    Deserialization of Ollama thinking responses stores the chain-of-thought in
    `AIMessage.additional_kwargs["reasoning_content"]`. Converting such a
    message back to the wire format must emit it as `thinking` on the outgoing
    assistant dict, so the model sees its prior reasoning in multi-turn chats.

    Reproduces https://github.com/langchain-ai/langchain/issues/36177.
    """
    with patch("langchain_ollama.chat_models.Client"):
        llm = ChatOllama(model="deepseek-r1")

    prior_turn = AIMessage(
        content="4",
        additional_kwargs={"reasoning_content": "2+2 equals 4"},
    )
    history: list[BaseMessage] = [
        HumanMessage(content="Solve 2+2"),
        prior_turn,
        HumanMessage(content="Now solve 3+3"),
    ]

    converted = llm._convert_messages_to_ollama_messages(history)

    assistant = converted[1]
    assert assistant["role"] == "assistant"
    assert assistant.get("thinking") == "2+2 equals 4", (
        "reasoning_content should be serialized as 'thinking' in the Ollama message"
    )
|
||||
|
||||
|
||||
def test_convert_messages_does_not_mutate_input_list() -> None:
    """Check that message conversion leaves the caller's list untouched.

    A previous implementation rewrote elements of the input list in place
    (`messages[idx] = ...`) while converting v1 content, mutating the caller's
    data.

    Regression test for https://github.com/langchain-ai/langchain/issues/36564.
    """
    with patch("langchain_ollama.chat_models.Client"):
        llm = ChatOllama(model="test-model")

    v1_ai_message = AIMessage(
        content=[{"type": "text", "text": "Hello from v1"}],
        response_metadata={"output_version": "v1"},
    )
    messages: list = [HumanMessage(content="Hi"), v1_ai_message]

    # Remember the identity of the element most likely to be rewritten.
    before_conversion = messages[1]

    llm._convert_messages_to_ollama_messages(messages)

    assert messages[1] is before_conversion, (
        "_convert_messages_to_ollama_messages should not mutate the caller's list"
    )
|
||||
|
||||
|
||||
def test_reasoning_content_absent_no_thinking_key() -> None:
    """AIMessage without `reasoning_content` should not produce a `thinking` key."""
    with patch("langchain_ollama.chat_models.Client"):
        llm = ChatOllama(model="test-model")

    convo: list[BaseMessage] = [HumanMessage(content="Hi"), AIMessage(content="Hello")]
    converted = llm._convert_messages_to_ollama_messages(convo)

    # No reasoning_content in additional_kwargs -> no thinking field emitted.
    assert "thinking" not in converted[1]
|
||||
|
||||
|
||||
def test_reasoning_content_empty_string_preserved() -> None:
    """An explicitly set empty-string `reasoning_content` should still round-trip."""
    with patch("langchain_ollama.chat_models.Client"):
        llm = ChatOllama(model="test-model")

    empty_reasoning = AIMessage(
        content="Hello", additional_kwargs={"reasoning_content": ""}
    )
    convo: list[BaseMessage] = [HumanMessage(content="Hi"), empty_reasoning]

    converted = llm._convert_messages_to_ollama_messages(convo)

    # "" is not None, so the serializer must keep it rather than drop the key.
    assert converted[1].get("thinking") == ""
|
||||
|
||||
|
||||
def test_non_ai_message_reasoning_content_ignored() -> None:
    """Non-AIMessage types with `reasoning_content` should not produce `thinking`."""
    with patch("langchain_ollama.chat_models.Client"):
        llm = ChatOllama(model="test-model")

    human_with_reasoning = HumanMessage(
        content="Hi",
        additional_kwargs={"reasoning_content": "should be ignored"},
    )
    converted = llm._convert_messages_to_ollama_messages(
        [human_with_reasoning]  # type: ignore[list-item]
    )

    # Only AIMessage instances are eligible for the thinking field.
    assert "thinking" not in converted[0]
|
||||
|
||||
Reference in New Issue
Block a user