From fee695ce6d1b6be8fdce96d4bca4f978d9f35768 Mon Sep 17 00:00:00 2001
From: Mason Daugherty <github@mdrxy.com>
Date: Tue, 29 Jul 2025 14:27:39 -0400
Subject: [PATCH] test updates before implementation

---
 .../chat_models/test_chat_models_reasoning.py | 128 +++++++++++++-----
 .../chat_models/test_chat_models_standard.py  |   2 +-
 2 files changed, 94 insertions(+), 36 deletions(-)
diff --git a/libs/partners/ollama/tests/integration_tests/chat_models/test_chat_models_reasoning.py b/libs/partners/ollama/tests/integration_tests/chat_models/test_chat_models_reasoning.py
index 19e2106e9ce..83914979dad 100644
--- a/libs/partners/ollama/tests/integration_tests/chat_models/test_chat_models_reasoning.py
+++ b/libs/partners/ollama/tests/integration_tests/chat_models/test_chat_models_reasoning.py
@@ -1,11 +1,7 @@
 """Ollama specific chat model integration tests for reasoning models."""
 
 import pytest
-from langchain_core.messages import (
-    AIMessageChunk,
-    BaseMessageChunk,
-    HumanMessage,
-)
+from langchain_core.messages import AIMessageChunk, BaseMessageChunk, HumanMessage
 from pydantic import BaseModel, Field
 
 from langchain_ollama import ChatOllama
@@ -23,7 +19,7 @@ class MathAnswer(BaseModel):
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
 def test_stream_no_reasoning(model: str) -> None:
     """Test streaming with `reasoning=False`"""
-    llm = ChatOllama(model=model, num_ctx=2**12)
+    llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False)
     messages = [
         {
             "role": "user",
@@ -41,14 +37,16 @@ def test_stream_no_reasoning(model: str) -> None:
     assert result.content
     assert "reasoning_content" not in result.additional_kwargs
     assert "<think>" not in result.content and "</think>" not in result.content
-    assert "<think>" not in result.additional_kwargs["reasoning_content"]
-    assert "</think>" not in result.additional_kwargs["reasoning_content"]
+    # Only check additional_kwargs for v0 format (content as string)
+    if not isinstance(result.content, list):
+        assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
+        assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
 async def test_astream_no_reasoning(model: str) -> None:
     """Test async streaming with `reasoning=False`"""
-    llm = ChatOllama(model=model, num_ctx=2**12)
+    llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False)
     messages = [
         {
             "role": "user",
@@ -66,8 +64,10 @@ async def test_astream_no_reasoning(model: str) -> None:
     assert result.content
     assert "reasoning_content" not in result.additional_kwargs
     assert "<think>" not in result.content and "</think>" not in result.content
-    assert "<think>" not in result.additional_kwargs["reasoning_content"]
-    assert "</think>" not in result.additional_kwargs["reasoning_content"]
+    # Only check additional_kwargs for v0 format (content as string)
+    if not isinstance(result.content, list):
+        assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
+        assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -91,8 +91,10 @@ def test_stream_reasoning_none(model: str) -> None:
     assert result.content
     assert "reasoning_content" not in result.additional_kwargs
     assert "<think>" in result.content and "</think>" in result.content
-    assert "<think>" not in result.additional_kwargs["reasoning_content"]
-    assert "</think>" not in result.additional_kwargs["reasoning_content"]
+    # Only check additional_kwargs for v0 format (content as string)
+    if not isinstance(result.content, list):
+        assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
+        assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -116,8 +118,10 @@ async def test_astream_reasoning_none(model: str) -> None:
     assert result.content
     assert "reasoning_content" not in result.additional_kwargs
     assert "<think>" in result.content and "</think>" in result.content
-    assert "<think>" not in result.additional_kwargs["reasoning_content"]
-    assert "</think>" not in result.additional_kwargs["reasoning_content"]
+    # Only check additional_kwargs for v0 format (content as string)
+    if not isinstance(result.content, list):
+        assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
+        assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -142,8 +146,10 @@ def test_reasoning_stream(model: str) -> None:
     assert "reasoning_content" in result.additional_kwargs
     assert len(result.additional_kwargs["reasoning_content"]) > 0
     assert "<think>" not in result.content and "</think>" not in result.content
-    assert "<think>" not in result.additional_kwargs["reasoning_content"]
-    assert "</think>" not in result.additional_kwargs["reasoning_content"]
+    # Only check additional_kwargs for v0 format (content as string)
+    if not isinstance(result.content, list):
+        assert "<think>" not in result.additional_kwargs["reasoning_content"]
+        assert "</think>" not in result.additional_kwargs["reasoning_content"]
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -168,34 +174,38 @@ async def test_reasoning_astream(model: str) -> None:
     assert "reasoning_content" in result.additional_kwargs
     assert len(result.additional_kwargs["reasoning_content"]) > 0
     assert "<think>" not in result.content and "</think>" not in result.content
-    assert "<think>" not in result.additional_kwargs["reasoning_content"]
-    assert "</think>" not in result.additional_kwargs["reasoning_content"]
+    # Only check additional_kwargs for v0 format (content as string)
+    if not isinstance(result.content, list):
+        assert "<think>" not in result.additional_kwargs["reasoning_content"]
+        assert "</think>" not in result.additional_kwargs["reasoning_content"]
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
 def test_invoke_no_reasoning(model: str) -> None:
     """Test using invoke with `reasoning=False`"""
-    llm = ChatOllama(model=model, num_ctx=2**12)
+    llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False)
     message = HumanMessage(content=SAMPLE)
     result = llm.invoke([message])
     assert result.content
-    assert "reasoning_content" not in result.additional_kwargs
     assert "<think>" not in result.content and "</think>" not in result.content
-    assert "<think>" not in result.additional_kwargs["reasoning_content"]
-    assert "</think>" not in result.additional_kwargs["reasoning_content"]
+    # Only check additional_kwargs for v0 format (content as string)
+    if not isinstance(result.content, list):
+        assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
+        assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
 async def test_ainvoke_no_reasoning(model: str) -> None:
     """Test using async invoke with `reasoning=False`"""
-    llm = ChatOllama(model=model, num_ctx=2**12)
+    llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False)
     message = HumanMessage(content=SAMPLE)
     result = await llm.ainvoke([message])
     assert result.content
-    assert "reasoning_content" not in result.additional_kwargs
     assert "<think>" not in result.content and "</think>" not in result.content
-    assert "<think>" not in result.additional_kwargs["reasoning_content"]
-    assert "</think>" not in result.additional_kwargs["reasoning_content"]
+    # Only check additional_kwargs for v0 format (content as string)
+    if not isinstance(result.content, list):
+        assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
+        assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -207,8 +217,10 @@ def test_invoke_reasoning_none(model: str) -> None:
     assert result.content
     assert "reasoning_content" not in result.additional_kwargs
     assert "<think>" in result.content and "</think>" in result.content
-    assert "<think>" not in result.additional_kwargs["reasoning_content"]
-    assert "</think>" not in result.additional_kwargs["reasoning_content"]
+    # Only check additional_kwargs for v0 format (content as string)
+    if not isinstance(result.content, list):
+        assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
+        assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -220,8 +232,10 @@ async def test_ainvoke_reasoning_none(model: str) -> None:
     assert result.content
     assert "reasoning_content" not in result.additional_kwargs
     assert "<think>" in result.content and "</think>" in result.content
-    assert "<think>" not in result.additional_kwargs["reasoning_content"]
-    assert "</think>" not in result.additional_kwargs["reasoning_content"]
+    # Only check additional_kwargs for v0 format (content as string)
+    if not isinstance(result.content, list):
+        assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
+        assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -234,8 +248,10 @@ def test_reasoning_invoke(model: str) -> None:
     assert "reasoning_content" in result.additional_kwargs
     assert len(result.additional_kwargs["reasoning_content"]) > 0
     assert "<think>" not in result.content and "</think>" not in result.content
-    assert "<think>" not in result.additional_kwargs["reasoning_content"]
-    assert "</think>" not in result.additional_kwargs["reasoning_content"]
+    # Only check additional_kwargs for v0 format (content as string)
+    if not isinstance(result.content, list):
+        assert "<think>" not in result.additional_kwargs["reasoning_content"]
+        assert "</think>" not in result.additional_kwargs["reasoning_content"]
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -248,5 +264,47 @@ async def test_reasoning_ainvoke(model: str) -> None:
     assert "reasoning_content" in result.additional_kwargs
     assert len(result.additional_kwargs["reasoning_content"]) > 0
     assert "<think>" not in result.content and "</think>" not in result.content
-    assert "<think>" not in result.additional_kwargs["reasoning_content"]
-    assert "</think>" not in result.additional_kwargs["reasoning_content"]
+    # Only check additional_kwargs for v0 format (content as string)
+    if not isinstance(result.content, list):
+        assert "<think>" not in result.additional_kwargs["reasoning_content"]
+        assert "</think>" not in result.additional_kwargs["reasoning_content"]
+
+
+@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
+def test_think_tag_stripping_necessity(model: str) -> None:
+    """Test that demonstrates why ``_strip_think_tags`` is necessary.
+
+    DeepSeek R1 models include reasoning/thinking as their default behavior.
+    When ``reasoning=False`` is set, the user explicitly wants no reasoning content,
+    but Ollama cannot disable thinking at the API level for these models.
+    Therefore, post-processing is required to strip the ``<think>`` tags.
+
+    This test documents the specific behavior that necessitates the
+    ``_strip_think_tags`` function in the chat_models.py implementation.
+    """
+    # Test with reasoning=None (default behavior - should include think tags)
+    llm_default = ChatOllama(model=model, reasoning=None, num_ctx=2**12)
+    message = HumanMessage(content=SAMPLE)
+
+    result_default = llm_default.invoke([message])
+
+    # With reasoning=None, the model's default output includes <think> tags,
+    # which is why the stripping logic is needed.
+    assert "<think>" in result_default.content
+    assert "</think>" in result_default.content
+    assert "reasoning_content" not in result_default.additional_kwargs
+
+    # Test with reasoning=False (explicit disable - should NOT include think tags)
+    llm_disabled = ChatOllama(model=model, reasoning=False, num_ctx=2**12)
+
+    result_disabled = llm_disabled.invoke([message])
+
+    # With reasoning=False, think tags should be stripped from the content;
+    # this verifies that _strip_think_tags is working correctly.
+    assert "<think>" not in result_disabled.content
+    assert "</think>" not in result_disabled.content
+    assert "reasoning_content" not in result_disabled.additional_kwargs
+
+    # Verify the difference: same model, different reasoning settings.
+    # The default run includes the tags; the disabled run strips them.
+    assert result_default.content != result_disabled.content
diff --git a/libs/partners/ollama/tests/integration_tests/chat_models/test_chat_models_standard.py b/libs/partners/ollama/tests/integration_tests/chat_models/test_chat_models_standard.py
index d596011bfb4..c1397e1143e 100644
--- a/libs/partners/ollama/tests/integration_tests/chat_models/test_chat_models_standard.py
+++ b/libs/partners/ollama/tests/integration_tests/chat_models/test_chat_models_standard.py
@@ -168,7 +168,7 @@ class TestChatOllama(ChatModelIntegrationTests):
 
         with pytest.raises(ValidationError) as excinfo:
             ChatOllama(model="any-model", validate_model_on_init=True)
-        assert "not found in Ollama" in str(excinfo.value)
+        assert "Failed to connect to Ollama" in str(excinfo.value)
 
     @patch("langchain_ollama.chat_models.Client.list")
     def test_init_response_error(self, mock_list: MagicMock) -> None: