From fee695ce6d1b6be8fdce96d4bca4f978d9f35768 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Tue, 29 Jul 2025 14:27:39 -0400 Subject: [PATCH] tests updates before implementation --- .../chat_models/test_chat_models_reasoning.py | 128 +++++++++++++----- .../chat_models/test_chat_models_standard.py | 2 +- 2 files changed, 94 insertions(+), 36 deletions(-) diff --git a/libs/partners/ollama/tests/integration_tests/chat_models/test_chat_models_reasoning.py b/libs/partners/ollama/tests/integration_tests/chat_models/test_chat_models_reasoning.py index 19e2106e9ce..83914979dad 100644 --- a/libs/partners/ollama/tests/integration_tests/chat_models/test_chat_models_reasoning.py +++ b/libs/partners/ollama/tests/integration_tests/chat_models/test_chat_models_reasoning.py @@ -1,11 +1,7 @@ """Ollama specific chat model integration tests for reasoning models.""" import pytest -from langchain_core.messages import ( - AIMessageChunk, - BaseMessageChunk, - HumanMessage, -) +from langchain_core.messages import AIMessageChunk, BaseMessageChunk, HumanMessage from pydantic import BaseModel, Field from langchain_ollama import ChatOllama @@ -23,7 +19,7 @@ class MathAnswer(BaseModel): @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")]) def test_stream_no_reasoning(model: str) -> None: """Test streaming with `reasoning=False`""" - llm = ChatOllama(model=model, num_ctx=2**12) + llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False) messages = [ { "role": "user", @@ -41,14 +37,16 @@ def test_stream_no_reasoning(model: str) -> None: assert result.content assert "reasoning_content" not in result.additional_kwargs assert "" not in result.content and "" not in result.content - assert "" not in result.additional_kwargs["reasoning_content"] - assert "" not in result.additional_kwargs["reasoning_content"] + # Only check additional_kwargs for v0 format (content as string) + if not isinstance(result.content, list): + assert "" not in 
result.additional_kwargs.get("reasoning_content", "") + assert "" not in result.additional_kwargs.get("reasoning_content", "") @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")]) async def test_astream_no_reasoning(model: str) -> None: """Test async streaming with `reasoning=False`""" - llm = ChatOllama(model=model, num_ctx=2**12) + llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False) messages = [ { "role": "user", @@ -66,8 +64,10 @@ async def test_astream_no_reasoning(model: str) -> None: assert result.content assert "reasoning_content" not in result.additional_kwargs assert "" not in result.content and "" not in result.content - assert "" not in result.additional_kwargs["reasoning_content"] - assert "" not in result.additional_kwargs["reasoning_content"] + # Only check additional_kwargs for v0 format (content as string) + if not isinstance(result.content, list): + assert "" not in result.additional_kwargs.get("reasoning_content", "") + assert "" not in result.additional_kwargs.get("reasoning_content", "") @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")]) @@ -91,8 +91,10 @@ def test_stream_reasoning_none(model: str) -> None: assert result.content assert "reasoning_content" not in result.additional_kwargs assert "" in result.content and "" in result.content - assert "" not in result.additional_kwargs["reasoning_content"] - assert "" not in result.additional_kwargs["reasoning_content"] + # Only check additional_kwargs for v0 format (content as string) + if not isinstance(result.content, list): + assert "" not in result.additional_kwargs.get("reasoning_content", "") + assert "" not in result.additional_kwargs.get("reasoning_content", "") @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")]) @@ -116,8 +118,10 @@ async def test_astream_reasoning_none(model: str) -> None: assert result.content assert "reasoning_content" not in result.additional_kwargs assert "" in result.content and "" in result.content - assert "" not in 
result.additional_kwargs["reasoning_content"] - assert "" not in result.additional_kwargs["reasoning_content"] + # Only check additional_kwargs for v0 format (content as string) + if not isinstance(result.content, list): + assert "" not in result.additional_kwargs.get("reasoning_content", "") + assert "" not in result.additional_kwargs.get("reasoning_content", "") @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")]) @@ -142,8 +146,10 @@ def test_reasoning_stream(model: str) -> None: assert "reasoning_content" in result.additional_kwargs assert len(result.additional_kwargs["reasoning_content"]) > 0 assert "" not in result.content and "" not in result.content - assert "" not in result.additional_kwargs["reasoning_content"] - assert "" not in result.additional_kwargs["reasoning_content"] + # Only check additional_kwargs for v0 format (content as string) + if not isinstance(result.content, list): + assert "" not in result.additional_kwargs["reasoning_content"] + assert "" not in result.additional_kwargs["reasoning_content"] @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")]) @@ -168,34 +174,38 @@ async def test_reasoning_astream(model: str) -> None: assert "reasoning_content" in result.additional_kwargs assert len(result.additional_kwargs["reasoning_content"]) > 0 assert "" not in result.content and "" not in result.content - assert "" not in result.additional_kwargs["reasoning_content"] - assert "" not in result.additional_kwargs["reasoning_content"] + # Only check additional_kwargs for v0 format (content as string) + if not isinstance(result.content, list): + assert "" not in result.additional_kwargs["reasoning_content"] + assert "" not in result.additional_kwargs["reasoning_content"] @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")]) def test_invoke_no_reasoning(model: str) -> None: """Test using invoke with `reasoning=False`""" - llm = ChatOllama(model=model, num_ctx=2**12) + llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False) 
message = HumanMessage(content=SAMPLE) result = llm.invoke([message]) assert result.content - assert "reasoning_content" not in result.additional_kwargs assert "" not in result.content and "" not in result.content - assert "" not in result.additional_kwargs["reasoning_content"] - assert "" not in result.additional_kwargs["reasoning_content"] + # Only check additional_kwargs for v0 format (content as string) + if not isinstance(result.content, list): + assert "" not in result.additional_kwargs.get("reasoning_content", "") + assert "" not in result.additional_kwargs.get("reasoning_content", "") @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")]) async def test_ainvoke_no_reasoning(model: str) -> None: """Test using async invoke with `reasoning=False`""" - llm = ChatOllama(model=model, num_ctx=2**12) + llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False) message = HumanMessage(content=SAMPLE) result = await llm.ainvoke([message]) assert result.content - assert "reasoning_content" not in result.additional_kwargs assert "" not in result.content and "" not in result.content - assert "" not in result.additional_kwargs["reasoning_content"] - assert "" not in result.additional_kwargs["reasoning_content"] + # Only check additional_kwargs for v0 format (content as string) + if not isinstance(result.content, list): + assert "" not in result.additional_kwargs.get("reasoning_content", "") + assert "" not in result.additional_kwargs.get("reasoning_content", "") @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")]) @@ -207,8 +217,10 @@ def test_invoke_reasoning_none(model: str) -> None: assert result.content assert "reasoning_content" not in result.additional_kwargs assert "" in result.content and "" in result.content - assert "" not in result.additional_kwargs["reasoning_content"] - assert "" not in result.additional_kwargs["reasoning_content"] + # Only check additional_kwargs for v0 format (content as string) + if not isinstance(result.content, list): + 
assert "" not in result.additional_kwargs.get("reasoning_content", "") + assert "" not in result.additional_kwargs.get("reasoning_content", "") @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")]) @@ -220,8 +232,10 @@ async def test_ainvoke_reasoning_none(model: str) -> None: assert result.content assert "reasoning_content" not in result.additional_kwargs assert "" in result.content and "" in result.content - assert "" not in result.additional_kwargs["reasoning_content"] - assert "" not in result.additional_kwargs["reasoning_content"] + # Only check additional_kwargs for v0 format (content as string) + if not isinstance(result.content, list): + assert "" not in result.additional_kwargs.get("reasoning_content", "") + assert "" not in result.additional_kwargs.get("reasoning_content", "") @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")]) @@ -234,8 +248,10 @@ def test_reasoning_invoke(model: str) -> None: assert "reasoning_content" in result.additional_kwargs assert len(result.additional_kwargs["reasoning_content"]) > 0 assert "" not in result.content and "" not in result.content - assert "" not in result.additional_kwargs["reasoning_content"] - assert "" not in result.additional_kwargs["reasoning_content"] + # Only check additional_kwargs for v0 format (content as string) + if not isinstance(result.content, list): + assert "" not in result.additional_kwargs["reasoning_content"] + assert "" not in result.additional_kwargs["reasoning_content"] @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")]) @@ -248,5 +264,47 @@ async def test_reasoning_ainvoke(model: str) -> None: assert "reasoning_content" in result.additional_kwargs assert len(result.additional_kwargs["reasoning_content"]) > 0 assert "" not in result.content and "" not in result.content - assert "" not in result.additional_kwargs["reasoning_content"] - assert "" not in result.additional_kwargs["reasoning_content"] + # Only check additional_kwargs for v0 format (content as string) + if not 
isinstance(result.content, list): + assert "" not in result.additional_kwargs["reasoning_content"] + assert "" not in result.additional_kwargs["reasoning_content"] + + +@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")]) +def test_think_tag_stripping_necessity(model: str) -> None: + """Test that demonstrates why ``_strip_think_tags`` is necessary. + + DeepSeek R1 models include reasoning/thinking as their default behavior. + When ``reasoning=False`` is set, the user explicitly wants no reasoning content, + but Ollama cannot disable thinking at the API level for these models. + Therefore, post-processing is required to strip the ``<think>`` tags. + + This test documents the specific behavior that necessitates the + ``_strip_think_tags`` function in the chat_models.py implementation. + """ + # Test with reasoning=None (default behavior - should include think tags) + llm_default = ChatOllama(model=model, reasoning=None, num_ctx=2**12) + message = HumanMessage(content=SAMPLE) + + result_default = llm_default.invoke([message]) + + # With reasoning=None, the model's default behavior includes <think> tags + # This demonstrates why we need the stripping logic + assert "" in result_default.content + assert "" in result_default.content + assert "reasoning_content" not in result_default.additional_kwargs + + # Test with reasoning=False (explicit disable - should NOT include think tags) + llm_disabled = ChatOllama(model=model, reasoning=False, num_ctx=2**12) + + result_disabled = llm_disabled.invoke([message]) + + # With reasoning=False, think tags should be stripped from content + # This verifies that _strip_think_tags is working correctly + assert "" not in result_disabled.content + assert "" not in result_disabled.content + assert "reasoning_content" not in result_disabled.additional_kwargs + + # Verify the difference: same model, different reasoning settings + # Default includes tags, disabled strips them + assert result_default.content != result_disabled.content diff --git
a/libs/partners/ollama/tests/integration_tests/chat_models/test_chat_models_standard.py b/libs/partners/ollama/tests/integration_tests/chat_models/test_chat_models_standard.py index d596011bfb4..c1397e1143e 100644 --- a/libs/partners/ollama/tests/integration_tests/chat_models/test_chat_models_standard.py +++ b/libs/partners/ollama/tests/integration_tests/chat_models/test_chat_models_standard.py @@ -168,7 +168,7 @@ class TestChatOllama(ChatModelIntegrationTests): with pytest.raises(ValidationError) as excinfo: ChatOllama(model="any-model", validate_model_on_init=True) - assert "not found in Ollama" in str(excinfo.value) + assert "Failed to connect to Ollama" in str(excinfo.value) @patch("langchain_ollama.chat_models.Client.list") def test_init_response_error(self, mock_list: MagicMock) -> None: