From 4d261089c6de5224cd1c33d9c358475bace1a3eb Mon Sep 17 00:00:00 2001
From: Mason Daugherty
Date: Wed, 6 Aug 2025 13:11:46 -0400
Subject: [PATCH] more tests refactors

---
 libs/partners/ollama/README.md                |   8 +
 .../chat_models/test_chat_models_reasoning.py |  78 ++--
 .../tests/integration_tests/test_llms.py      |   9 +-
 .../test_chat_models_standard_v1.py           |   9 +-
 .../v1/chat_models/test_chat_models_v1.py     | 368 ++++++++++++++++--
 .../integration_tests/chat_models_v1.py       |  12 +-
 6 files changed, 381 insertions(+), 103 deletions(-)

diff --git a/libs/partners/ollama/README.md b/libs/partners/ollama/README.md
index 937ef85b1c1..e0feb34d8ac 100644
--- a/libs/partners/ollama/README.md
+++ b/libs/partners/ollama/README.md
@@ -32,6 +32,14 @@ llm = ChatOllama(model="llama3.1")
 llm.invoke("Sing a ballad of LangChain.")
 ```
 
+### v1 Chat Models
+
+For v1 chat models, import the `ChatOllama` class from the `v1` namespace:
+
+```python
+from langchain_ollama.v1.chat_models import ChatOllama
+```
+
 ## [Embeddings](https://python.langchain.com/api_reference/ollama/embeddings/langchain_ollama.embeddings.OllamaEmbeddings.html#ollamaembeddings)
 
 `OllamaEmbeddings` class exposes embeddings from Ollama.
diff --git a/libs/partners/ollama/tests/integration_tests/chat_models/test_chat_models_reasoning.py b/libs/partners/ollama/tests/integration_tests/chat_models/test_chat_models_reasoning.py
index 84ad466a32c..82198050b40 100644
--- a/libs/partners/ollama/tests/integration_tests/chat_models/test_chat_models_reasoning.py
+++ b/libs/partners/ollama/tests/integration_tests/chat_models/test_chat_models_reasoning.py
@@ -1,21 +1,13 @@
-"""Ollama specific chat model integration tests for reasoning models."""
+"""Ollama integration tests for reasoning chat models."""
 
 import pytest
 from langchain_core.messages import AIMessageChunk, BaseMessageChunk, HumanMessage
-from pydantic import BaseModel, Field
 
 from langchain_ollama import ChatOllama
 
 SAMPLE = "What is 3^3?"
 
 
-class MathAnswer(BaseModel):
-    """A mathematical expression and its numerical answer."""
-
-    expression: str = Field(description="The mathematical expression to evaluate.")
-    answer: int = Field(description="The numerical answer to the expression.")
-
-
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
 def test_stream_no_reasoning(model: str) -> None:
     """Test streaming with `reasoning=False`"""
@@ -36,9 +28,7 @@ def test_stream_no_reasoning(model: str) -> None:
     assert isinstance(result, AIMessageChunk)
     assert result.content
     assert "<think>" not in result.content and "</think>" not in result.content
-    if hasattr(result, "additional_kwargs"):
-        # v0 format
-        assert "reasoning_content" not in result.additional_kwargs
+    assert "reasoning_content" not in result.additional_kwargs
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -61,9 +51,7 @@ async def test_astream_no_reasoning(model: str) -> None:
     assert isinstance(result, AIMessageChunk)
     assert result.content
     assert "<think>" not in result.content and "</think>" not in result.content
-    if hasattr(result, "additional_kwargs"):
-        # v0 format
-        assert "reasoning_content" not in result.additional_kwargs
+    assert "reasoning_content" not in result.additional_kwargs
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -85,12 +73,10 @@ def test_stream_reasoning_none(model: str) -> None:
         result += chunk
     assert isinstance(result, AIMessageChunk)
     assert result.content
-    assert "reasoning_content" not in result.additional_kwargs
     assert "<think>" in result.content and "</think>" in result.content
-    if not isinstance(result.content, list):
-        # v0 format (content as string)
-        assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
-        assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
+    assert "reasoning_content" not in result.additional_kwargs
+    assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
+    assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -112,12 +98,10 @@ async def test_astream_reasoning_none(model: str) -> None:
         result += chunk
     assert isinstance(result, AIMessageChunk)
     assert result.content
-    assert "reasoning_content" not in result.additional_kwargs
     assert "<think>" in result.content and "</think>" in result.content
-    if not isinstance(result.content, list):
-        # v0 format (content as string)
-        assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
-        assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
+    assert "reasoning_content" not in result.additional_kwargs
+    assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
+    assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -142,10 +126,8 @@ def test_reasoning_stream(model: str) -> None:
     assert "reasoning_content" in result.additional_kwargs
     assert len(result.additional_kwargs["reasoning_content"]) > 0
     assert "<think>" not in result.content and "</think>" not in result.content
-    if not isinstance(result.content, list):
-        # v0 format (content as string)
-        assert "<think>" not in result.additional_kwargs["reasoning_content"]
-        assert "</think>" not in result.additional_kwargs["reasoning_content"]
+    assert "<think>" not in result.additional_kwargs["reasoning_content"]
+    assert "</think>" not in result.additional_kwargs["reasoning_content"]
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -170,10 +152,8 @@ async def test_reasoning_astream(model: str) -> None:
     assert "reasoning_content" in result.additional_kwargs
     assert len(result.additional_kwargs["reasoning_content"]) > 0
     assert "<think>" not in result.content and "</think>" not in result.content
-    if not isinstance(result.content, list):
-        # v0 format (content as string)
-        assert "<think>" not in result.additional_kwargs["reasoning_content"]
-        assert "</think>" not in result.additional_kwargs["reasoning_content"]
+    assert "<think>" not in result.additional_kwargs["reasoning_content"]
+    assert "</think>" not in result.additional_kwargs["reasoning_content"]
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -183,10 +163,8 @@ def test_invoke_no_reasoning(model: str) -> None:
     message = HumanMessage(content=SAMPLE)
     result = llm.invoke([message])
     assert result.content
+    assert "reasoning_content" not in result.additional_kwargs
     assert "<think>" not in result.content and "</think>" not in result.content
-    if hasattr(result, "additional_kwargs"):
-        # v0 format
-        assert "reasoning_content" not in result.additional_kwargs
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -196,10 +174,8 @@ async def test_ainvoke_no_reasoning(model: str) -> None:
     message = HumanMessage(content=SAMPLE)
     result = await llm.ainvoke([message])
     assert result.content
+    assert "reasoning_content" not in result.additional_kwargs
     assert "<think>" not in result.content and "</think>" not in result.content
-    if hasattr(result, "additional_kwargs"):
-        # v0 format
-        assert "reasoning_content" not in result.additional_kwargs
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -211,10 +187,8 @@ def test_invoke_reasoning_none(model: str) -> None:
     assert result.content
     assert "reasoning_content" not in result.additional_kwargs
     assert "<think>" in result.content and "</think>" in result.content
-    if not isinstance(result.content, list):
-        # v0 format (content as string)
-        assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
-        assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
+    assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
+    assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -226,10 +200,8 @@ async def test_ainvoke_reasoning_none(model: str) -> None:
     assert result.content
     assert "reasoning_content" not in result.additional_kwargs
     assert "<think>" in result.content and "</think>" in result.content
-    if not isinstance(result.content, list):
-        # v0 format (content as string)
-        assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
-        assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
+    assert "<think>" not in result.additional_kwargs.get("reasoning_content", "")
+    assert "</think>" not in result.additional_kwargs.get("reasoning_content", "")
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -242,10 +214,8 @@ def test_reasoning_invoke(model: str) -> None:
     assert "reasoning_content" in result.additional_kwargs
     assert len(result.additional_kwargs["reasoning_content"]) > 0
     assert "<think>" not in result.content and "</think>" not in result.content
-    if not isinstance(result.content, list):
-        # v0 format (content as string)
-        assert "<think>" not in result.additional_kwargs["reasoning_content"]
-        assert "</think>" not in result.additional_kwargs["reasoning_content"]
+    assert "<think>" not in result.additional_kwargs["reasoning_content"]
+    assert "</think>" not in result.additional_kwargs["reasoning_content"]
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
@@ -258,10 +228,8 @@ async def test_reasoning_ainvoke(model: str) -> None:
     assert "reasoning_content" in result.additional_kwargs
     assert len(result.additional_kwargs["reasoning_content"]) > 0
     assert "<think>" not in result.content and "</think>" not in result.content
-    # Only check additional_kwargs for v0 format (content as string)
-    if not isinstance(result.content, list):
-        assert "<think>" not in result.additional_kwargs["reasoning_content"]
-        assert "</think>" not in result.additional_kwargs["reasoning_content"]
+    assert "<think>" not in result.additional_kwargs["reasoning_content"]
+    assert "</think>" not in result.additional_kwargs["reasoning_content"]
 
 
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
diff --git a/libs/partners/ollama/tests/integration_tests/test_llms.py b/libs/partners/ollama/tests/integration_tests/test_llms.py
index 129fb2cff5d..9f2209d4d95 100644
--- a/libs/partners/ollama/tests/integration_tests/test_llms.py
+++ b/libs/partners/ollama/tests/integration_tests/test_llms.py
@@ -7,6 +7,7 @@ from langchain_core.runnables import RunnableConfig
 from langchain_ollama.llms import OllamaLLM
 
 MODEL_NAME = "llama3.1"
+REASONING_MODEL_NAME = "deepseek-r1:1.5b"
 
 SAMPLE = "What is 3^3?"
 
@@ -18,7 +19,7 @@ def test_stream_text_tokens() -> None:
         assert isinstance(token, str)
 
 
-@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
+@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)])
 def test__stream_no_reasoning(model: str) -> None:
     """Test low-level chunk streaming of a simple prompt with `reasoning=False`."""
     llm = OllamaLLM(model=model, num_ctx=2**12)
@@ -39,7 +40,7 @@ def test__stream_no_reasoning(model: str) -> None:
     assert "reasoning_content" not in result_chunk.generation_info  # type: ignore[operator]
 
 
-@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
+@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)])
 def test__stream_with_reasoning(model: str) -> None:
     """Test low-level chunk streaming with `reasoning=True`."""
     llm = OllamaLLM(model=model, num_ctx=2**12, reasoning=True)
@@ -71,7 +72,7 @@ async def test_astream_text_tokens() -> None:
         assert isinstance(token, str)
 
 
-@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
+@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)])
 async def test__astream_no_reasoning(model: str) -> None:
     """Test low-level async chunk streaming with `reasoning=False`."""
     llm = OllamaLLM(model=model, num_ctx=2**12)
@@ -89,7 +90,7 @@ async def test__astream_no_reasoning(model: str) -> None:
     assert "reasoning_content" not in result_chunk.generation_info  # type: ignore[operator]
 
 
-@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
+@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)])
 async def test__astream_with_reasoning(model: str) -> None:
     """Test low-level async chunk streaming with `reasoning=True`."""
     llm = OllamaLLM(model=model, num_ctx=2**12, reasoning=True)
diff --git a/libs/partners/ollama/tests/integration_tests/v1/chat_models/test_chat_models_standard_v1.py b/libs/partners/ollama/tests/integration_tests/v1/chat_models/test_chat_models_standard_v1.py
index 656e79579a9..74e30041f8c 100644
--- a/libs/partners/ollama/tests/integration_tests/v1/chat_models/test_chat_models_standard_v1.py
+++ b/libs/partners/ollama/tests/integration_tests/v1/chat_models/test_chat_models_standard_v1.py
@@ -15,6 +15,7 @@ from pydantic import ValidationError
 from langchain_ollama.v1.chat_models import ChatOllama
 
 DEFAULT_MODEL_NAME = "llama3.1"
+REASONING_MODEL_NAME = "deepseek-r1:1.5b"
 
 
 @tool
@@ -219,8 +220,8 @@ class TestChatOllamaV1(ChatModelV1IntegrationTests):
 
     @pytest.mark.xfail(
         reason=(
-            "Default llama3.1 model does not support reasoning. Override uses "
-            "reasoning-capable model with reasoning=True enabled."
+            f"{DEFAULT_MODEL_NAME} does not support reasoning. Override uses "
+            "reasoning-capable model with `reasoning=True` enabled."
         ),
         strict=False,
     )
@@ -234,7 +235,7 @@ class TestChatOllamaV1(ChatModelV1IntegrationTests):
             pytest.skip("Model does not support ReasoningContentBlock.")
 
         reasoning_enabled_model = ChatOllama(
-            model="deepseek-r1:1.5b", reasoning=True, validate_model_on_init=True
+            model=REASONING_MODEL_NAME, reasoning=True, validate_model_on_init=True
         )
 
         message = HumanMessage("Think step by step: What is 2 + 2?")
@@ -251,6 +252,8 @@ class TestChatOllamaV1(ChatModelV1IntegrationTests):
             f"Content blocks: {[block.get('type') for block in result.content]}"
         )
 
+    # Additional Ollama reasoning tests in v1/chat_models/test_chat_models_v1.py
+
     @patch("langchain_ollama.v1.chat_models.Client.list")
     def test_init_model_not_found(self, mock_list: MagicMock) -> None:
         """Test that a ValueError is raised when the model is not found."""
diff --git a/libs/partners/ollama/tests/integration_tests/v1/chat_models/test_chat_models_v1.py b/libs/partners/ollama/tests/integration_tests/v1/chat_models/test_chat_models_v1.py
index c00955a2044..0b9869bfd2c 100644
--- a/libs/partners/ollama/tests/integration_tests/v1/chat_models/test_chat_models_v1.py
+++ b/libs/partners/ollama/tests/integration_tests/v1/chat_models/test_chat_models_v1.py
@@ -9,12 +9,17 @@ from __future__ import annotations
 from typing import Annotated, Optional
 
 import pytest
+from langchain_core.messages.content_blocks import is_reasoning_block
+from langchain_core.v1.messages import AIMessageChunk, HumanMessage
 from pydantic import BaseModel, Field
 from typing_extensions import TypedDict
 
 from langchain_ollama.v1.chat_models import ChatOllama
 
 DEFAULT_MODEL_NAME = "llama3.1"
+REASONING_MODEL_NAME = "deepseek-r1:1.5b"
+
+SAMPLE = "What is 3^3?"
 
 
@pytest.mark.parametrize(("method"), [("function_calling"), ("json_schema")]) @@ -104,50 +109,335 @@ def test_structured_output_deeply_nested(model: str) -> None: assert isinstance(chunk, Data) -# def test_reasoning_content_blocks() -> None: -# """Test that the model supports reasoning content blocks.""" -# llm = ChatOllama(model=DEFAULT_MODEL_NAME, temperature=0) +@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)]) +def test_stream_no_reasoning(model: str) -> None: + """Test streaming with `reasoning=False`""" + llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False) + result = None + for chunk in llm.stream(SAMPLE): + assert isinstance(chunk, AIMessageChunk) + if result is None: + result = chunk + continue + result += chunk + assert isinstance(result, AIMessageChunk) + assert result.content -# # Test with a reasoning prompt -# messages = [HumanMessage("Think step by step and solve: What is 2 + 2?")] + content_types = set() + for content_block in result.content: + type_ = content_block.get("type") + if type_: + content_types.add(type_) -# result = llm.invoke(messages) - -# # Check that we get an AIMessage with content blocks -# assert isinstance(result, AIMessage) -# assert len(result.content) > 0 - -# # For streaming, check that reasoning blocks are properly handled -# chunks = [] -# for chunk in llm.stream(messages): -# chunks.append(chunk) -# assert isinstance(chunk, AIMessageChunk) - -# assert len(chunks) > 0 + assert "reasoning" not in content_types, ( + f"Expected no reasoning content, got types: {content_types}" + ) + assert "non_standard" not in content_types, ( + f"Expected no non-standard content, got types: {content_types}" + ) + assert "" not in result.text and "" not in result.text -# def test_multimodal_support() -> None: -# """Test that the model supports image content blocks.""" -# llm = ChatOllama(model=DEFAULT_MODEL_NAME, temperature=0) +@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)]) +async def test_astream_no_reasoning(model: str) -> None: + """Test async streaming with `reasoning=False`""" + llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False) + result = None + async for chunk in llm.astream(SAMPLE): + assert isinstance(chunk, AIMessageChunk) + if result is None: + result = chunk + continue + result += chunk + assert isinstance(result, AIMessageChunk) + assert result.content -# # Create a message with image content block -# from langchain_core.messages.content_blocks import ( -# create_image_block, -# create_text_block, -# ) + content_types = set() + for content_block in result.content: + type_ = content_block.get("type") + if type_: + content_types.add(type_) -# # Test with a simple base64 placeholder (real integration would use actual image) -# message = HumanMessage( -# content=[ -# create_text_block("Describe this image:"), -# create_image_block( -# base64="iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==" # noqa: E501 -# ), -# ] -# ) + assert "reasoning" not in content_types, ( + f"Expected no reasoning content, got types: {content_types}" + ) + assert "non_standard" not in content_types, ( + f"Expected no non-standard content, got types: {content_types}" + ) + assert "" not in result.text and "" not in result.text -# result = llm.invoke([message]) -# # Check that we get a response (even if it's just acknowledging the image) -# assert isinstance(result, AIMessage) -# assert len(result.content) > 0 +@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)]) +def 
test_stream_reasoning_none(model: str) -> None: + """Test streaming with `reasoning=None`""" + llm = ChatOllama(model=model, num_ctx=2**12, reasoning=None) + result = None + for chunk in llm.stream(SAMPLE): + assert isinstance(chunk, AIMessageChunk) + if result is None: + result = chunk + continue + result += chunk + assert isinstance(result, AIMessageChunk) + assert result.content + + assert "" in result.text and "" in result.text + + +@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)]) +async def test_astream_reasoning_none(model: str) -> None: + """Test async streaming with `reasoning=None`""" + llm = ChatOllama(model=model, num_ctx=2**12, reasoning=None) + result = None + async for chunk in llm.astream(SAMPLE): + assert isinstance(chunk, AIMessageChunk) + if result is None: + result = chunk + continue + result += chunk + assert isinstance(result, AIMessageChunk) + assert result.content + + assert "" in result.text and "" in result.text + + +@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)]) +def test_reasoning_stream(model: str) -> None: + """Test streaming with `reasoning=True`""" + llm = ChatOllama(model=model, num_ctx=2**12, reasoning=True) + result = None + for chunk in llm.stream(SAMPLE): + assert isinstance(chunk, AIMessageChunk) + if result is None: + result = chunk + continue + result += chunk + assert isinstance(result, AIMessageChunk) + assert result.content + + content_types = set() + for content_block in result.content: + type_ = content_block.get("type") + if type_: + content_types.add(type_) + + assert "reasoning" in content_types, ( + f"Expected reasoning content, got types: {content_types}" + ) + assert "non_standard" not in content_types, ( + f"Expected no non-standard content, got types: {content_types}" + ) + assert "" not in result.text and "" not in result.text + + # Assert non-empty reasoning content in ReasoningContentBlock + reasoning_blocks = [block for block in result.content if is_reasoning_block(block)] + for block in reasoning_blocks: + assert block.get("reasoning"), "Expected non-empty reasoning content" + assert len(block.get("reasoning", "")) > 0, ( + "Expected reasoning content to be non-empty" + ) + + +@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)]) +async def test_reasoning_astream(model: str) -> None: + """Test async streaming with `reasoning=True`""" + llm = ChatOllama(model=model, num_ctx=2**12, reasoning=True) + result = None + async for chunk in llm.astream(SAMPLE): + assert isinstance(chunk, AIMessageChunk) + if result is None: + result = chunk + continue + result += chunk + assert isinstance(result, AIMessageChunk) + assert result.content + + content_types = set() + for content_block in result.content: + type_ = content_block.get("type") + if type_: + content_types.add(type_) + + assert "reasoning" in content_types, ( + f"Expected reasoning content, got types: {content_types}" + ) + assert "non_standard" not in content_types, ( + f"Expected no non-standard content, got types: {content_types}" + ) + assert "" not in result.text and "" not in result.text + + # Assert non-empty reasoning content in ReasoningContentBlock + reasoning_blocks = [block for block in result.content if is_reasoning_block(block)] + for block in reasoning_blocks: + assert block.get("reasoning"), "Expected non-empty reasoning content" + assert len(block.get("reasoning", "")) > 0, ( + "Expected reasoning content to be non-empty" + ) + + +@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)]) +def test_invoke_no_reasoning(model: str) 
-> None: + """Test using invoke with `reasoning=False`""" + llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False) + message = HumanMessage(SAMPLE) + result = llm.invoke([message]) + assert result.content + + content_types = set() + for content_block in result.content: + type_ = content_block.get("type") + if type_: + content_types.add(type_) + + assert "reasoning" not in content_types, ( + f"Expected no reasoning content, got types: {content_types}" + ) + assert "non_standard" not in content_types, ( + f"Expected no non-standard content, got types: {content_types}" + ) + assert "" not in result.text and "" not in result.text + + +@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)]) +async def test_ainvoke_no_reasoning(model: str) -> None: + """Test using async invoke with `reasoning=False`""" + llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False) + message = HumanMessage(content=SAMPLE) + result = await llm.ainvoke([message]) + assert result.content + + content_types = set() + for content_block in result.content: + type_ = content_block.get("type") + if type_: + content_types.add(type_) + + assert "reasoning" not in content_types, ( + f"Expected no reasoning content, got types: {content_types}" + ) + assert "non_standard" not in content_types, ( + f"Expected no non-standard content, got types: {content_types}" + ) + assert "" not in result.text and "" not in result.text + + +@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)]) +def test_invoke_reasoning_none(model: str) -> None: + """Test using invoke with `reasoning=None`""" + llm = ChatOllama(model=model, num_ctx=2**12, reasoning=None) + message = HumanMessage(content=SAMPLE) + result = llm.invoke([message]) + assert result.content + + assert "" in result.text and "" in result.text + + +@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)]) +async def test_ainvoke_reasoning_none(model: str) -> None: + """Test using async invoke with `reasoning=None`""" + llm = ChatOllama(model=model, num_ctx=2**12, reasoning=None) + message = HumanMessage(content=SAMPLE) + result = await llm.ainvoke([message]) + assert result.content + + assert "" in result.text and "" in result.text + + +@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)]) +def test_reasoning_invoke(model: str) -> None: + """Test invoke with `reasoning=True`""" + llm = ChatOllama(model=model, num_ctx=2**12, reasoning=True) + message = HumanMessage(content=SAMPLE) + result = llm.invoke([message]) + assert result.content + + content_types = set() + for content_block in result.content: + type_ = content_block.get("type") + if type_: + content_types.add(type_) + + assert "reasoning" in content_types, ( + f"Expected reasoning content, got types: {content_types}" + ) + assert "non_standard" not in content_types, ( + f"Expected no non-standard content, got types: {content_types}" + ) + assert "" not in result.text and "" not in result.text + + # Assert non-empty reasoning content in ReasoningContentBlock + reasoning_blocks = [block for block in result.content if is_reasoning_block(block)] + for block in reasoning_blocks: + assert block.get("reasoning"), "Expected non-empty reasoning content" + assert len(block.get("reasoning", "")) > 0, ( + "Expected reasoning content to be non-empty" + ) + + +@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)]) +async def test_reasoning_ainvoke(model: str) -> None: + """Test invoke with `reasoning=True`""" + llm = ChatOllama(model=model, num_ctx=2**12, reasoning=True) + message = 
HumanMessage(content=SAMPLE) + result = await llm.ainvoke([message]) + assert result.content + + content_types = set() + for content_block in result.content: + type_ = content_block.get("type") + if type_: + content_types.add(type_) + + assert "reasoning" in content_types, ( + f"Expected reasoning content, got types: {content_types}" + ) + assert "non_standard" not in content_types, ( + f"Expected no non-standard content, got types: {content_types}" + ) + assert "" not in result.text and "" not in result.text + + # Assert non-empty reasoning content in ReasoningContentBlock + reasoning_blocks = [block for block in result.content if is_reasoning_block(block)] + for block in reasoning_blocks: + assert block.get("reasoning"), "Expected non-empty reasoning content" + assert len(block.get("reasoning", "")) > 0, ( + "Expected reasoning content to be non-empty" + ) + + +@pytest.mark.parametrize(("model"), [(REASONING_MODEL_NAME)]) +def test_think_tag_stripping_necessity(model: str) -> None: + """Test that demonstrates why ``_strip_think_tags`` is necessary. + + DeepSeek R1 models include reasoning/thinking as their default behavior. + When ``reasoning=False`` is set, the user explicitly wants no reasoning content, + but Ollama cannot disable thinking at the API level for these models. + Therefore, post-processing is required to strip the ```` tags. + + This test documents the specific behavior that necessitates the + ``_strip_think_tags`` function in the chat_models.py implementation. + """ + # Test with reasoning=None (default behavior - should include think tags) + llm_default = ChatOllama(model=model, reasoning=None, num_ctx=2**12) + message = HumanMessage(content=SAMPLE) + + result_default = llm_default.invoke([message]) + + # With reasoning=None, the model's default behavior includes tags + # This demonstrates why we need the stripping logic + assert "" in result_default.text + assert "" in result_default.text + + # Test with reasoning=False (explicit disable - should NOT include think tags) + llm_disabled = ChatOllama(model=model, reasoning=False, num_ctx=2**12) + + result_disabled = llm_disabled.invoke([message]) + + # With reasoning=False, think tags should be stripped from content + # This verifies that _strip_think_tags is working correctly + assert "" not in result_disabled.text + assert "" not in result_disabled.text + + # Verify the difference: same model, different reasoning settings + # Default includes tags, disabled strips them + assert result_default.content != result_disabled.content diff --git a/libs/standard-tests/langchain_tests/integration_tests/chat_models_v1.py b/libs/standard-tests/langchain_tests/integration_tests/chat_models_v1.py index 8bf6bee1c08..26cec08179a 100644 --- a/libs/standard-tests/langchain_tests/integration_tests/chat_models_v1.py +++ b/libs/standard-tests/langchain_tests/integration_tests/chat_models_v1.py @@ -3188,7 +3188,12 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): assert result.content is not None def test_reasoning_content_blocks_basic(self, model: BaseChatModel) -> None: - """Test that the model can generate ``ReasoningContentBlock``.""" + """Test that the model can generate ``ReasoningContentBlock``. + + If your integration requires a reasoning parameter to be explicitly set, you + will need to override this test to set it appropriately. 
+ + """ if not self.supports_reasoning_content_blocks: pytest.skip("Model does not support ReasoningContentBlock.") @@ -3202,7 +3207,10 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): for block in result.content if isinstance(block, dict) and is_reasoning_block(block) ] - assert len(reasoning_blocks) > 0 + assert len(reasoning_blocks) > 0, ( + "Expected reasoning content blocks but found none. " + f"Content blocks: {[block.get('type') for block in result.content]}" + ) def test_non_standard_content_blocks_basic(self, model: BaseChatModel) -> None: """Test that the model can handle ``NonStandardContentBlock``."""