Mirror of https://github.com/hwchase17/langchain.git (synced 2025-08-26 21:11:25 +00:00)
This commit is contained in: parent 10349b019a, commit fd5c29a268
@@ -68,6 +68,29 @@ from langchain_ollama._utils import validate_model
 log = logging.getLogger(__name__)
 
 
+def _strip_think_tags(content: str) -> str:
+    """Strip ``<think>`` tags from content.
+
+    This is needed because some models have reasoning/thinking as their default
+    behavior and will include ``<think>`` tags even when ``reasoning=False`` is set.
+
+    Since Ollama doesn't provide a way to completely disable thinking for models
+    that do it by default, we must post-process the response to remove the tags
+    when the user has explicitly disabled reasoning.
+
+    Args:
+        content: The content that may contain think tags.
+
+    Returns:
+        Content with think tags and their contents removed.
+    """
+    import re
+
+    # Remove everything between <think> and </think> tags, including the tags
+    pattern = r"<think>.*?</think>"
+    return re.sub(pattern, "", content, flags=re.DOTALL).strip()
+
+
 def _get_usage_metadata_from_generation_info(
     generation_info: Optional[Mapping[str, Any]],
 ) -> Optional[UsageMetadata]:
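For reference, a minimal standalone sketch of the stripping behavior the new helper implements. The function name `strip_think_tags` and the sample string below are illustrative and not part of the diff:

import re

def strip_think_tags(content: str) -> str:
    # Same regex approach as the helper added above: drop <think>...</think>
    # spans (DOTALL lets the match cross newlines), then trim whitespace.
    return re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL).strip()

sample = "<think>Let me reason step by step...</think>The answer is 42."
assert strip_think_tags(sample) == "The answer is 42."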
@@ -615,6 +638,72 @@ class ChatOllama(BaseChatModel):
         The async client to use for making requests.
         """
 
+    def _chat_params_v1(
+        self,
+        messages: list[BaseMessage],
+        stop: Optional[list[str]] = None,
+        **kwargs: Any,
+    ) -> dict[str, Any]:
+        """Generate chat parameters with native v1 message support.
+
+        This method uses the v1-native message conversion and is preferred for handling
+        v1 format messages.
+
+        Args:
+            messages: List of messages to convert.
+            stop: Optional stop sequences.
+            **kwargs: Additional parameters.
+
+        Returns:
+            Dictionary of parameters for Ollama API.
+        """
+        # TODO make this just part of _chat_params ?
+        # Depends on longrun decision and message formatting probably
+        ollama_messages = self._convert_messages_to_ollama_messages_v1(messages)
+
+        if self.stop is not None and stop is not None:
+            msg = "`stop` found in both the input and default params."
+            raise ValueError(msg)
+        if self.stop is not None:
+            stop = self.stop
+
+        options_dict = kwargs.pop(
+            "options",
+            {
+                "mirostat": self.mirostat,
+                "mirostat_eta": self.mirostat_eta,
+                "mirostat_tau": self.mirostat_tau,
+                "num_ctx": self.num_ctx,
+                "num_gpu": self.num_gpu,
+                "num_thread": self.num_thread,
+                "num_predict": self.num_predict,
+                "repeat_last_n": self.repeat_last_n,
+                "repeat_penalty": self.repeat_penalty,
+                "temperature": self.temperature,
+                "seed": self.seed,
+                "stop": self.stop if stop is None else stop,
+                "tfs_z": self.tfs_z,
+                "top_k": self.top_k,
+                "top_p": self.top_p,
+            },
+        )
+
+        params = {
+            "messages": ollama_messages,
+            "stream": kwargs.pop("stream", True),
+            "model": kwargs.pop("model", self.model),
+            "think": kwargs.pop("reasoning", self.reasoning),
+            "format": kwargs.pop("format", self.format),
+            "options": Options(**options_dict),
+            "keep_alive": kwargs.pop("keep_alive", self.keep_alive),
+            **kwargs,
+        }
+
+        if tools := kwargs.get("tools"):
+            params["tools"] = tools
+
+        return params
+
     def _chat_params(
         self,
         messages: list[BaseMessage],
@@ -666,6 +755,34 @@ class ChatOllama(BaseChatModel):
 
         return params
 
+    def _get_chat_params(
+        self,
+        messages: list[BaseMessage],
+        stop: Optional[list[str]] = None,
+        **kwargs: Any,
+    ) -> dict[str, Any]:
+        """Get chat parameters, choosing between v0 and v1 methods.
+
+        This method automatically chooses the appropriate parameter generation method
+        based on whether messages contain v1 format content.
+
+        Args:
+            messages: List of messages to convert.
+            stop: Optional stop sequences.
+            **kwargs: Additional parameters.
+
+        Returns:
+            Dictionary of parameters for Ollama API.
+        """
+        # Check if any message has v1 format content (list of content blocks)
+        has_v1_messages = any(isinstance(msg.content, list) for msg in messages)
+
+        if has_v1_messages:
+            # Use v1-native method for better handling
+            return self._chat_params_v1(messages, stop, **kwargs)
+        # Use legacy v0 method for backward compatibility
+        return self._chat_params(messages, stop, **kwargs)
+
     @model_validator(mode="after")
     def _set_clients(self) -> Self:
         """Set clients to use for ollama."""
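A rough sketch of the dispatch rule `_get_chat_params` applies: any message whose `content` is a list of content blocks routes the batch to the v1-native path, plain strings stay on the legacy v0 path. `FakeMessage` and `is_v1_batch` below are illustrative stand-ins, not names from the diff:

from dataclasses import dataclass
from typing import Any, Union

@dataclass
class FakeMessage:
    # Stand-in for BaseMessage: content is either a plain string (v0)
    # or a list of content blocks (v1).
    content: Union[str, list[dict[str, Any]]]

def is_v1_batch(messages: list[FakeMessage]) -> bool:
    # Mirrors the check in _get_chat_params: any list-typed content
    # selects the v1-native path; otherwise the legacy v0 path is used.
    return any(isinstance(msg.content, list) for msg in messages)

assert not is_v1_batch([FakeMessage("hello")])                       # -> _chat_params
assert is_v1_batch([FakeMessage([{"type": "text", "text": "hi"}])])  # -> _chat_params_v1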
@@ -685,6 +802,179 @@ class ChatOllama(BaseChatModel):
             validate_model(self._client, self.model)
         return self
 
+    def _convert_messages_to_ollama_messages_v1(
+        self, messages: list[BaseMessage]
+    ) -> Sequence[Message]:
+        """Convert messages to Ollama format with native v1 support.
+
+        This method handles v1 format messages natively without converting to v0 first.
+        This is the preferred method for v1 message handling.
+
+        Args:
+            messages: List of messages to convert, may include v1 format.
+
+        Returns:
+            Sequence of Ollama Message objects.
+        """
+        ollama_messages: list = []
+        for message in messages:
+            # Handle v1 format messages natively (don't convert to v0)
+            role: str
+            tool_call_id: Optional[str] = None
+            tool_calls: Optional[list[dict[str, Any]]] = None
+            if isinstance(message, HumanMessage):
+                role = "user"
+            elif isinstance(message, AIMessage):
+                role = "assistant"
+                tool_calls = (
+                    [
+                        _lc_tool_call_to_openai_tool_call(tool_call)
+                        for tool_call in message.tool_calls
+                    ]
+                    if message.tool_calls
+                    else None
+                )
+            elif isinstance(message, SystemMessage):
+                role = "system"
+            elif isinstance(message, ChatMessage):
+                role = message.role
+            elif isinstance(message, ToolMessage):
+                role = "tool"
+                tool_call_id = message.tool_call_id
+            else:
+                msg = "Received unsupported message type for Ollama."
+                raise ValueError(msg)
+
+            content = ""
+            images = []
+            reasoning_content = None
+
+            # Handle v1 format content (list of content blocks)
+            if isinstance(message.content, list):
+                for content_part in message.content:
+                    if isinstance(content_part, dict):
+                        block_type = content_part.get("type")
+                        if block_type == "text":
+                            content += content_part.get("text", "")
+                        elif block_type == "reasoning":
+                            # Extract reasoning content for separate handling
+                            reasoning_content = content_part.get("reasoning", "")
+                        elif block_type == "tool_call":
+                            # Skip - handled by tool_calls property
+                            continue
+                        elif block_type == "image_url":
+                            image_url = None
+                            temp_image_url = content_part.get("image_url")
+                            if isinstance(temp_image_url, str):
+                                image_url = temp_image_url
+                            elif (
+                                isinstance(temp_image_url, dict)
+                                and "url" in temp_image_url
+                                and isinstance(temp_image_url["url"], str)
+                            ):
+                                image_url = temp_image_url["url"]
+                            else:
+                                msg = (
+                                    "Only string image_url or dict with string 'url' "
+                                    "inside content parts are supported."
+                                )
+                                raise ValueError(msg)
+
+                            image_url_components = image_url.split(",")
+                            # Support data:image/jpeg;base64,<image> format
+                            # and base64 strings
+                            if len(image_url_components) > 1:
+                                images.append(image_url_components[1])
+                            else:
+                                images.append(image_url_components[0])
+                        elif is_data_content_block(content_part):
+                            image = _get_image_from_data_content_block(content_part)
+                            images.append(image)
+                        else:
+                            # Convert unknown content blocks to NonStandardContentBlock
+                            # TODO what to do with these?
+                            _convert_unknown_content_block_to_non_standard(content_part)
+                            continue
+                    else:
+                        # Handle content blocks that are not dicts
+                        # (e.g., TextContentBlock objects)
+                        if hasattr(content_part, "type"):
+                            if content_part.type == "text":
+                                content += getattr(content_part, "text", "")
+                            elif content_part.type == "reasoning":
+                                reasoning_content = getattr(
+                                    content_part, "reasoning", ""
+                                )
+                            # Add other content block types as needed
+
+            # Handle v0 format content (string)
+            elif isinstance(message.content, str):
+                content = message.content
+            else:
+                # Handle other content formats if needed
+                for content_part in cast(list[dict], message.content):
+                    if content_part.get("type") == "text":
+                        content += f"\n{content_part['text']}"
+                    elif content_part.get("type") == "tool_use":
+                        continue
+                    elif content_part.get("type") == "tool_call":
+                        # Skip - handled by tool_calls property
+                        continue
+                    elif content_part.get("type") == "reasoning":
+                        # Skip - handled by reasoning parameter
+                        continue
+                    elif content_part.get("type") == "image_url":
+                        image_url = None
+                        temp_image_url = content_part.get("image_url")
+                        if isinstance(temp_image_url, str):
+                            image_url = temp_image_url
+                        elif (
+                            isinstance(temp_image_url, dict)
+                            and "url" in temp_image_url
+                            and isinstance(temp_image_url["url"], str)
+                        ):
+                            image_url = temp_image_url["url"]
+                        else:
+                            msg = (
+                                "Only string image_url or dict with string 'url' "
+                                "inside content parts are supported."
+                            )
+                            raise ValueError(msg)
+
+                        image_url_components = image_url.split(",")
+                        # Support data:image/jpeg;base64,<image> format
+                        # and base64 strings
+                        if len(image_url_components) > 1:
+                            images.append(image_url_components[1])
+                        else:
+                            images.append(image_url_components[0])
+                    elif is_data_content_block(content_part):
+                        image = _get_image_from_data_content_block(content_part)
+                        images.append(image)
+                    else:
+                        # Convert unknown content blocks to NonStandardContentBlock
+                        # TODO what to do with these?
+                        _convert_unknown_content_block_to_non_standard(content_part)
+                        continue
+
+            # Should convert to ollama.Message once role includes tool,
+            # and tool_call_id is in Message
+            msg_: dict = {
+                "role": role,
+                "content": content,
+                "images": images,
+            }
+            if tool_calls:
+                msg_["tool_calls"] = tool_calls
+            if tool_call_id:
+                msg_["tool_call_id"] = tool_call_id
+            # Store reasoning content for later use if present
+            if reasoning_content:
+                msg_["_reasoning_content"] = reasoning_content
+            ollama_messages.append(msg_)
+
+        return ollama_messages
+
     def _convert_messages_to_ollama_messages(
         self, messages: list[BaseMessage]
    ) -> Sequence[Message]:
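To illustrate the shape the v1 conversion produces, here is a simplified toy sketch of how a list-of-blocks message collapses into the role/content/images dict built above. `convert_v1_blocks` is an illustrative helper over plain dicts, not the actual `_convert_messages_to_ollama_messages_v1`, and it omits tool calls and data content blocks:

from typing import Any

def convert_v1_blocks(role: str, blocks: list[dict[str, Any]]) -> dict[str, Any]:
    # Toy version of the block handling above: text blocks are concatenated,
    # reasoning blocks are stashed separately, base64 image_url blocks are
    # split off the data-URL prefix, and tool_call blocks are skipped.
    content, images, reasoning = "", [], None
    for block in blocks:
        btype = block.get("type")
        if btype == "text":
            content += block.get("text", "")
        elif btype == "reasoning":
            reasoning = block.get("reasoning", "")
        elif btype == "tool_call":
            continue
        elif btype == "image_url":
            url = block["image_url"]
            url = url["url"] if isinstance(url, dict) else url
            parts = url.split(",")
            images.append(parts[1] if len(parts) > 1 else parts[0])
    msg: dict[str, Any] = {"role": role, "content": content, "images": images}
    if reasoning:
        msg["_reasoning_content"] = reasoning
    return msg

blocks = [
    {"type": "text", "text": "Describe this image."},
    {"type": "image_url", "image_url": "data:image/jpeg;base64,AAAA"},
]
assert convert_v1_blocks("user", blocks) == {
    "role": "user",
    "content": "Describe this image.",
    "images": ["AAAA"],
}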
@@ -922,6 +1212,10 @@ class ChatOllama(BaseChatModel):
                    else ""
                )

+                # Strip think tags if reasoning is explicitly disabled
+                if reasoning is False:
+                    content = _strip_think_tags(content)
+
                # Warn and skip responses with done_reason: 'load' and empty content
                # These indicate the model was loaded but no actual generation occurred
                is_load_response_with_empty_content = (
@@ -1003,6 +1297,10 @@ class ChatOllama(BaseChatModel):
                    else ""
                )

+                # Strip think tags if reasoning is explicitly disabled
+                if reasoning is False:
+                    content = _strip_think_tags(content)
+
                # Warn and skip responses with done_reason: 'load' and empty content
                # These indicate the model was loaded but no actual generation occurred
                is_load_response_with_empty_content = (
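These two hunks add the same post-processing to the sync and async stream paths. A small sketch of the guard they introduce, with illustrative helper names (`strip_think_tags`, `postprocess`) and sample values that are not taken from the diff:

import re
from typing import Optional

def strip_think_tags(content: str) -> str:
    return re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL).strip()

def postprocess(content: str, reasoning: Optional[bool]) -> str:
    # Mirrors the guard added in both hunks: strip only when reasoning is
    # explicitly False; None (model default) and True leave content as-is.
    if reasoning is False:
        return strip_think_tags(content)
    return content

raw = "<think>chain of thought</think>Paris."
assert postprocess(raw, reasoning=False) == "Paris."
assert postprocess(raw, reasoning=None) == raw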
@@ -23,7 +23,7 @@ class MathAnswer(BaseModel):
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
 def test_stream_no_reasoning(model: str) -> None:
     """Test streaming with `reasoning=False`"""
-    llm = ChatOllama(model=model, num_ctx=2**12)
+    llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False)
     messages = [
         {
             "role": "user",
@@ -46,7 +46,7 @@ def test_stream_no_reasoning(model: str) -> None:
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
 async def test_astream_no_reasoning(model: str) -> None:
     """Test async streaming with `reasoning=False`"""
-    llm = ChatOllama(model=model, num_ctx=2**12)
+    llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False)
     messages = [
         {
             "role": "user",
@@ -175,7 +175,7 @@ async def test_reasoning_astream(model: str) -> None:
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
 def test_invoke_no_reasoning(model: str) -> None:
     """Test using invoke with `reasoning=False`"""
-    llm = ChatOllama(model=model, num_ctx=2**12)
+    llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False)
     message = HumanMessage(content=SAMPLE)
     result = llm.invoke([message])
     assert result.content
@@ -189,7 +189,7 @@ def test_invoke_no_reasoning(model: str) -> None:
 @pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
 async def test_ainvoke_no_reasoning(model: str) -> None:
     """Test using async invoke with `reasoning=False`"""
-    llm = ChatOllama(model=model, num_ctx=2**12)
+    llm = ChatOllama(model=model, num_ctx=2**12, reasoning=False)
     message = HumanMessage(content=SAMPLE)
     result = await llm.ainvoke([message])
     assert result.content
@@ -256,3 +256,43 @@ async def test_reasoning_ainvoke(model: str) -> None:
     assert "<think>" not in result.content and "</think>" not in result.content
     assert "<think>" not in result.additional_kwargs["reasoning_content"]
     assert "</think>" not in result.additional_kwargs["reasoning_content"]
+
+
+@pytest.mark.parametrize(("model"), [("deepseek-r1:1.5b")])
+def test_think_tag_stripping_necessity(model: str) -> None:
+    """Test that demonstrates why ``_strip_think_tags`` is necessary.
+
+    DeepSeek R1 models include reasoning/thinking as their default behavior.
+    When ``reasoning=False`` is set, the user explicitly wants no reasoning content,
+    but Ollama cannot disable thinking at the API level for these models.
+    Therefore, post-processing is required to strip the ``<think>`` tags.
+
+    This test documents the specific behavior that necessitates the
+    ``_strip_think_tags`` function in the chat_models.py implementation.
+    """
+    # Test with reasoning=None (default behavior - should include think tags)
+    llm_default = ChatOllama(model=model, reasoning=None, num_ctx=2**12)
+    message = HumanMessage(content=SAMPLE)
+
+    result_default = llm_default.invoke([message])
+
+    # With reasoning=None, the model's default behavior includes <think> tags
+    # This demonstrates why we need the stripping logic
+    assert "<think>" in result_default.content
+    assert "</think>" in result_default.content
+    assert "reasoning_content" not in result_default.additional_kwargs
+
+    # Test with reasoning=False (explicit disable - should NOT include think tags)
+    llm_disabled = ChatOllama(model=model, reasoning=False, num_ctx=2**12)
+
+    result_disabled = llm_disabled.invoke([message])
+
+    # With reasoning=False, think tags should be stripped from content
+    # This verifies that _strip_think_tags is working correctly
+    assert "<think>" not in result_disabled.content
+    assert "</think>" not in result_disabled.content
+    assert "reasoning_content" not in result_disabled.additional_kwargs
+
+    # Verify the difference: same model, different reasoning settings
+    # Default includes tags, disabled strips them
+    assert result_default.content != result_disabled.content
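As a usage illustration of the behavior this new integration test pins down (assumes a local Ollama server with the deepseek-r1:1.5b model pulled; the prompt below is illustrative and not part of the diff):

from langchain_core.messages import HumanMessage
from langchain_ollama import ChatOllama

# With reasoning=False, <think>...</think> spans are stripped from .content.
llm = ChatOllama(model="deepseek-r1:1.5b", reasoning=False)
result = llm.invoke([HumanMessage(content="What is 3 + 4?")])
assert "<think>" not in result.content

# With the default (reasoning=None), a DeepSeek R1 model emits its reasoning
# inline, so the raw content may still contain <think> tags.
llm_default = ChatOllama(model="deepseek-r1:1.5b")
result_default = llm_default.invoke([HumanMessage(content="What is 3 + 4?")])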
@@ -168,7 +168,7 @@ class TestChatOllama(ChatModelIntegrationTests):
 
         with pytest.raises(ValidationError) as excinfo:
             ChatOllama(model="any-model", validate_model_on_init=True)
-        assert "not found in Ollama" in str(excinfo.value)
+        assert "Failed to connect to Ollama" in str(excinfo.value)
 
     @patch("langchain_ollama.chat_models.Client.list")
     def test_init_response_error(self, mock_list: MagicMock) -> None: