nits & namespace update for ollama

Mason Daugherty 2025-08-06 12:19:28 -04:00
parent 821527b97a
commit e18e2c13ce
9 changed files with 154 additions and 101 deletions
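
In short: the provisional chat_models_v1 module moves under a dedicated v1 namespace, and the ChatOllamaV1 re-export is dropped from the package root. A minimal sketch of the resulting import path (module paths taken from the hunks below; the model name is simply the one used in the tests):

    # Hedged sketch of the import change made by this commit.
    # Before (removed below):
    #     from langchain_ollama import ChatOllamaV1
    # After:
    from langchain_ollama.v1 import ChatOllama

    llm = ChatOllama(model="llama3.1")  # model name as used in the tests below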

View File

@@ -16,7 +16,6 @@ service.
 from importlib import metadata
 from langchain_ollama.chat_models import ChatOllama
-from langchain_ollama.chat_models_v1 import ChatOllama as ChatOllamaV1
 from langchain_ollama.embeddings import OllamaEmbeddings
 from langchain_ollama.llms import OllamaLLM
@@ -31,7 +30,6 @@ del metadata # optional, avoids polluting the results of dir(__package__)
 __all__ = [
     "ChatOllama",
-    "ChatOllamaV1",
     "OllamaEmbeddings",
     "OllamaLLM",
     "__version__",

View File

@@ -1,4 +1,4 @@
-"""V1 message conversion utilities for Ollama."""
+"""LangChain v1 message conversion utilities for Ollama."""
 from __future__ import annotations

View File

@@ -1,11 +1,11 @@
-"""Utility functions for validating Ollama models."""
+"""Utility function to validate Ollama models."""
 from httpx import ConnectError
 from ollama import Client, ResponseError
 def validate_model(client: Client, model_name: str) -> None:
-    """Validate that a model exists in the Ollama instance.
+    """Validate that a model exists in the local Ollama instance.
     Args:
         client: The Ollama client.
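
A small usage sketch of this helper, assuming the stock Ollama client pointed at the usual local host (the host value is an assumption, not something this diff specifies):

    # Hedged sketch: calling validate_model directly.
    from ollama import Client

    from langchain_ollama._utils import validate_model

    client = Client(host="http://localhost:11434")  # assumed default local Ollama host
    validate_model(client, "llama3.1")  # raises if the model is not available locally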

View File

@@ -0,0 +1,5 @@
+from langchain_ollama.v1.chat_models import (
+    ChatOllama,
+)
+__all__ = ["ChatOllama"]

View File

@@ -0,0 +1,5 @@
+from langchain_ollama.v1.chat_models.base import (
+    ChatOllama,
+)
+__all__ = ["ChatOllama"]

View File

@@ -1,9 +1,9 @@
-"""Ollama chat model v1 implementation.
-This implementation provides native support for v1 messages with structured
-content blocks.
+"""v1 Ollama implementation.
+Provides native support for v1 messages with standard content blocks.
 .. versionadded:: 1.0.0
 """
 from __future__ import annotations
@@ -45,12 +45,12 @@ from pydantic.json_schema import JsonSchemaValue
 from pydantic.v1 import BaseModel as BaseModelV1
 from typing_extensions import Self, is_typeddict
-from ._compat import (
+from langchain_ollama._compat import (
     _convert_chunk_to_v1,
     _convert_from_v1_to_ollama_format,
     _convert_to_v1_from_ollama_format,
 )
-from ._utils import validate_model
+from langchain_ollama._utils import validate_model
 log = logging.getLogger(__name__)
@@ -116,7 +116,7 @@ def _parse_arguments_from_tool_call(
     Band-aid fix for issue in Ollama with inconsistent tool call argument structure.
     Should be removed/changed if fixed upstream.
-    See https://github.com/ollama/ollama/issues/6155
+    `See #6155 <https://github.com/ollama/ollama/issues/6155>`__.
     """
     if "function" not in raw_tool_call:
@@ -142,12 +142,6 @@ def _parse_arguments_from_tool_call(
     return parsed_arguments
-# Removed from v0:
-# - _get_tool_calls_from_response
-# _lc_tool_call_to_openai_tool_call
-# _get_image_from_data_content_block
 def _is_pydantic_class(obj: Any) -> bool:
     return isinstance(obj, type) and is_basemodel_subclass(obj)
@@ -193,7 +187,7 @@ class ChatOllama(BaseChatModel):
     Instantiate:
         .. code-block:: python
-            from langchain_ollama import ChatOllama
+            from langchain_ollama.v1 import ChatOllama
             llm = ChatOllama(
                 model = "llama3",
@@ -209,15 +203,13 @@ class ChatOllama(BaseChatModel):
             from langchain_core.messages.content_blocks import TextContentBlock
             messages = [
-                HumanMessage(content=[
-                    TextContentBlock(type="text", text="Hello!")
-                ])
+                HumanMessage("Hello!")
             ]
             llm.invoke(messages)
         .. code-block:: python
-            AIMessage(content=[{'type': 'text', 'text': 'Hello! How can I help you today?'}], response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:37:50.182604Z', 'done_reason': 'stop', 'done': True, 'total_duration': 3576619666, 'load_duration': 788524916, 'prompt_eval_count': 32, 'prompt_eval_duration': 128125000, 'eval_count': 71, 'eval_duration': 2656556000}, id='run-ba48f958-6402-41a5-b461-5e250a4ebd36-0')
+            AIMessage(content=[{'type': 'text', 'text': 'Hello! How can I help you today?'}], ...)
     Stream:
         .. code-block:: python
@@ -226,18 +218,16 @@ class ChatOllama(BaseChatModel):
             from langchain_core.messages.content_blocks import TextContentBlock
             messages = [
-                HumanMessage(content=[
-                    TextContentBlock(type="text", text="Return the words Hello World!")
-                ])
+                HumanMessage("Return the words Hello World!")
             ]
             for chunk in llm.stream(messages):
                 print(chunk.content, end="")
         .. code-block:: python
-            [{'type': 'text', 'text': 'Hello'}]
-            [{'type': 'text', 'text': ' World'}]
-            [{'type': 'text', 'text': '!'}]
+            AIMessageChunk(content=[{'type': 'text', 'text': 'Hello'}], ...)
+            AIMessageChunk(content=[{'type': 'text', 'text': ' World'}], ...)
+            AIMessageChunk(content=[{'type': 'text', 'text': '!'}], ...)
     Multi-modal input:
         .. code-block:: python
@@ -249,7 +239,6 @@ class ChatOllama(BaseChatModel):
                     TextContentBlock(type="text", text="Describe this image:"),
                     ImageContentBlock(
                         type="image",
-                        mime_type="image/jpeg",
                         base64="base64_encoded_image",
                     )
                 ])
@@ -258,7 +247,6 @@ class ChatOllama(BaseChatModel):
     Tool Calling:
         .. code-block:: python
-            from langchain_ollama import ChatOllama
             from pydantic import BaseModel, Field
             class Multiply(BaseModel):
@@ -267,20 +255,22 @@ class ChatOllama(BaseChatModel):
             llm_with_tools = llm.bind_tools([Multiply])
             ans = llm_with_tools.invoke([
-                HumanMessage(content=[
-                    TextContentBlock(type="text", text="What is 45*67")
-                ])
+                HumanMessage("What is 45*67")
             ])
             ans.tool_calls
         .. code-block:: python
-            [{'name': 'Multiply',
-              'args': {'a': 45, 'b': 67},
-              'id': '420c3f3b-df10-4188-945f-eb3abdb40622',
-              'type': 'tool_call'}]
+            [
+                {
+                    'name': 'Multiply',
+                    'args': {'a': 45, 'b': 67},
+                    'id': '420c3f3b-df10-4188-945f-eb3abdb40622',
+                    'type': 'tool_call'
+                }
+            ]
-    """  # noqa: E501, pylint: disable=line-too-long
+    """  # noqa: E501
     model: str
     """Model name to use."""
@@ -297,6 +287,7 @@ class ChatOllama(BaseChatModel):
     however, if the model's default behavior *is* to perform reasoning, think tags
     (``<think>`` and ``</think>``) will be present within the main response content
     unless you set ``reasoning`` to ``True``.
+
     """
     validate_model_on_init: bool = False
@@ -305,75 +296,126 @@ class ChatOllama(BaseChatModel):
     # Ollama-specific parameters
     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
-    (default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)"""
+
+    (Default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)
+    """
     mirostat_eta: Optional[float] = None
-    """Influences how quickly the algorithm responds to feedback
-    from the generated text. A lower learning rate will result in
-    slower adjustments, while a higher learning rate will make
-    the algorithm more responsive. (Default: ``0.1``)"""
+    """Influences how quickly the algorithm responds to feedback from generated text.
+
+    A lower learning rate will result in slower adjustments, while a higher learning
+    rate will make the algorithm more responsive.
+    (Default: ``0.1``)
+    """
     mirostat_tau: Optional[float] = None
-    """Controls the balance between coherence and diversity
-    of the output. A lower value will result in more focused and
-    coherent text. (Default: ``5.0``)"""
+    """Controls the balance between coherence and diversity of the output.
+
+    A lower value will result in more focused and coherent text.
+    (Default: ``5.0``)
+    """
     num_ctx: Optional[int] = None
-    """Sets the size of the context window used to generate the
-    next token. (Default: ``2048``) """
+    """Sets the size of the context window used to generate the next token.
+
+    (Default: ``2048``)
+    """
     num_gpu: Optional[int] = None
-    """The number of GPUs to use. On macOS it defaults to ``1`` to
-    enable metal support, ``0`` to disable."""
+    """The number of GPUs to use.
+
+    On macOS it defaults to ``1`` to enable metal support, ``0`` to disable.
+    """
     num_thread: Optional[int] = None
     """Sets the number of threads to use during computation.
-    By default, Ollama will detect this for optimal performance.
-    It is recommended to set this value to the number of physical
-    CPU cores your system has (as opposed to the logical number of cores)."""
+
+    By default, Ollama will detect this for optimal performance. It is recommended to
+    set this value to the number of physical CPU cores your system has (as opposed to
+    the logical number of cores).
+    """
     num_predict: Optional[int] = None
     """Maximum number of tokens to predict when generating text.
-    (Default: ``128``, ``-1`` = infinite generation, ``-2`` = fill context)"""
+
+    (Default: ``128``, ``-1`` = infinite generation, ``-2`` = fill context)
+    """
     repeat_last_n: Optional[int] = None
-    """Sets how far back for the model to look back to prevent
-    repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)"""
+    """Sets how far back for the model to look back to prevent repetition.
+
+    (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)
+    """
     repeat_penalty: Optional[float] = None
-    """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``)
-    will penalize repetitions more strongly, while a lower value (e.g., ``0.9``)
-    will be more lenient. (Default: ``1.1``)"""
+    """Sets how strongly to penalize repetitions.
+
+    A higher value (e.g., ``1.5``) will penalize repetitions more strongly, while a
+    lower value (e.g., ``0.9``) will be more lenient.
+    (Default: ``1.1``)
+    """
     temperature: Optional[float] = None
-    """The temperature of the model. Increasing the temperature will
-    make the model answer more creatively. (Default: ``0.8``)"""
+    """The temperature of the model.
+
+    Increasing the temperature will make the model answer more creatively.
+    (Default: ``0.8``)"""
     seed: Optional[int] = None
-    """Sets the random number seed to use for generation. Setting this
-    to a specific number will make the model generate the same text for
-    the same prompt."""
+    """Sets the random number seed to use for generation.
+
+    Setting this to a specific number will make the model generate the same text for the
+    same prompt.
+    """
     stop: Optional[list[str]] = None
     """Sets the stop tokens to use."""
     tfs_z: Optional[float] = None
-    """Tail free sampling is used to reduce the impact of less probable
-    tokens from the output. A higher value (e.g., ``2.0``) will reduce the
-    impact more, while a value of ``1.0`` disables this setting. (default: ``1``)"""
+    """Tail free sampling is used to reduce the impact of less probable tokens from the output.
+
+    A higher value (e.g., ``2.0``) will reduce the impact more, while a value of ``1.0`` disables this setting.
+    (Default: ``1``)
+    """  # noqa: E501
     top_k: Optional[int] = None
-    """Reduces the probability of generating nonsense. A higher value (e.g. ``100``)
-    will give more diverse answers, while a lower value (e.g. ``10``)
-    will be more conservative. (Default: ``40``)"""
+    """Reduces the probability of generating nonsense.
+
+    A higher value (e.g. ``100``) will give more diverse answers, while a lower value
+    (e.g. ``10``) will be more conservative.
+    (Default: ``40``)
+    """
     top_p: Optional[float] = None
-    """Works together with top-k. A higher value (e.g., ``0.95``) will lead
-    to more diverse text, while a lower value (e.g., ``0.5``) will
-    generate more focused and conservative text. (Default: ``0.9``)"""
+    """Works together with top-k.
+
+    A higher value (e.g., ``0.95``) will lead to more diverse text, while a lower value
+    (e.g., ``0.5``) will generate more focused and conservative text.
+    (Default: ``0.9``)
+    """
     format: Optional[Union[Literal["", "json"], JsonSchemaValue]] = None
-    """Specify the format of the output (options: ``'json'``, JSON schema)."""
+    """Specify the format of the output (Options: ``'json'``, JSON schema)."""
     keep_alive: Optional[Union[int, str]] = None
     """How long the model will stay loaded into memory."""
@@ -552,7 +594,7 @@ class ChatOllama(BaseChatModel):
                 and not part.get("message", {}).get("content", "").strip()
             ):
                 log.warning(
-                    "Ollama returned empty response with done_reason='load'. "
+                    "Ollama returned empty response with `done_reason='load'`. "
                     "Skipping this response."
                 )
                 continue
@@ -574,7 +616,6 @@ class ChatOllama(BaseChatModel):
             # Non-streaming case
             response = self._client.chat(**chat_params)
             ai_message = _convert_to_v1_from_ollama_format(response)
-            # Convert to chunk for yielding
             chunk = AIMessageChunk(
                 content=ai_message.content,
                 response_metadata=ai_message.response_metadata,
@@ -602,7 +643,7 @@ class ChatOllama(BaseChatModel):
                 and not part.get("message", {}).get("content", "").strip()
             ):
                 log.warning(
-                    "Ollama returned empty response with done_reason='load'. "
+                    "Ollama returned empty response with `done_reason='load'`. "
                     "Skipping this response."
                 )
                 continue
@@ -624,7 +665,6 @@ class ChatOllama(BaseChatModel):
             # Non-streaming case
             response = await self._async_client.chat(**chat_params)
             ai_message = _convert_to_v1_from_ollama_format(response)
-            # Convert to chunk for yielding
             chunk = AIMessageChunk(
                 content=ai_message.content,
                 response_metadata=ai_message.response_metadata,
@@ -649,6 +689,7 @@ class ChatOllama(BaseChatModel):
         Returns:
             Complete AI message response.
+
         """
         stream_iter = self._generate_stream(
             messages, stop=stop, run_manager=run_manager, **kwargs
@@ -672,6 +713,7 @@ class ChatOllama(BaseChatModel):
         Returns:
             Complete AI message response.
+
         """
         stream_iter = self._agenerate_stream(
             messages, stop=stop, run_manager=run_manager, **kwargs
@@ -695,6 +737,7 @@ class ChatOllama(BaseChatModel):
         Yields:
             AI message chunks in v1 format.
+
         """
         yield from self._generate_stream(
             messages, stop=stop, run_manager=run_manager, **kwargs
@@ -717,6 +760,7 @@ class ChatOllama(BaseChatModel):
         Yields:
             AI message chunks in v1 format.
+
         """
         async for chunk in self._agenerate_stream(
             messages, stop=stop, run_manager=run_manager, **kwargs
@@ -735,7 +779,8 @@ class ChatOllama(BaseChatModel):
         Args:
             tools: A list of tool definitions to bind to this chat model.
             tool_choice: Tool choice parameter (currently ignored by Ollama).
-            kwargs: Additional parameters passed to bind().
+            kwargs: Additional parameters passed to ``bind()``.
+
         """
         formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
         return super().bind(tools=formatted_tools, **kwargs)
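
Tying the reworked field docstrings together, a brief instantiation sketch using the v1 namespace (the option values are simply the defaults quoted in the docstrings above):

    # Hedged sketch: Ollama-specific options documented above, set explicitly.
    from langchain_ollama.v1 import ChatOllama

    llm = ChatOllama(
        model="llama3.1",
        temperature=0.8,              # documented default
        num_ctx=2048,                 # context window size (documented default)
        num_predict=128,              # max tokens to predict (documented default)
        validate_model_on_init=True,  # validate the model against the local Ollama instance at init
    )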

View File

@@ -12,7 +12,7 @@ import pytest
 from pydantic import BaseModel, Field
 from typing_extensions import TypedDict
-from langchain_ollama.chat_models_v1 import ChatOllama
+from langchain_ollama.v1.chat_models import ChatOllama
 DEFAULT_MODEL_NAME = "llama3.1"

View File

@@ -12,7 +12,7 @@ from langchain_tests.integration_tests.chat_models_v1 import ChatModelV1Integrat
 from ollama import ResponseError
 from pydantic import ValidationError
-from langchain_ollama.chat_models_v1 import ChatOllama
+from langchain_ollama.v1.chat_models import ChatOllama
 DEFAULT_MODEL_NAME = "llama3.1"
@@ -251,7 +251,7 @@ class TestChatOllamaV1(ChatModelV1IntegrationTests):
                 f"Content blocks: {[block.get('type') for block in result.content]}"
             )
-    @patch("langchain_ollama.chat_models_v1.Client.list")
+    @patch("langchain_ollama.v1.chat_models.Client.list")
     def test_init_model_not_found(self, mock_list: MagicMock) -> None:
         """Test that a ValueError is raised when the model is not found."""
         mock_list.side_effect = ValueError("Test model not found")
@@ -259,7 +259,7 @@ class TestChatOllamaV1(ChatModelV1IntegrationTests):
             ChatOllama(model="non-existent-model", validate_model_on_init=True)
         assert "Test model not found" in str(excinfo.value)
-    @patch("langchain_ollama.chat_models_v1.Client.list")
+    @patch("langchain_ollama.v1.chat_models.Client.list")
     def test_init_connection_error(self, mock_list: MagicMock) -> None:
         """Test that a ValidationError is raised on connect failure during init."""
         mock_list.side_effect = ConnectError("Test connection error")
@@ -268,7 +268,7 @@ class TestChatOllamaV1(ChatModelV1IntegrationTests):
             ChatOllama(model="any-model", validate_model_on_init=True)
         assert "Failed to connect to Ollama" in str(excinfo.value)
-    @patch("langchain_ollama.chat_models_v1.Client.list")
+    @patch("langchain_ollama.v1.chat_models.Client.list")
     def test_init_response_error(self, mock_list: MagicMock) -> None:
         """Test that a ResponseError is raised."""
         mock_list.side_effect = ResponseError("Test response error")

View File

@@ -20,7 +20,7 @@ from langchain_ollama._compat import (
     _convert_from_v1_to_ollama_format,
     _convert_to_v1_from_ollama_format,
 )
-from langchain_ollama.chat_models_v1 import (
+from langchain_ollama.v1.chat_models import (
     ChatOllama,
     _parse_arguments_from_tool_call,
     _parse_json_string,
@@ -246,8 +246,8 @@ class TestChatOllama(ChatModelV1UnitTests):
     @pytest.fixture
     def model(self) -> Generator[ChatOllama, None, None]:  # type: ignore[override]
         """Create a ChatOllama instance for testing."""
-        sync_patcher = patch("langchain_ollama.chat_models_v1.Client")
-        async_patcher = patch("langchain_ollama.chat_models_v1.AsyncClient")
+        sync_patcher = patch("langchain_ollama.v1.chat_models.Client")
+        async_patcher = patch("langchain_ollama.v1.chat_models.AsyncClient")
         mock_sync_client_class = sync_patcher.start()
         mock_async_client_class = async_patcher.start()
@@ -328,8 +328,8 @@ class TestChatOllama(ChatModelV1UnitTests):
     def test_initialization(self) -> None:
         """Test `ChatOllama` initialization."""
         with (
-            patch("langchain_ollama.chat_models_v1.Client"),
-            patch("langchain_ollama.chat_models_v1.AsyncClient"),
+            patch("langchain_ollama.v1.chat_models.Client"),
+            patch("langchain_ollama.v1.chat_models.AsyncClient"),
         ):
             llm = ChatOllama(model=MODEL_NAME)
@@ -339,8 +339,8 @@ class TestChatOllama(ChatModelV1UnitTests):
     def test_chat_params(self) -> None:
         """Test `_chat_params()`."""
         with (
-            patch("langchain_ollama.chat_models_v1.Client"),
-            patch("langchain_ollama.chat_models_v1.AsyncClient"),
+            patch("langchain_ollama.v1.chat_models.Client"),
+            patch("langchain_ollama.v1.chat_models.AsyncClient"),
         ):
             llm = ChatOllama(model=MODEL_NAME, temperature=0.7)
@@ -359,8 +359,8 @@ class TestChatOllama(ChatModelV1UnitTests):
     def test_ls_params(self) -> None:
         """Test LangSmith parameters."""
         with (
-            patch("langchain_ollama.chat_models_v1.Client"),
-            patch("langchain_ollama.chat_models_v1.AsyncClient"),
+            patch("langchain_ollama.v1.chat_models.Client"),
+            patch("langchain_ollama.v1.chat_models.AsyncClient"),
         ):
             llm = ChatOllama(model=MODEL_NAME, temperature=0.5)
@@ -374,8 +374,8 @@ class TestChatOllama(ChatModelV1UnitTests):
     def test_bind_tools_basic(self) -> None:
         """Test basic tool binding functionality."""
         with (
-            patch("langchain_ollama.chat_models_v1.Client"),
-            patch("langchain_ollama.chat_models_v1.AsyncClient"),
+            patch("langchain_ollama.v1.chat_models.Client"),
+            patch("langchain_ollama.v1.chat_models.AsyncClient"),
         ):
             llm = ChatOllama(model=MODEL_NAME)
@@ -394,8 +394,8 @@ class TestChatOllama(ChatModelV1UnitTests):
 # But can be added if needed in the future.
-@patch("langchain_ollama.chat_models_v1.validate_model")
-@patch("langchain_ollama.chat_models_v1.Client")
+@patch("langchain_ollama.v1.chat_models.validate_model")
+@patch("langchain_ollama.v1.chat_models.Client")
 def test_validate_model_on_init(
     mock_client_class: Any, mock_validate_model: Any
 ) -> None:
@@ -501,7 +501,7 @@ def test_load_response_with_empty_content_is_skipped(
         }
     ]
-    with patch("langchain_ollama.chat_models_v1.Client") as mock_client_class:
+    with patch("langchain_ollama.v1.chat_models.Client") as mock_client_class:
         mock_client = MagicMock()
         mock_client_class.return_value = mock_client
         mock_client.chat.return_value = iter(load_only_response)
@@ -531,7 +531,7 @@ def test_load_response_with_whitespace_content_is_skipped(
         }
     ]
-    with patch("langchain_ollama.chat_models_v1.Client") as mock_client_class:
+    with patch("langchain_ollama.v1.chat_models.Client") as mock_client_class:
         mock_client = MagicMock()
         mock_client_class.return_value = mock_client
         mock_client.chat.return_value = iter(load_whitespace_response)
@@ -570,7 +570,7 @@ def test_load_followed_by_content_response(
         },
     ]
-    with patch("langchain_ollama.chat_models_v1.Client") as mock_client_class:
+    with patch("langchain_ollama.v1.chat_models.Client") as mock_client_class:
         mock_client = MagicMock()
         mock_client_class.return_value = mock_client
         mock_client.chat.return_value = iter(load_then_content_response)
@@ -600,7 +600,7 @@ def test_load_response_with_actual_content_is_not_skipped(
         }
     ]
-    with patch("langchain_ollama.chat_models_v1.Client") as mock_client_class:
+    with patch("langchain_ollama.v1.chat_models.Client") as mock_client_class:
         mock_client = MagicMock()
         mock_client_class.return_value = mock_client
         mock_client.chat.return_value = iter(load_with_content_response)