From e18e2c13ce7cb064b87ccc615c42bb3bd6142285 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Wed, 6 Aug 2025 12:19:28 -0400 Subject: [PATCH] nits & namespace update for ollama --- .../ollama/langchain_ollama/__init__.py | 2 - .../ollama/langchain_ollama/_compat.py | 2 +- .../ollama/langchain_ollama/_utils.py | 4 +- .../ollama/langchain_ollama/v1/__init__.py | 5 + .../v1/chat_models/__init__.py | 5 + .../chat_models/base.py} | 193 +++++++++++------- .../integration_tests/test_chat_models_v1.py | 2 +- .../test_chat_models_v1_standard.py | 8 +- .../tests/unit_tests/test_chat_models_v1.py | 34 +-- 9 files changed, 154 insertions(+), 101 deletions(-) create mode 100644 libs/partners/ollama/langchain_ollama/v1/__init__.py create mode 100644 libs/partners/ollama/langchain_ollama/v1/chat_models/__init__.py rename libs/partners/ollama/langchain_ollama/{chat_models_v1.py => v1/chat_models/base.py} (88%) diff --git a/libs/partners/ollama/langchain_ollama/__init__.py b/libs/partners/ollama/langchain_ollama/__init__.py index 32543822c84..3789c6de8a4 100644 --- a/libs/partners/ollama/langchain_ollama/__init__.py +++ b/libs/partners/ollama/langchain_ollama/__init__.py @@ -16,7 +16,6 @@ service. from importlib import metadata from langchain_ollama.chat_models import ChatOllama -from langchain_ollama.chat_models_v1 import ChatOllama as ChatOllamaV1 from langchain_ollama.embeddings import OllamaEmbeddings from langchain_ollama.llms import OllamaLLM @@ -31,7 +30,6 @@ del metadata # optional, avoids polluting the results of dir(__package__) __all__ = [ "ChatOllama", - "ChatOllamaV1", "OllamaEmbeddings", "OllamaLLM", "__version__", diff --git a/libs/partners/ollama/langchain_ollama/_compat.py b/libs/partners/ollama/langchain_ollama/_compat.py index a1027c4541c..ac2cedf4d5b 100644 --- a/libs/partners/ollama/langchain_ollama/_compat.py +++ b/libs/partners/ollama/langchain_ollama/_compat.py @@ -1,4 +1,4 @@ -"""V1 message conversion utilities for Ollama.""" +"""LangChain v1 message conversion utilities for Ollama.""" from __future__ import annotations diff --git a/libs/partners/ollama/langchain_ollama/_utils.py b/libs/partners/ollama/langchain_ollama/_utils.py index 3da9981621f..8d08ed87a41 100644 --- a/libs/partners/ollama/langchain_ollama/_utils.py +++ b/libs/partners/ollama/langchain_ollama/_utils.py @@ -1,11 +1,11 @@ -"""Utility functions for validating Ollama models.""" +"""Utility function to validate Ollama models.""" from httpx import ConnectError from ollama import Client, ResponseError def validate_model(client: Client, model_name: str) -> None: - """Validate that a model exists in the Ollama instance. + """Validate that a model exists in the local Ollama instance. Args: client: The Ollama client. 
diff --git a/libs/partners/ollama/langchain_ollama/v1/__init__.py b/libs/partners/ollama/langchain_ollama/v1/__init__.py
new file mode 100644
index 00000000000..b4440f2d095
--- /dev/null
+++ b/libs/partners/ollama/langchain_ollama/v1/__init__.py
@@ -0,0 +1,5 @@
+from langchain_ollama.v1.chat_models import (
+    ChatOllama,
+)
+
+__all__ = ["ChatOllama"]
diff --git a/libs/partners/ollama/langchain_ollama/v1/chat_models/__init__.py b/libs/partners/ollama/langchain_ollama/v1/chat_models/__init__.py
new file mode 100644
index 00000000000..d87e3c37866
--- /dev/null
+++ b/libs/partners/ollama/langchain_ollama/v1/chat_models/__init__.py
@@ -0,0 +1,5 @@
+from langchain_ollama.v1.chat_models.base import (
+    ChatOllama,
+)
+
+__all__ = ["ChatOllama"]
diff --git a/libs/partners/ollama/langchain_ollama/chat_models_v1.py b/libs/partners/ollama/langchain_ollama/v1/chat_models/base.py
similarity index 88%
rename from libs/partners/ollama/langchain_ollama/chat_models_v1.py
rename to libs/partners/ollama/langchain_ollama/v1/chat_models/base.py
index 54f6fc3ba34..e21bb38dad5 100644
--- a/libs/partners/ollama/langchain_ollama/chat_models_v1.py
+++ b/libs/partners/ollama/langchain_ollama/v1/chat_models/base.py
@@ -1,9 +1,9 @@
-"""Ollama chat model v1 implementation.
+"""v1 Ollama implementation.
 
-This implementation provides native support for v1 messages with structured
-content blocks.
+Provides native support for v1 messages with standard content blocks.
 
 .. versionadded:: 1.0.0
+
 """
 
 from __future__ import annotations
@@ -45,12 +45,12 @@ from pydantic.json_schema import JsonSchemaValue
 from pydantic.v1 import BaseModel as BaseModelV1
 from typing_extensions import Self, is_typeddict
 
-from ._compat import (
+from langchain_ollama._compat import (
     _convert_chunk_to_v1,
     _convert_from_v1_to_ollama_format,
     _convert_to_v1_from_ollama_format,
 )
-from ._utils import validate_model
+from langchain_ollama._utils import validate_model
 
 log = logging.getLogger(__name__)
 
@@ -116,7 +116,7 @@ def _parse_arguments_from_tool_call(
     Band-aid fix for issue in Ollama with inconsistent tool call argument structure.
     Should be removed/changed if fixed upstream.
 
-    See https://github.com/ollama/ollama/issues/6155
+    `See #6155 <https://github.com/ollama/ollama/issues/6155>`__.
 
     """
     if "function" not in raw_tool_call:
@@ -142,12 +142,6 @@ def _parse_arguments_from_tool_call(
     return parsed_arguments
 
 
-# Removed from v0:
-# - _get_tool_calls_from_response
-# _lc_tool_call_to_openai_tool_call
-# _get_image_from_data_content_block
-
-
 def _is_pydantic_class(obj: Any) -> bool:
     return isinstance(obj, type) and is_basemodel_subclass(obj)
 
@@ -193,7 +187,7 @@ class ChatOllama(BaseChatModel):
     Instantiate:
         .. code-block:: python
 
-            from langchain_ollama import ChatOllama
+            from langchain_ollama.v1 import ChatOllama
 
             llm = ChatOllama(
                 model = "llama3",
@@ -209,15 +203,13 @@ class ChatOllama(BaseChatModel):
             from langchain_core.messages.content_blocks import TextContentBlock
 
             messages = [
-                HumanMessage(content=[
-                    TextContentBlock(type="text", text="Hello!")
-                ])
+                HumanMessage("Hello!")
             ]
             llm.invoke(messages)
 
         .. code-block:: python
 
-            AIMessage(content=[{'type': 'text', 'text': 'Hello! How can I help you today?'}], response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:37:50.182604Z', 'done_reason': 'stop', 'done': True, 'total_duration': 3576619666, 'load_duration': 788524916, 'prompt_eval_count': 32, 'prompt_eval_duration': 128125000, 'eval_count': 71, 'eval_duration': 2656556000}, id='run-ba48f958-6402-41a5-b461-5e250a4ebd36-0')
+            AIMessage(content=[{'type': 'text', 'text': 'Hello! 
How can I help you today?'}], ...)
 
     Stream:
         .. code-block:: python
 
             from langchain_core.messages import HumanMessage
             from langchain_core.messages.content_blocks import TextContentBlock
 
             messages = [
-                HumanMessage(content=[
-                    TextContentBlock(type="text", text="Return the words Hello World!")
-                ])
+                HumanMessage("Return the words Hello World!")
             ]
 
             for chunk in llm.stream(messages):
                 print(chunk.content, end="")
 
         .. code-block:: python
 
-            [{'type': 'text', 'text': 'Hello'}]
-            [{'type': 'text', 'text': ' World'}]
-            [{'type': 'text', 'text': '!'}]
+            AIMessageChunk(content=[{'type': 'text', 'text': 'Hello'}], ...)
+            AIMessageChunk(content=[{'type': 'text', 'text': ' World'}], ...)
+            AIMessageChunk(content=[{'type': 'text', 'text': '!'}], ...)
 
     Multi-modal input:
         .. code-block:: python
 
             from langchain_core.messages import HumanMessage
             from langchain_core.messages.content_blocks import TextContentBlock, ImageContentBlock
 
             messages = [
                 HumanMessage(content=[
                     TextContentBlock(type="text", text="Describe this image:"),
                     ImageContentBlock(
                         type="image",
-                        mime_type="image/jpeg",
                         base64="base64_encoded_image",
                     )
                 ])
             ]
             llm.invoke(messages)
 
     Tool Calling:
         .. code-block:: python
 
-            from langchain_ollama import ChatOllama
             from pydantic import BaseModel, Field
 
             class Multiply(BaseModel):
                 a: int = Field(..., description="First integer")
                 b: int = Field(..., description="Second integer")
 
             llm_with_tools = llm.bind_tools([Multiply])
             ans = llm_with_tools.invoke([
-                HumanMessage(content=[
-                    TextContentBlock(type="text", text="What is 45*67")
-                ])
+                HumanMessage("What is 45*67")
             ])
             ans.tool_calls
 
        .. code-block:: python
 
-            [{'name': 'Multiply',
-              'args': {'a': 45, 'b': 67},
-              'id': '420c3f3b-df10-4188-945f-eb3abdb40622',
-              'type': 'tool_call'}]
+            [
+                {
+                    'name': 'Multiply',
+                    'args': {'a': 45, 'b': 67},
+                    'id': '420c3f3b-df10-4188-945f-eb3abdb40622',
+                    'type': 'tool_call'
+                }
+            ]
 
-    """  # noqa: E501, pylint: disable=line-too-long
+    """  # noqa: E501
 
     model: str
     """Model name to use."""
@@ -297,6 +287,7 @@ class ChatOllama(BaseChatModel):
     however, if the model's default behavior *is* to perform reasoning, think tags
     (``<think>`` and ``</think>``) will be present within the main response content
     unless you set ``reasoning`` to ``True``.
+
     """
 
     validate_model_on_init: bool = False
@@ -305,75 +296,126 @@
     # Ollama-specific parameters
     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
-    (default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)"""
+
+    (Default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)
+
+    """
     mirostat_eta: Optional[float] = None
-    """Influences how quickly the algorithm responds to feedback
-    from the generated text. A lower learning rate will result in
-    slower adjustments, while a higher learning rate will make
-    the algorithm more responsive. (Default: ``0.1``)"""
+    """Influences how quickly the algorithm responds to feedback from generated text.
+
+    A lower learning rate will result in slower adjustments, while a higher learning
+    rate will make the algorithm more responsive.
+
+    (Default: ``0.1``)
+
+    """
     mirostat_tau: Optional[float] = None
-    """Controls the balance between coherence and diversity
-    of the output. A lower value will result in more focused and
-    coherent text. (Default: ``5.0``)"""
+    """Controls the balance between coherence and diversity of the output.
+
+    A lower value will result in more focused and coherent text.
+
+    (Default: ``5.0``)
+
+    """
     num_ctx: Optional[int] = None
-    """Sets the size of the context window used to generate the
-    next token. 
(Default: ``2048``) """ + """Sets the size of the context window used to generate the next token. + + (Default: ``2048``) + + """ num_gpu: Optional[int] = None - """The number of GPUs to use. On macOS it defaults to ``1`` to - enable metal support, ``0`` to disable.""" + """The number of GPUs to use. + + On macOS it defaults to ``1`` to enable metal support, ``0`` to disable. + + """ num_thread: Optional[int] = None """Sets the number of threads to use during computation. - By default, Ollama will detect this for optimal performance. - It is recommended to set this value to the number of physical - CPU cores your system has (as opposed to the logical number of cores).""" + + By default, Ollama will detect this for optimal performance. It is recommended to + set this value to the number of physical CPU cores your system has (as opposed to + the logical number of cores). + + """ num_predict: Optional[int] = None """Maximum number of tokens to predict when generating text. - (Default: ``128``, ``-1`` = infinite generation, ``-2`` = fill context)""" + + (Default: ``128``, ``-1`` = infinite generation, ``-2`` = fill context) + + """ repeat_last_n: Optional[int] = None - """Sets how far back for the model to look back to prevent - repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)""" + """Sets how far back for the model to look back to prevent repetition. + + (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``) + + """ repeat_penalty: Optional[float] = None - """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``) - will penalize repetitions more strongly, while a lower value (e.g., ``0.9``) - will be more lenient. (Default: ``1.1``)""" + """Sets how strongly to penalize repetitions. + + A higher value (e.g., ``1.5``) will penalize repetitions more strongly, while a + lower value (e.g., ``0.9``) will be more lenient. + + (Default: ``1.1``) + + """ temperature: Optional[float] = None - """The temperature of the model. Increasing the temperature will - make the model answer more creatively. (Default: ``0.8``)""" + """The temperature of the model. + + Increasing the temperature will make the model answer more creatively. + + (Default: ``0.8``)""" seed: Optional[int] = None - """Sets the random number seed to use for generation. Setting this - to a specific number will make the model generate the same text for - the same prompt.""" + """Sets the random number seed to use for generation. + + Setting this to a specific number will make the model generate the same text for the + same prompt. + + """ stop: Optional[list[str]] = None """Sets the stop tokens to use.""" tfs_z: Optional[float] = None - """Tail free sampling is used to reduce the impact of less probable - tokens from the output. A higher value (e.g., ``2.0``) will reduce the - impact more, while a value of ``1.0`` disables this setting. (default: ``1``)""" + """Tail free sampling is used to reduce the impact of less probable tokens from the output. + + A higher value (e.g., ``2.0``) will reduce the impact more, while a value of ``1.0`` disables this setting. + + (Default: ``1``) + + """ # noqa: E501 top_k: Optional[int] = None - """Reduces the probability of generating nonsense. A higher value (e.g. ``100``) - will give more diverse answers, while a lower value (e.g. ``10``) - will be more conservative. (Default: ``40``)""" + """Reduces the probability of generating nonsense. + + A higher value (e.g. ``100``) will give more diverse answers, while a lower value + (e.g. 
``10``) will be more conservative. + + (Default: ``40``) + + """ top_p: Optional[float] = None - """Works together with top-k. A higher value (e.g., ``0.95``) will lead - to more diverse text, while a lower value (e.g., ``0.5``) will - generate more focused and conservative text. (Default: ``0.9``)""" + """Works together with top-k. + + A higher value (e.g., ``0.95``) will lead to more diverse text, while a lower value + (e.g., ``0.5``) will generate more focused and conservative text. + + (Default: ``0.9``) + + """ format: Optional[Union[Literal["", "json"], JsonSchemaValue]] = None - """Specify the format of the output (options: ``'json'``, JSON schema).""" + """Specify the format of the output (Options: ``'json'``, JSON schema).""" keep_alive: Optional[Union[int, str]] = None """How long the model will stay loaded into memory.""" @@ -552,7 +594,7 @@ class ChatOllama(BaseChatModel): and not part.get("message", {}).get("content", "").strip() ): log.warning( - "Ollama returned empty response with done_reason='load'. " + "Ollama returned empty response with `done_reason='load'`. " "Skipping this response." ) continue @@ -574,7 +616,6 @@ class ChatOllama(BaseChatModel): # Non-streaming case response = self._client.chat(**chat_params) ai_message = _convert_to_v1_from_ollama_format(response) - # Convert to chunk for yielding chunk = AIMessageChunk( content=ai_message.content, response_metadata=ai_message.response_metadata, @@ -602,7 +643,7 @@ class ChatOllama(BaseChatModel): and not part.get("message", {}).get("content", "").strip() ): log.warning( - "Ollama returned empty response with done_reason='load'. " + "Ollama returned empty response with `done_reason='load'`. " "Skipping this response." ) continue @@ -624,7 +665,6 @@ class ChatOllama(BaseChatModel): # Non-streaming case response = await self._async_client.chat(**chat_params) ai_message = _convert_to_v1_from_ollama_format(response) - # Convert to chunk for yielding chunk = AIMessageChunk( content=ai_message.content, response_metadata=ai_message.response_metadata, @@ -649,6 +689,7 @@ class ChatOllama(BaseChatModel): Returns: Complete AI message response. + """ stream_iter = self._generate_stream( messages, stop=stop, run_manager=run_manager, **kwargs @@ -672,6 +713,7 @@ class ChatOllama(BaseChatModel): Returns: Complete AI message response. + """ stream_iter = self._agenerate_stream( messages, stop=stop, run_manager=run_manager, **kwargs @@ -695,6 +737,7 @@ class ChatOllama(BaseChatModel): Yields: AI message chunks in v1 format. + """ yield from self._generate_stream( messages, stop=stop, run_manager=run_manager, **kwargs @@ -717,6 +760,7 @@ class ChatOllama(BaseChatModel): Yields: AI message chunks in v1 format. + """ async for chunk in self._agenerate_stream( messages, stop=stop, run_manager=run_manager, **kwargs @@ -735,7 +779,8 @@ class ChatOllama(BaseChatModel): Args: tools: A list of tool definitions to bind to this chat model. tool_choice: Tool choice parameter (currently ignored by Ollama). - kwargs: Additional parameters passed to bind(). + kwargs: Additional parameters passed to ``bind()``. 
+ """ formatted_tools = [convert_to_openai_tool(tool) for tool in tools] return super().bind(tools=formatted_tools, **kwargs) diff --git a/libs/partners/ollama/tests/integration_tests/test_chat_models_v1.py b/libs/partners/ollama/tests/integration_tests/test_chat_models_v1.py index 7281c819df9..c00955a2044 100644 --- a/libs/partners/ollama/tests/integration_tests/test_chat_models_v1.py +++ b/libs/partners/ollama/tests/integration_tests/test_chat_models_v1.py @@ -12,7 +12,7 @@ import pytest from pydantic import BaseModel, Field from typing_extensions import TypedDict -from langchain_ollama.chat_models_v1 import ChatOllama +from langchain_ollama.v1.chat_models import ChatOllama DEFAULT_MODEL_NAME = "llama3.1" diff --git a/libs/partners/ollama/tests/integration_tests/test_chat_models_v1_standard.py b/libs/partners/ollama/tests/integration_tests/test_chat_models_v1_standard.py index b74254a628d..656e79579a9 100644 --- a/libs/partners/ollama/tests/integration_tests/test_chat_models_v1_standard.py +++ b/libs/partners/ollama/tests/integration_tests/test_chat_models_v1_standard.py @@ -12,7 +12,7 @@ from langchain_tests.integration_tests.chat_models_v1 import ChatModelV1Integrat from ollama import ResponseError from pydantic import ValidationError -from langchain_ollama.chat_models_v1 import ChatOllama +from langchain_ollama.v1.chat_models import ChatOllama DEFAULT_MODEL_NAME = "llama3.1" @@ -251,7 +251,7 @@ class TestChatOllamaV1(ChatModelV1IntegrationTests): f"Content blocks: {[block.get('type') for block in result.content]}" ) - @patch("langchain_ollama.chat_models_v1.Client.list") + @patch("langchain_ollama.v1.chat_models.Client.list") def test_init_model_not_found(self, mock_list: MagicMock) -> None: """Test that a ValueError is raised when the model is not found.""" mock_list.side_effect = ValueError("Test model not found") @@ -259,7 +259,7 @@ class TestChatOllamaV1(ChatModelV1IntegrationTests): ChatOllama(model="non-existent-model", validate_model_on_init=True) assert "Test model not found" in str(excinfo.value) - @patch("langchain_ollama.chat_models_v1.Client.list") + @patch("langchain_ollama.v1.chat_models.Client.list") def test_init_connection_error(self, mock_list: MagicMock) -> None: """Test that a ValidationError is raised on connect failure during init.""" mock_list.side_effect = ConnectError("Test connection error") @@ -268,7 +268,7 @@ class TestChatOllamaV1(ChatModelV1IntegrationTests): ChatOllama(model="any-model", validate_model_on_init=True) assert "Failed to connect to Ollama" in str(excinfo.value) - @patch("langchain_ollama.chat_models_v1.Client.list") + @patch("langchain_ollama.v1.chat_models.Client.list") def test_init_response_error(self, mock_list: MagicMock) -> None: """Test that a ResponseError is raised.""" mock_list.side_effect = ResponseError("Test response error") diff --git a/libs/partners/ollama/tests/unit_tests/test_chat_models_v1.py b/libs/partners/ollama/tests/unit_tests/test_chat_models_v1.py index 5788bf54b61..bbb830b8437 100644 --- a/libs/partners/ollama/tests/unit_tests/test_chat_models_v1.py +++ b/libs/partners/ollama/tests/unit_tests/test_chat_models_v1.py @@ -20,7 +20,7 @@ from langchain_ollama._compat import ( _convert_from_v1_to_ollama_format, _convert_to_v1_from_ollama_format, ) -from langchain_ollama.chat_models_v1 import ( +from langchain_ollama.v1.chat_models import ( ChatOllama, _parse_arguments_from_tool_call, _parse_json_string, @@ -246,8 +246,8 @@ class TestChatOllama(ChatModelV1UnitTests): @pytest.fixture def model(self) -> Generator[ChatOllama, 
None, None]: # type: ignore[override] """Create a ChatOllama instance for testing.""" - sync_patcher = patch("langchain_ollama.chat_models_v1.Client") - async_patcher = patch("langchain_ollama.chat_models_v1.AsyncClient") + sync_patcher = patch("langchain_ollama.v1.chat_models.Client") + async_patcher = patch("langchain_ollama.v1.chat_models.AsyncClient") mock_sync_client_class = sync_patcher.start() mock_async_client_class = async_patcher.start() @@ -328,8 +328,8 @@ class TestChatOllama(ChatModelV1UnitTests): def test_initialization(self) -> None: """Test `ChatOllama` initialization.""" with ( - patch("langchain_ollama.chat_models_v1.Client"), - patch("langchain_ollama.chat_models_v1.AsyncClient"), + patch("langchain_ollama.v1.chat_models.Client"), + patch("langchain_ollama.v1.chat_models.AsyncClient"), ): llm = ChatOllama(model=MODEL_NAME) @@ -339,8 +339,8 @@ class TestChatOllama(ChatModelV1UnitTests): def test_chat_params(self) -> None: """Test `_chat_params()`.""" with ( - patch("langchain_ollama.chat_models_v1.Client"), - patch("langchain_ollama.chat_models_v1.AsyncClient"), + patch("langchain_ollama.v1.chat_models.Client"), + patch("langchain_ollama.v1.chat_models.AsyncClient"), ): llm = ChatOllama(model=MODEL_NAME, temperature=0.7) @@ -359,8 +359,8 @@ class TestChatOllama(ChatModelV1UnitTests): def test_ls_params(self) -> None: """Test LangSmith parameters.""" with ( - patch("langchain_ollama.chat_models_v1.Client"), - patch("langchain_ollama.chat_models_v1.AsyncClient"), + patch("langchain_ollama.v1.chat_models.Client"), + patch("langchain_ollama.v1.chat_models.AsyncClient"), ): llm = ChatOllama(model=MODEL_NAME, temperature=0.5) @@ -374,8 +374,8 @@ class TestChatOllama(ChatModelV1UnitTests): def test_bind_tools_basic(self) -> None: """Test basic tool binding functionality.""" with ( - patch("langchain_ollama.chat_models_v1.Client"), - patch("langchain_ollama.chat_models_v1.AsyncClient"), + patch("langchain_ollama.v1.chat_models.Client"), + patch("langchain_ollama.v1.chat_models.AsyncClient"), ): llm = ChatOllama(model=MODEL_NAME) @@ -394,8 +394,8 @@ class TestChatOllama(ChatModelV1UnitTests): # But can be added if needed in the future. 
-@patch("langchain_ollama.chat_models_v1.validate_model") -@patch("langchain_ollama.chat_models_v1.Client") +@patch("langchain_ollama.v1.chat_models.validate_model") +@patch("langchain_ollama.v1.chat_models.Client") def test_validate_model_on_init( mock_client_class: Any, mock_validate_model: Any ) -> None: @@ -501,7 +501,7 @@ def test_load_response_with_empty_content_is_skipped( } ] - with patch("langchain_ollama.chat_models_v1.Client") as mock_client_class: + with patch("langchain_ollama.v1.chat_models.Client") as mock_client_class: mock_client = MagicMock() mock_client_class.return_value = mock_client mock_client.chat.return_value = iter(load_only_response) @@ -531,7 +531,7 @@ def test_load_response_with_whitespace_content_is_skipped( } ] - with patch("langchain_ollama.chat_models_v1.Client") as mock_client_class: + with patch("langchain_ollama.v1.chat_models.Client") as mock_client_class: mock_client = MagicMock() mock_client_class.return_value = mock_client mock_client.chat.return_value = iter(load_whitespace_response) @@ -570,7 +570,7 @@ def test_load_followed_by_content_response( }, ] - with patch("langchain_ollama.chat_models_v1.Client") as mock_client_class: + with patch("langchain_ollama.v1.chat_models.Client") as mock_client_class: mock_client = MagicMock() mock_client_class.return_value = mock_client mock_client.chat.return_value = iter(load_then_content_response) @@ -600,7 +600,7 @@ def test_load_response_with_actual_content_is_not_skipped( } ] - with patch("langchain_ollama.chat_models_v1.Client") as mock_client_class: + with patch("langchain_ollama.v1.chat_models.Client") as mock_client_class: mock_client = MagicMock() mock_client_class.return_value = mock_client mock_client.chat.return_value = iter(load_with_content_response)