nits & namespace update for ollama

Mason Daugherty 2025-08-06 12:19:28 -04:00
parent 821527b97a
commit e18e2c13ce
9 changed files with 154 additions and 101 deletions
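
In short: the provisional chat_models_v1 module moves under a dedicated v1 namespace, and the ChatOllamaV1 re-export is dropped from the package root. A minimal sketch of the resulting import path (module paths taken from the hunks below; the model name is simply the one used in the tests):

    # Hedged sketch of the import change made by this commit.
    # Before (removed below):
    #     from langchain_ollama import ChatOllamaV1
    # After:
    from langchain_ollama.v1 import ChatOllama

    llm = ChatOllama(model="llama3.1")  # model name as used in the tests below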

View File

@@ -16,7 +16,6 @@ service.
 from importlib import metadata
 from langchain_ollama.chat_models import ChatOllama
-from langchain_ollama.chat_models_v1 import ChatOllama as ChatOllamaV1
 from langchain_ollama.embeddings import OllamaEmbeddings
 from langchain_ollama.llms import OllamaLLM
@@ -31,7 +30,6 @@ del metadata # optional, avoids polluting the results of dir(__package__)
 __all__ = [
     "ChatOllama",
-    "ChatOllamaV1",
     "OllamaEmbeddings",
     "OllamaLLM",
     "__version__",

View File

@@ -1,4 +1,4 @@
-"""V1 message conversion utilities for Ollama."""
+"""LangChain v1 message conversion utilities for Ollama."""
 from __future__ import annotations

View File

@@ -1,11 +1,11 @@
-"""Utility functions for validating Ollama models."""
+"""Utility function to validate Ollama models."""
 from httpx import ConnectError
 from ollama import Client, ResponseError
 def validate_model(client: Client, model_name: str) -> None:
-    """Validate that a model exists in the Ollama instance.
+    """Validate that a model exists in the local Ollama instance.
     Args:
         client: The Ollama client.
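
A small usage sketch of this helper, assuming the stock Ollama client pointed at the usual local host (the host value is an assumption, not something this diff specifies):

    # Hedged sketch: calling validate_model directly.
    from ollama import Client

    from langchain_ollama._utils import validate_model

    client = Client(host="http://localhost:11434")  # assumed default local Ollama host
    validate_model(client, "llama3.1")  # raises if the model is not available locally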

View File

@@ -0,0 +1,5 @@
+from langchain_ollama.v1.chat_models import (
+    ChatOllama,
+)
+__all__ = ["ChatOllama"]

View File

@@ -0,0 +1,5 @@
+from langchain_ollama.v1.chat_models.base import (
+    ChatOllama,
+)
+__all__ = ["ChatOllama"]

View File

@@ -1,9 +1,9 @@
-"""Ollama chat model v1 implementation.
-This implementation provides native support for v1 messages with structured
-content blocks.
+"""v1 Ollama implementation.
+Provides native support for v1 messages with standard content blocks.
 .. versionadded:: 1.0.0
 """
 from __future__ import annotations
@@ -45,12 +45,12 @@ from pydantic.json_schema import JsonSchemaValue
 from pydantic.v1 import BaseModel as BaseModelV1
 from typing_extensions import Self, is_typeddict
-from ._compat import (
+from langchain_ollama._compat import (
     _convert_chunk_to_v1,
     _convert_from_v1_to_ollama_format,
     _convert_to_v1_from_ollama_format,
 )
-from ._utils import validate_model
+from langchain_ollama._utils import validate_model
 log = logging.getLogger(__name__)
@@ -116,7 +116,7 @@ def _parse_arguments_from_tool_call(
     Band-aid fix for issue in Ollama with inconsistent tool call argument structure.
     Should be removed/changed if fixed upstream.
-    See https://github.com/ollama/ollama/issues/6155
+    `See #6155 <https://github.com/ollama/ollama/issues/6155>`__.
     """
     if "function" not in raw_tool_call:
@@ -142,12 +142,6 @@ def _parse_arguments_from_tool_call(
     return parsed_arguments
-# Removed from v0:
-# - _get_tool_calls_from_response
-# _lc_tool_call_to_openai_tool_call
-# _get_image_from_data_content_block
 def _is_pydantic_class(obj: Any) -> bool:
     return isinstance(obj, type) and is_basemodel_subclass(obj)
@@ -193,7 +187,7 @@ class ChatOllama(BaseChatModel):
     Instantiate:
         .. code-block:: python
-            from langchain_ollama import ChatOllama
+            from langchain_ollama.v1 import ChatOllama
             llm = ChatOllama(
                 model = "llama3",
@@ -209,15 +203,13 @@ class ChatOllama(BaseChatModel):
             from langchain_core.messages.content_blocks import TextContentBlock
             messages = [
-                HumanMessage(content=[
-                    TextContentBlock(type="text", text="Hello!")
-                ])
+                HumanMessage("Hello!")
             ]
             llm.invoke(messages)
         .. code-block:: python
-            AIMessage(content=[{'type': 'text', 'text': 'Hello! How can I help you today?'}], response_metadata={'model': 'llama3', 'created_at': '2024-07-04T03:37:50.182604Z', 'done_reason': 'stop', 'done': True, 'total_duration': 3576619666, 'load_duration': 788524916, 'prompt_eval_count': 32, 'prompt_eval_duration': 128125000, 'eval_count': 71, 'eval_duration': 2656556000}, id='run-ba48f958-6402-41a5-b461-5e250a4ebd36-0')
+            AIMessage(content=[{'type': 'text', 'text': 'Hello! How can I help you today?'}], ...)
     Stream:
         .. code-block:: python
@@ -226,18 +218,16 @@ class ChatOllama(BaseChatModel):
             from langchain_core.messages.content_blocks import TextContentBlock
             messages = [
-                HumanMessage(content=[
-                    TextContentBlock(type="text", text="Return the words Hello World!")
-                ])
+                HumanMessage("Return the words Hello World!")
             ]
             for chunk in llm.stream(messages):
                 print(chunk.content, end="")
         .. code-block:: python
-            [{'type': 'text', 'text': 'Hello'}]
-            [{'type': 'text', 'text': ' World'}]
-            [{'type': 'text', 'text': '!'}]
+            AIMessageChunk(content=[{'type': 'text', 'text': 'Hello'}], ...)
+            AIMessageChunk(content=[{'type': 'text', 'text': ' World'}], ...)
+            AIMessageChunk(content=[{'type': 'text', 'text': '!'}], ...)
     Multi-modal input:
         .. code-block:: python
@@ -249,7 +239,6 @@ class ChatOllama(BaseChatModel):
                     TextContentBlock(type="text", text="Describe this image:"),
                     ImageContentBlock(
                         type="image",
-                        mime_type="image/jpeg",
                         base64="base64_encoded_image",
                     )
                 ])
@@ -258,7 +247,6 @@ class ChatOllama(BaseChatModel):
     Tool Calling:
         .. code-block:: python
-            from langchain_ollama import ChatOllama
             from pydantic import BaseModel, Field
             class Multiply(BaseModel):
@@ -267,20 +255,22 @@ class ChatOllama(BaseChatModel):
             llm_with_tools = llm.bind_tools([Multiply])
             ans = llm_with_tools.invoke([
-                HumanMessage(content=[
-                    TextContentBlock(type="text", text="What is 45*67")
-                ])
+                HumanMessage("What is 45*67")
             ])
             ans.tool_calls
         .. code-block:: python
-            [{'name': 'Multiply',
-              'args': {'a': 45, 'b': 67},
-              'id': '420c3f3b-df10-4188-945f-eb3abdb40622',
-              'type': 'tool_call'}]
+            [
+                {
+                    'name': 'Multiply',
+                    'args': {'a': 45, 'b': 67},
+                    'id': '420c3f3b-df10-4188-945f-eb3abdb40622',
+                    'type': 'tool_call'
+                }
+            ]
-    """  # noqa: E501, pylint: disable=line-too-long
+    """  # noqa: E501
     model: str
     """Model name to use."""
@@ -297,6 +287,7 @@ class ChatOllama(BaseChatModel):
     however, if the model's default behavior *is* to perform reasoning, think tags
     (``<think>`` and ``</think>``) will be present within the main response content
     unless you set ``reasoning`` to ``True``.
+
     """
     validate_model_on_init: bool = False
@@ -305,75 +296,126 @@ class ChatOllama(BaseChatModel):
     # Ollama-specific parameters
     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
-    (default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)"""
+
+    (Default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)
+    """
     mirostat_eta: Optional[float] = None
-    """Influences how quickly the algorithm responds to feedback
-    from the generated text. A lower learning rate will result in
-    slower adjustments, while a higher learning rate will make
-    the algorithm more responsive. (Default: ``0.1``)"""
+    """Influences how quickly the algorithm responds to feedback from generated text.
+
+    A lower learning rate will result in slower adjustments, while a higher learning
+    rate will make the algorithm more responsive.
+    (Default: ``0.1``)
+    """
     mirostat_tau: Optional[float] = None
-    """Controls the balance between coherence and diversity
-    of the output. A lower value will result in more focused and
-    coherent text. (Default: ``5.0``)"""
+    """Controls the balance between coherence and diversity of the output.
+
+    A lower value will result in more focused and coherent text.
+    (Default: ``5.0``)
+    """
     num_ctx: Optional[int] = None
-    """Sets the size of the context window used to generate the
-    next token. (Default: ``2048``) """
+    """Sets the size of the context window used to generate the next token.
+
+    (Default: ``2048``)
+    """
     num_gpu: Optional[int] = None
-    """The number of GPUs to use. On macOS it defaults to ``1`` to
-    enable metal support, ``0`` to disable."""
+    """The number of GPUs to use.
+
+    On macOS it defaults to ``1`` to enable metal support, ``0`` to disable.
+    """
     num_thread: Optional[int] = None
     """Sets the number of threads to use during computation.
-    By default, Ollama will detect this for optimal performance.
-    It is recommended to set this value to the number of physical
-    CPU cores your system has (as opposed to the logical number of cores)."""
+
+    By default, Ollama will detect this for optimal performance. It is recommended to
+    set this value to the number of physical CPU cores your system has (as opposed to
+    the logical number of cores).
+    """
     num_predict: Optional[int] = None
     """Maximum number of tokens to predict when generating text.
-    (Default: ``128``, ``-1`` = infinite generation, ``-2`` = fill context)"""
+
+    (Default: ``128``, ``-1`` = infinite generation, ``-2`` = fill context)
+    """
     repeat_last_n: Optional[int] = None
-    """Sets how far back for the model to look back to prevent
-    repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)"""
+    """Sets how far back for the model to look back to prevent repetition.
+
+    (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)
+    """
     repeat_penalty: Optional[float] = None
-    """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``)
-    will penalize repetitions more strongly, while a lower value (e.g., ``0.9``)
-    will be more lenient. (Default: ``1.1``)"""
+    """Sets how strongly to penalize repetitions.
+
+    A higher value (e.g., ``1.5``) will penalize repetitions more strongly, while a
+    lower value (e.g., ``0.9``) will be more lenient.
+    (Default: ``1.1``)
+    """
     temperature: Optional[float] = None
-    """The temperature of the model. Increasing the temperature will
-    make the model answer more creatively. (Default: ``0.8``)"""
+    """The temperature of the model.
+
+    Increasing the temperature will make the model answer more creatively.
+    (Default: ``0.8``)"""
     seed: Optional[int] = None
-    """Sets the random number seed to use for generation. Setting this
-    to a specific number will make the model generate the same text for
-    the same prompt."""
+    """Sets the random number seed to use for generation.
+
+    Setting this to a specific number will make the model generate the same text for the
+    same prompt.
+    """
     stop: Optional[list[str]] = None
     """Sets the stop tokens to use."""
     tfs_z: Optional[float] = None
-    """Tail free sampling is used to reduce the impact of less probable
-    tokens from the output. A higher value (e.g., ``2.0``) will reduce the
-    impact more, while a value of ``1.0`` disables this setting. (default: ``1``)"""
+    """Tail free sampling is used to reduce the impact of less probable tokens from the output.
+
+    A higher value (e.g., ``2.0``) will reduce the impact more, while a value of ``1.0`` disables this setting.
+    (Default: ``1``)
+    """  # noqa: E501
     top_k: Optional[int] = None
-    """Reduces the probability of generating nonsense. A higher value (e.g. ``100``)
-    will give more diverse answers, while a lower value (e.g. ``10``)
-    will be more conservative. (Default: ``40``)"""
+    """Reduces the probability of generating nonsense.
+
+    A higher value (e.g. ``100``) will give more diverse answers, while a lower value
+    (e.g. ``10``) will be more conservative.
+    (Default: ``40``)
+    """
     top_p: Optional[float] = None
-    """Works together with top-k. A higher value (e.g., ``0.95``) will lead
-    to more diverse text, while a lower value (e.g., ``0.5``) will
-    generate more focused and conservative text. (Default: ``0.9``)"""
+    """Works together with top-k.
+
+    A higher value (e.g., ``0.95``) will lead to more diverse text, while a lower value
+    (e.g., ``0.5``) will generate more focused and conservative text.
+    (Default: ``0.9``)
+    """
     format: Optional[Union[Literal["", "json"], JsonSchemaValue]] = None
-    """Specify the format of the output (options: ``'json'``, JSON schema)."""
+    """Specify the format of the output (Options: ``'json'``, JSON schema)."""
     keep_alive: Optional[Union[int, str]] = None
     """How long the model will stay loaded into memory."""
@@ -552,7 +594,7 @@ class ChatOllama(BaseChatModel):
                 and not part.get("message", {}).get("content", "").strip()
             ):
                 log.warning(
-                    "Ollama returned empty response with done_reason='load'. "
+                    "Ollama returned empty response with `done_reason='load'`. "
                     "Skipping this response."
                 )
                 continue
@@ -574,7 +616,6 @@ class ChatOllama(BaseChatModel):
             # Non-streaming case
             response = self._client.chat(**chat_params)
             ai_message = _convert_to_v1_from_ollama_format(response)
-            # Convert to chunk for yielding
             chunk = AIMessageChunk(
                 content=ai_message.content,
                 response_metadata=ai_message.response_metadata,
@@ -602,7 +643,7 @@ class ChatOllama(BaseChatModel):
                 and not part.get("message", {}).get("content", "").strip()
             ):
                 log.warning(
-                    "Ollama returned empty response with done_reason='load'. "
+                    "Ollama returned empty response with `done_reason='load'`. "
                     "Skipping this response."
                 )
                 continue
@@ -624,7 +665,6 @@ class ChatOllama(BaseChatModel):
             # Non-streaming case
             response = await self._async_client.chat(**chat_params)
             ai_message = _convert_to_v1_from_ollama_format(response)
-            # Convert to chunk for yielding
             chunk = AIMessageChunk(
                 content=ai_message.content,
                 response_metadata=ai_message.response_metadata,
@@ -649,6 +689,7 @@ class ChatOllama(BaseChatModel):
         Returns:
             Complete AI message response.
+
         """
         stream_iter = self._generate_stream(
             messages, stop=stop, run_manager=run_manager, **kwargs
@@ -672,6 +713,7 @@ class ChatOllama(BaseChatModel):
         Returns:
             Complete AI message response.
+
         """
         stream_iter = self._agenerate_stream(
             messages, stop=stop, run_manager=run_manager, **kwargs
@@ -695,6 +737,7 @@ class ChatOllama(BaseChatModel):
         Yields:
             AI message chunks in v1 format.
+
         """
         yield from self._generate_stream(
             messages, stop=stop, run_manager=run_manager, **kwargs
@@ -717,6 +760,7 @@ class ChatOllama(BaseChatModel):
         Yields:
             AI message chunks in v1 format.
+
         """
         async for chunk in self._agenerate_stream(
             messages, stop=stop, run_manager=run_manager, **kwargs
@@ -735,7 +779,8 @@ class ChatOllama(BaseChatModel):
         Args:
             tools: A list of tool definitions to bind to this chat model.
             tool_choice: Tool choice parameter (currently ignored by Ollama).
-            kwargs: Additional parameters passed to bind().
+            kwargs: Additional parameters passed to ``bind()``.
+
         """
         formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
         return super().bind(tools=formatted_tools, **kwargs)
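
Tying the reworked field docstrings together, a brief instantiation sketch using the v1 namespace (the option values are simply the defaults quoted in the docstrings above):

    # Hedged sketch: Ollama-specific options documented above, set explicitly.
    from langchain_ollama.v1 import ChatOllama

    llm = ChatOllama(
        model="llama3.1",
        temperature=0.8,              # documented default
        num_ctx=2048,                 # context window size (documented default)
        num_predict=128,              # max tokens to predict (documented default)
        validate_model_on_init=True,  # validate the model against the local Ollama instance at init
    )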

View File

@@ -12,7 +12,7 @@ import pytest
 from pydantic import BaseModel, Field
 from typing_extensions import TypedDict
-from langchain_ollama.chat_models_v1 import ChatOllama
+from langchain_ollama.v1.chat_models import ChatOllama
 DEFAULT_MODEL_NAME = "llama3.1"

View File

@@ -12,7 +12,7 @@ from langchain_tests.integration_tests.chat_models_v1 import ChatModelV1Integrat
 from ollama import ResponseError
 from pydantic import ValidationError
-from langchain_ollama.chat_models_v1 import ChatOllama
+from langchain_ollama.v1.chat_models import ChatOllama
 DEFAULT_MODEL_NAME = "llama3.1"
@@ -251,7 +251,7 @@ class TestChatOllamaV1(ChatModelV1IntegrationTests):
                 f"Content blocks: {[block.get('type') for block in result.content]}"
             )
-    @patch("langchain_ollama.chat_models_v1.Client.list")
+    @patch("langchain_ollama.v1.chat_models.Client.list")
     def test_init_model_not_found(self, mock_list: MagicMock) -> None:
         """Test that a ValueError is raised when the model is not found."""
         mock_list.side_effect = ValueError("Test model not found")
@@ -259,7 +259,7 @@ class TestChatOllamaV1(ChatModelV1IntegrationTests):
             ChatOllama(model="non-existent-model", validate_model_on_init=True)
         assert "Test model not found" in str(excinfo.value)
-    @patch("langchain_ollama.chat_models_v1.Client.list")
+    @patch("langchain_ollama.v1.chat_models.Client.list")
     def test_init_connection_error(self, mock_list: MagicMock) -> None:
         """Test that a ValidationError is raised on connect failure during init."""
         mock_list.side_effect = ConnectError("Test connection error")
@@ -268,7 +268,7 @@ class TestChatOllamaV1(ChatModelV1IntegrationTests):
             ChatOllama(model="any-model", validate_model_on_init=True)
         assert "Failed to connect to Ollama" in str(excinfo.value)
-    @patch("langchain_ollama.chat_models_v1.Client.list")
+    @patch("langchain_ollama.v1.chat_models.Client.list")
     def test_init_response_error(self, mock_list: MagicMock) -> None:
         """Test that a ResponseError is raised."""
         mock_list.side_effect = ResponseError("Test response error")

View File

@@ -20,7 +20,7 @@ from langchain_ollama._compat import (
     _convert_from_v1_to_ollama_format,
     _convert_to_v1_from_ollama_format,
 )
-from langchain_ollama.chat_models_v1 import (
+from langchain_ollama.v1.chat_models import (
     ChatOllama,
     _parse_arguments_from_tool_call,
     _parse_json_string,
@@ -246,8 +246,8 @@ class TestChatOllama(ChatModelV1UnitTests):
     @pytest.fixture
     def model(self) -> Generator[ChatOllama, None, None]:  # type: ignore[override]
         """Create a ChatOllama instance for testing."""
-        sync_patcher = patch("langchain_ollama.chat_models_v1.Client")
-        async_patcher = patch("langchain_ollama.chat_models_v1.AsyncClient")
+        sync_patcher = patch("langchain_ollama.v1.chat_models.Client")
+        async_patcher = patch("langchain_ollama.v1.chat_models.AsyncClient")
         mock_sync_client_class = sync_patcher.start()
         mock_async_client_class = async_patcher.start()
@@ -328,8 +328,8 @@ class TestChatOllama(ChatModelV1UnitTests):
     def test_initialization(self) -> None:
         """Test `ChatOllama` initialization."""
         with (
-            patch("langchain_ollama.chat_models_v1.Client"),
-            patch("langchain_ollama.chat_models_v1.AsyncClient"),
+            patch("langchain_ollama.v1.chat_models.Client"),
+            patch("langchain_ollama.v1.chat_models.AsyncClient"),
         ):
             llm = ChatOllama(model=MODEL_NAME)
@@ -339,8 +339,8 @@ class TestChatOllama(ChatModelV1UnitTests):
     def test_chat_params(self) -> None:
         """Test `_chat_params()`."""
         with (
-            patch("langchain_ollama.chat_models_v1.Client"),
-            patch("langchain_ollama.chat_models_v1.AsyncClient"),
+            patch("langchain_ollama.v1.chat_models.Client"),
+            patch("langchain_ollama.v1.chat_models.AsyncClient"),
         ):
             llm = ChatOllama(model=MODEL_NAME, temperature=0.7)
@@ -359,8 +359,8 @@ class TestChatOllama(ChatModelV1UnitTests):
     def test_ls_params(self) -> None:
         """Test LangSmith parameters."""
         with (
-            patch("langchain_ollama.chat_models_v1.Client"),
-            patch("langchain_ollama.chat_models_v1.AsyncClient"),
+            patch("langchain_ollama.v1.chat_models.Client"),
+            patch("langchain_ollama.v1.chat_models.AsyncClient"),
         ):
             llm = ChatOllama(model=MODEL_NAME, temperature=0.5)
@@ -374,8 +374,8 @@ class TestChatOllama(ChatModelV1UnitTests):
     def test_bind_tools_basic(self) -> None:
         """Test basic tool binding functionality."""
         with (
-            patch("langchain_ollama.chat_models_v1.Client"),
-            patch("langchain_ollama.chat_models_v1.AsyncClient"),
+            patch("langchain_ollama.v1.chat_models.Client"),
+            patch("langchain_ollama.v1.chat_models.AsyncClient"),
         ):
             llm = ChatOllama(model=MODEL_NAME)
@@ -394,8 +394,8 @@ class TestChatOllama(ChatModelV1UnitTests):
 # But can be added if needed in the future.
-@patch("langchain_ollama.chat_models_v1.validate_model")
-@patch("langchain_ollama.chat_models_v1.Client")
+@patch("langchain_ollama.v1.chat_models.validate_model")
+@patch("langchain_ollama.v1.chat_models.Client")
 def test_validate_model_on_init(
     mock_client_class: Any, mock_validate_model: Any
 ) -> None:
@@ -501,7 +501,7 @@ def test_load_response_with_empty_content_is_skipped(
         }
     ]
-    with patch("langchain_ollama.chat_models_v1.Client") as mock_client_class:
+    with patch("langchain_ollama.v1.chat_models.Client") as mock_client_class:
         mock_client = MagicMock()
         mock_client_class.return_value = mock_client
         mock_client.chat.return_value = iter(load_only_response)
@@ -531,7 +531,7 @@ def test_load_response_with_whitespace_content_is_skipped(
         }
     ]
-    with patch("langchain_ollama.chat_models_v1.Client") as mock_client_class:
+    with patch("langchain_ollama.v1.chat_models.Client") as mock_client_class:
         mock_client = MagicMock()
         mock_client_class.return_value = mock_client
         mock_client.chat.return_value = iter(load_whitespace_response)
@@ -570,7 +570,7 @@ def test_load_followed_by_content_response(
         },
     ]
-    with patch("langchain_ollama.chat_models_v1.Client") as mock_client_class:
+    with patch("langchain_ollama.v1.chat_models.Client") as mock_client_class:
         mock_client = MagicMock()
         mock_client_class.return_value = mock_client
         mock_client.chat.return_value = iter(load_then_content_response)
@@ -600,7 +600,7 @@ def test_load_response_with_actual_content_is_not_skipped(
         }
     ]
-    with patch("langchain_ollama.chat_models_v1.Client") as mock_client_class:
+    with patch("langchain_ollama.v1.chat_models.Client") as mock_client_class:
         mock_client = MagicMock()
         mock_client_class.return_value = mock_client
         mock_client.chat.return_value = iter(load_with_content_response)