docs nits

Mason Daugherty 2025-08-04 14:46:10 -04:00
parent ade2155aee
commit 3f11b041df
4 changed files with 76 additions and 68 deletions


@@ -29,7 +29,10 @@ def validate_model(client: Client, model_name: str) -> None:
             )
             raise ValueError(msg)
     except ConnectError as e:
-        msg = "Failed to connect to Ollama. Please check that Ollama is downloaded, running and accessible. https://ollama.com/download"  # noqa: E501
+        msg = (
+            "Failed to connect to Ollama. Please check that Ollama is downloaded, "
+            "running and accessible. https://ollama.com/download"
+        )
         raise ValueError(msg) from e
     except ResponseError as e:
         msg = (
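For readers skimming the diff: this is the message a user sees when model validation runs against an unreachable server. A minimal sketch of triggering it, assuming the ``validate_model_on_init`` flag whose docstring is touched later in this commit:

    from langchain_ollama import ChatOllama

    try:
        # With validation on, an unreachable Ollama server surfaces as the
        # ValueError built above rather than a raw httpx ConnectError.
        llm = ChatOllama(model="llama3", validate_model_on_init=True)
    except ValueError as e:
        print(e)  # "Failed to connect to Ollama. Please check that Ollama is ..."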


@@ -482,7 +482,7 @@ class ChatOllama(BaseChatModel):
     repeat_last_n: Optional[int] = None
     """Sets how far back for the model to look back to prevent
-    repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = num_ctx)"""
+    repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)"""
 
     repeat_penalty: Optional[float] = None
     """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``)


@@ -21,12 +21,12 @@ class OllamaEmbeddings(BaseModel, Embeddings):
     """Ollama embedding model integration.
 
     Set up a local Ollama instance:
-        Install the Ollama package and set up a local Ollama instance
-        using the instructions here: https://github.com/ollama/ollama .
+        `Install the Ollama package <https://github.com/ollama/ollama>`__ and set up a
+        local Ollama instance.
 
     You will need to choose a model to serve.
 
-    You can view a list of available models via the model library (https://ollama.com/library).
+    You can view a list of available models via `the model library <https://ollama.com/library>`__.
 
     To fetch a model from the Ollama model library use ``ollama pull <name-of-model>``.
@@ -39,8 +39,8 @@ class OllamaEmbeddings(BaseModel, Embeddings):
     This will download the default tagged version of the model.
     Typically, the default points to the latest, smallest sized-parameter model.
 
-    * On Mac, the models will be downloaded to ~/.ollama/models
-    * On Linux (or WSL), the models will be stored at /usr/share/ollama/.ollama/models
+    * On Mac, the models will be downloaded to ``~/.ollama/models``
+    * On Linux (or WSL), the models will be stored at ``/usr/share/ollama/.ollama/models``
 
     You can specify the exact version of the model of interest
     as such ``ollama pull vicuna:13b-v1.5-16k-q4_0``.
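Following the setup steps this docstring describes, a minimal end-to-end sketch (model tag illustrative):

    # In a shell, pull a pinned model version first:
    #   ollama pull vicuna:13b-v1.5-16k-q4_0
    from langchain_ollama import OllamaEmbeddings

    embeddings = OllamaEmbeddings(model="vicuna:13b-v1.5-16k-q4_0")
    vector = embeddings.embed_query("What is the meaning of life?")
    print(len(vector))  # dimensionality depends on the model being served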
@@ -132,6 +132,7 @@ class OllamaEmbeddings(BaseModel, Embeddings):
     """Whether to validate the model exists in ollama locally on initialization.
+
     .. versionadded:: 0.3.4
     """
 
     base_url: Optional[str] = None
@@ -139,60 +140,62 @@ class OllamaEmbeddings(BaseModel, Embeddings):
     client_kwargs: Optional[dict] = {}
     """Additional kwargs to pass to the httpx clients.
     These arguments are passed to both synchronous and async clients.
-    Use sync_client_kwargs and async_client_kwargs to pass different arguments
+    Use ``sync_client_kwargs`` and ``async_client_kwargs`` to pass different arguments
     to synchronous and asynchronous clients.
     """
 
     async_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before passing to the httpx
+    """Additional kwargs to merge with ``client_kwargs`` before passing to the httpx
     AsyncClient.
 
     For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#asyncclient>`__.
     """
 
     sync_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before passing to the HTTPX Client.
+    """Additional kwargs to merge with ``client_kwargs`` before
+    passing to the HTTPX Client.
 
     For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#client>`__.
     """
 
     _client: Optional[Client] = PrivateAttr(default=None)
-    """
-    The client to use for making requests.
-    """
+    """The client to use for making requests."""
 
     _async_client: Optional[AsyncClient] = PrivateAttr(default=None)
-    """
-    The async client to use for making requests.
-    """
+    """The async client to use for making requests."""
 
     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
-    (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
+    (default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)"""
 
     mirostat_eta: Optional[float] = None
     """Influences how quickly the algorithm responds to feedback
     from the generated text. A lower learning rate will result in
     slower adjustments, while a higher learning rate will make
-    the algorithm more responsive. (Default: 0.1)"""
+    the algorithm more responsive. (Default: ``0.1``)"""
 
     mirostat_tau: Optional[float] = None
     """Controls the balance between coherence and diversity
     of the output. A lower value will result in more focused and
-    coherent text. (Default: 5.0)"""
+    coherent text. (Default: ``5.0``)"""
 
     num_ctx: Optional[int] = None
     """Sets the size of the context window used to generate the
-    next token. (Default: 2048) """
+    next token. (Default: ``2048``) """
 
     num_gpu: Optional[int] = None
-    """The number of GPUs to use. On macOS it defaults to 1 to
-    enable metal support, 0 to disable."""
+    """The number of GPUs to use. On macOS it defaults to ``1`` to
+    enable metal support, ``0`` to disable."""
 
     keep_alive: Optional[int] = None
-    """controls how long the model will stay loaded into memory
-    following the request (default: 5m)
+    """Controls how long the model will stay loaded into memory
+    following the request (default: ``5m``)
     """
 
     num_thread: Optional[int] = None
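The three kwargs fields above merge as follows; a sketch with illustrative httpx options:

    from langchain_ollama import OllamaEmbeddings

    embeddings = OllamaEmbeddings(
        model="llama3",
        client_kwargs={"timeout": 30},  # shared by sync and async clients
        sync_client_kwargs={"headers": {"X-Caller": "batch-job"}},    # httpx.Client only
        async_client_kwargs={"headers": {"X-Caller": "web-server"}},  # httpx.AsyncClient only
    )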
@@ -203,34 +206,34 @@ class OllamaEmbeddings(BaseModel, Embeddings):
     repeat_last_n: Optional[int] = None
     """Sets how far back for the model to look back to prevent
-    repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
+    repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)"""
 
     repeat_penalty: Optional[float] = None
-    """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
-    will penalize repetitions more strongly, while a lower value (e.g., 0.9)
-    will be more lenient. (Default: 1.1)"""
+    """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``)
+    will penalize repetitions more strongly, while a lower value (e.g., ``0.9``)
+    will be more lenient. (Default: ``1.1``)"""
 
     temperature: Optional[float] = None
     """The temperature of the model. Increasing the temperature will
-    make the model answer more creatively. (Default: 0.8)"""
+    make the model answer more creatively. (Default: ``0.8``)"""
 
     stop: Optional[list[str]] = None
     """Sets the stop tokens to use."""
 
     tfs_z: Optional[float] = None
     """Tail free sampling is used to reduce the impact of less probable
-    tokens from the output. A higher value (e.g., 2.0) will reduce the
-    impact more, while a value of 1.0 disables this setting. (default: 1)"""
+    tokens from the output. A higher value (e.g., ``2.0``) will reduce the
+    impact more, while a value of ``1.0`` disables this setting. (default: ``1``)"""
 
     top_k: Optional[int] = None
-    """Reduces the probability of generating nonsense. A higher value (e.g. 100)
-    will give more diverse answers, while a lower value (e.g. 10)
-    will be more conservative. (Default: 40)"""
+    """Reduces the probability of generating nonsense. A higher value (e.g. ``100``)
+    will give more diverse answers, while a lower value (e.g. ``10``)
+    will be more conservative. (Default: ``40``)"""
 
     top_p: Optional[float] = None
-    """Works together with top-k. A higher value (e.g., 0.95) will lead
-    to more diverse text, while a lower value (e.g., 0.5) will
-    generate more focused and conservative text. (Default: 0.9)"""
+    """Works together with top-k. A higher value (e.g., ``0.95``) will lead
+    to more diverse text, while a lower value (e.g., ``0.5``) will
+    generate more focused and conservative text. (Default: ``0.9``)"""
 
     model_config = ConfigDict(
         extra="forbid",
@@ -257,7 +260,7 @@ class OllamaEmbeddings(BaseModel, Embeddings):
     @model_validator(mode="after")
     def _set_clients(self) -> Self:
-        """Set clients to use for ollama."""
+        """Set clients to use for Ollama."""
         client_kwargs = self.client_kwargs or {}
         sync_client_kwargs = client_kwargs
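The validator whose docstring is fixed here follows a standard pydantic pattern: build both clients after field validation, layering the per-client kwargs over the shared ones. A self-contained sketch of that merge (not the actual implementation; field names mirror the diff, and ``Client``/``AsyncClient`` are the ollama package's clients):

    from typing import Optional

    from ollama import AsyncClient, Client
    from pydantic import BaseModel, PrivateAttr, model_validator
    from typing_extensions import Self


    class ClientHolder(BaseModel):
        base_url: Optional[str] = None
        client_kwargs: Optional[dict] = {}
        sync_client_kwargs: Optional[dict] = {}
        async_client_kwargs: Optional[dict] = {}

        _client: Optional[Client] = PrivateAttr(default=None)
        _async_client: Optional[AsyncClient] = PrivateAttr(default=None)

        @model_validator(mode="after")
        def _set_clients(self) -> Self:
            """Set clients to use for Ollama."""
            shared = self.client_kwargs or {}
            # Per-client kwargs win over the shared client_kwargs on key collisions.
            self._client = Client(
                host=self.base_url, **{**shared, **(self.sync_client_kwargs or {})}
            )
            self._async_client = AsyncClient(
                host=self.base_url, **{**shared, **(self.async_client_kwargs or {})}
            )
            return self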


@@ -61,26 +61,26 @@ class OllamaLLM(BaseLLM):
     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
-    (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
+    (default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)"""
 
     mirostat_eta: Optional[float] = None
     """Influences how quickly the algorithm responds to feedback
     from the generated text. A lower learning rate will result in
     slower adjustments, while a higher learning rate will make
-    the algorithm more responsive. (Default: 0.1)"""
+    the algorithm more responsive. (Default: ``0.1``)"""
 
     mirostat_tau: Optional[float] = None
     """Controls the balance between coherence and diversity
     of the output. A lower value will result in more focused and
-    coherent text. (Default: 5.0)"""
+    coherent text. (Default: ``5.0``)"""
 
     num_ctx: Optional[int] = None
     """Sets the size of the context window used to generate the
-    next token. (Default: 2048)"""
+    next token. (Default: ``2048``)"""
 
     num_gpu: Optional[int] = None
-    """The number of GPUs to use. On macOS it defaults to 1 to
-    enable metal support, 0 to disable."""
+    """The number of GPUs to use. On macOS it defaults to ``1`` to
+    enable metal support, ``0`` to disable."""
 
     num_thread: Optional[int] = None
     """Sets the number of threads to use during computation.
@@ -90,20 +90,20 @@ class OllamaLLM(BaseLLM):
     num_predict: Optional[int] = None
     """Maximum number of tokens to predict when generating text.
-    (Default: 128, -1 = infinite generation, -2 = fill context)"""
+    (Default: ``128``, ``-1`` = infinite generation, ``-2`` = fill context)"""
 
     repeat_last_n: Optional[int] = None
     """Sets how far back for the model to look back to prevent
-    repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
+    repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)"""
 
     repeat_penalty: Optional[float] = None
-    """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
-    will penalize repetitions more strongly, while a lower value (e.g., 0.9)
-    will be more lenient. (Default: 1.1)"""
+    """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``)
+    will penalize repetitions more strongly, while a lower value (e.g., ``0.9``)
+    will be more lenient. (Default: ``1.1``)"""
 
     temperature: Optional[float] = None
     """The temperature of the model. Increasing the temperature will
-    make the model answer more creatively. (Default: 0.8)"""
+    make the model answer more creatively. (Default: ``0.8``)"""
 
     seed: Optional[int] = None
     """Sets the random number seed to use for generation. Setting this
@@ -115,21 +115,21 @@ class OllamaLLM(BaseLLM):
     tfs_z: Optional[float] = None
     """Tail free sampling is used to reduce the impact of less probable
-    tokens from the output. A higher value (e.g., 2.0) will reduce the
-    impact more, while a value of 1.0 disables this setting. (default: 1)"""
+    tokens from the output. A higher value (e.g., ``2.0``) will reduce the
+    impact more, while a value of 1.0 disables this setting. (default: ``1``)"""
 
     top_k: Optional[int] = None
-    """Reduces the probability of generating nonsense. A higher value (e.g. 100)
-    will give more diverse answers, while a lower value (e.g. 10)
-    will be more conservative. (Default: 40)"""
+    """Reduces the probability of generating nonsense. A higher value (e.g. ``100``)
+    will give more diverse answers, while a lower value (e.g. ``10``)
+    will be more conservative. (Default: ``40``)"""
 
     top_p: Optional[float] = None
-    """Works together with top-k. A higher value (e.g., 0.95) will lead
-    to more diverse text, while a lower value (e.g., 0.5) will
-    generate more focused and conservative text. (Default: 0.9)"""
+    """Works together with top-k. A higher value (e.g., ``0.95``) will lead
+    to more diverse text, while a lower value (e.g., ``0.5``) will
+    generate more focused and conservative text. (Default: ``0.9``)"""
 
     format: Literal["", "json"] = ""
-    """Specify the format of the output (options: json)"""
+    """Specify the format of the output (options: ``'json'``)"""
 
     keep_alive: Optional[Union[int, str]] = None
     """How long the model will stay loaded into memory."""
@@ -139,33 +139,35 @@ class OllamaLLM(BaseLLM):
     client_kwargs: Optional[dict] = {}
     """Additional kwargs to pass to the httpx clients.
     These arguments are passed to both synchronous and async clients.
-    Use sync_client_kwargs and async_client_kwargs to pass different arguments
+    Use ``sync_client_kwargs`` and ``async_client_kwargs`` to pass different arguments
     to synchronous and asynchronous clients.
     """
 
     async_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before passing to the HTTPX
+    """Additional kwargs to merge with ``client_kwargs`` before passing to the HTTPX
     AsyncClient.
 
     For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#asyncclient>`__.
     """
 
     sync_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before passing to the HTTPX Client.
+    """Additional kwargs to merge with ``client_kwargs`` before
+    passing to the HTTPX Client.
 
     For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#client>`__.
     """
 
     _client: Optional[Client] = PrivateAttr(default=None)
-    """
-    The client to use for making requests.
-    """
+    """The client to use for making requests."""
 
     _async_client: Optional[AsyncClient] = PrivateAttr(default=None)
-    """
-    The async client to use for making requests.
-    """
+    """The async client to use for making requests."""
 
     def _generate_params(
         self,
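Same kwargs-merging story as the embeddings class; ``async_client_kwargs`` matters here because ``ainvoke`` goes through the AsyncClient (a sketch, values illustrative):

    import asyncio

    from langchain_ollama import OllamaLLM

    llm = OllamaLLM(
        model="llama3",
        client_kwargs={"timeout": 60},  # shared by both clients
        async_client_kwargs={"headers": {"User-Agent": "my-app"}},  # async only
    )

    async def main() -> None:
        print(await llm.ainvoke("Why is the sky blue?"))

    asyncio.run(main())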