docs nits

This commit is contained in:
  parent ade2155aee
  commit 3f11b041df
@@ -29,7 +29,10 @@ def validate_model(client: Client, model_name: str) -> None:
             )
             raise ValueError(msg)
     except ConnectError as e:
-        msg = "Failed to connect to Ollama. Please check that Ollama is downloaded, running and accessible. https://ollama.com/download"  # noqa: E501
+        msg = (
+            "Failed to connect to Ollama. Please check that Ollama is downloaded, "
+            "running and accessible. https://ollama.com/download"
+        )
         raise ValueError(msg) from e
     except ResponseError as e:
         msg = (
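This hunk only re-wraps the connection-error message to satisfy the line-length lint; Python's implicit concatenation of adjacent string literals keeps the resulting message identical. A minimal sketch of how the failure surfaces to callers, assuming ``ChatOllama`` exposes the same ``validate_model_on_init`` flag documented for ``OllamaEmbeddings`` later in this commit (the model name is illustrative)::

    from langchain_ollama import ChatOllama

    try:
        # validate_model() runs at construction time and wraps the underlying
        # httpx ConnectError in the ValueError built above.
        llm = ChatOllama(model="llama3", validate_model_on_init=True)
    except ValueError as err:
        print(err)  # "Failed to connect to Ollama. Please check that Ollama is downloaded, ..."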
@@ -482,7 +482,7 @@ class ChatOllama(BaseChatModel):
 
     repeat_last_n: Optional[int] = None
     """Sets how far back for the model to look back to prevent
-    repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = num_ctx)"""
+    repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)"""
 
     repeat_penalty: Optional[float] = None
     """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``)
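The nit here is purely cosmetic (``num_ctx`` rendered as a literal), but the two fields are real generation options. A minimal usage sketch, with illustrative values rather than recommendations::

    from langchain_ollama import ChatOllama

    llm = ChatOllama(
        model="llama3",
        repeat_last_n=128,   # look back further than the default 64 tokens
        repeat_penalty=1.2,  # penalize repetition a bit harder than the default 1.1
    )
    print(llm.invoke("Tell a short story without repeating yourself.").content)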
@@ -21,12 +21,12 @@ class OllamaEmbeddings(BaseModel, Embeddings):
     """Ollama embedding model integration.
 
     Set up a local Ollama instance:
-        Install the Ollama package and set up a local Ollama instance
-        using the instructions here: https://github.com/ollama/ollama .
+        `Install the Ollama package <https://github.com/ollama/ollama>`__ and set up a
+        local Ollama instance.
 
         You will need to choose a model to serve.
 
-        You can view a list of available models via the model library (https://ollama.com/library).
+        You can view a list of available models via `the model library <https://ollama.com/library>`__.
 
         To fetch a model from the Ollama model library use ``ollama pull <name-of-model>``.
 
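Once a model has been pulled (``ollama pull <name-of-model>``), the class can point at it directly. A minimal quickstart sketch, assuming a locally running Ollama server; the model name is illustrative::

    from langchain_ollama import OllamaEmbeddings

    embeddings = OllamaEmbeddings(model="nomic-embed-text")
    vector = embeddings.embed_query("hello world")
    print(len(vector))  # dimensionality depends on the chosen model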
@@ -39,8 +39,8 @@ class OllamaEmbeddings(BaseModel, Embeddings):
         This will download the default tagged version of the model.
         Typically, the default points to the latest, smallest sized-parameter model.
 
-        * On Mac, the models will be downloaded to ~/.ollama/models
-        * On Linux (or WSL), the models will be stored at /usr/share/ollama/.ollama/models
+        * On Mac, the models will be downloaded to ``~/.ollama/models``
+        * On Linux (or WSL), the models will be stored at ``/usr/share/ollama/.ollama/models``
 
         You can specify the exact version of the model of interest
         as such ``ollama pull vicuna:13b-v1.5-16k-q4_0``.
@@ -132,6 +132,7 @@ class OllamaEmbeddings(BaseModel, Embeddings):
     """Whether to validate the model exists in ollama locally on initialization.
 
     .. versionadded:: 0.3.4
+
     """
 
     base_url: Optional[str] = None
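The added blank line keeps the ``versionadded`` directive well-formed RST. A minimal sketch of the flag itself, assuming a running Ollama server (the model name is illustrative)::

    from langchain_ollama import OllamaEmbeddings

    # Raises ValueError at construction time if the model has not been pulled
    # locally, instead of failing later on the first embed call.
    embeddings = OllamaEmbeddings(
        model="nomic-embed-text",
        validate_model_on_init=True,
    )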
@@ -139,60 +140,62 @@ class OllamaEmbeddings(BaseModel, Embeddings):
 
     client_kwargs: Optional[dict] = {}
     """Additional kwargs to pass to the httpx clients.
 
     These arguments are passed to both synchronous and async clients.
-    Use sync_client_kwargs and async_client_kwargs to pass different arguments
+
+    Use ``sync_client_kwargs`` and ``async_client_kwargs`` to pass different arguments
     to synchronous and asynchronous clients.
 
     """
 
     async_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before passing to the httpx
+    """Additional kwargs to merge with ``client_kwargs`` before passing to the httpx
     AsyncClient.
 
     For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#asyncclient>`__.
 
     """
 
     sync_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before passing to the HTTPX Client.
+    """Additional kwargs to merge with ``client_kwargs`` before
+    passing to the HTTPX Client.
 
     For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#client>`__.
 
     """
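These three hooks cover the common case of passing ``httpx`` options such as timeouts or headers. A minimal sketch with illustrative values; the kwargs shown are standard ``httpx.Client``/``httpx.AsyncClient`` arguments::

    from langchain_ollama import OllamaEmbeddings

    embeddings = OllamaEmbeddings(
        model="nomic-embed-text",
        client_kwargs={"timeout": 30.0},                      # applied to both clients
        sync_client_kwargs={"trust_env": False},              # merged into the sync Client only
        async_client_kwargs={"headers": {"x-example": "1"}},  # merged into the AsyncClient only
    )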
 
     _client: Optional[Client] = PrivateAttr(default=None)
-    """
-    The client to use for making requests.
-    """
+    """The client to use for making requests."""
 
     _async_client: Optional[AsyncClient] = PrivateAttr(default=None)
-    """
-    The async client to use for making requests.
-    """
+    """The async client to use for making requests."""
 
     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
-    (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
+    (default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)"""
 
     mirostat_eta: Optional[float] = None
     """Influences how quickly the algorithm responds to feedback
     from the generated text. A lower learning rate will result in
     slower adjustments, while a higher learning rate will make
-    the algorithm more responsive. (Default: 0.1)"""
+    the algorithm more responsive. (Default: ``0.1``)"""
 
     mirostat_tau: Optional[float] = None
     """Controls the balance between coherence and diversity
     of the output. A lower value will result in more focused and
-    coherent text. (Default: 5.0)"""
+    coherent text. (Default: ``5.0``)"""
 
     num_ctx: Optional[int] = None
     """Sets the size of the context window used to generate the
-    next token. (Default: 2048) """
+    next token. (Default: ``2048``) """
 
     num_gpu: Optional[int] = None
-    """The number of GPUs to use. On macOS it defaults to 1 to
-    enable metal support, 0 to disable."""
+    """The number of GPUs to use. On macOS it defaults to ``1`` to
+    enable metal support, ``0`` to disable."""
 
     keep_alive: Optional[int] = None
-    """controls how long the model will stay loaded into memory
-    following the request (default: 5m)
+    """Controls how long the model will stay loaded into memory
+    following the request (default: ``5m``)
     """
 
     num_thread: Optional[int] = None
@@ -203,34 +206,34 @@ class OllamaEmbeddings(BaseModel, Embeddings):
 
     repeat_last_n: Optional[int] = None
     """Sets how far back for the model to look back to prevent
-    repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
+    repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)"""
 
     repeat_penalty: Optional[float] = None
-    """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
-    will penalize repetitions more strongly, while a lower value (e.g., 0.9)
-    will be more lenient. (Default: 1.1)"""
+    """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``)
+    will penalize repetitions more strongly, while a lower value (e.g., ``0.9``)
+    will be more lenient. (Default: ``1.1``)"""
 
     temperature: Optional[float] = None
     """The temperature of the model. Increasing the temperature will
-    make the model answer more creatively. (Default: 0.8)"""
+    make the model answer more creatively. (Default: ``0.8``)"""
 
     stop: Optional[list[str]] = None
     """Sets the stop tokens to use."""
 
     tfs_z: Optional[float] = None
     """Tail free sampling is used to reduce the impact of less probable
-    tokens from the output. A higher value (e.g., 2.0) will reduce the
-    impact more, while a value of 1.0 disables this setting. (default: 1)"""
+    tokens from the output. A higher value (e.g., ``2.0``) will reduce the
+    impact more, while a value of ``1.0`` disables this setting. (default: ``1``)"""
 
     top_k: Optional[int] = None
-    """Reduces the probability of generating nonsense. A higher value (e.g. 100)
-    will give more diverse answers, while a lower value (e.g. 10)
-    will be more conservative. (Default: 40)"""
+    """Reduces the probability of generating nonsense. A higher value (e.g. ``100``)
+    will give more diverse answers, while a lower value (e.g. ``10``)
+    will be more conservative. (Default: ``40``)"""
 
     top_p: Optional[float] = None
-    """Works together with top-k. A higher value (e.g., 0.95) will lead
-    to more diverse text, while a lower value (e.g., 0.5) will
-    generate more focused and conservative text. (Default: 0.9)"""
+    """Works together with top-k. A higher value (e.g., ``0.95``) will lead
+    to more diverse text, while a lower value (e.g., ``0.5``) will
+    generate more focused and conservative text. (Default: ``0.9``)"""
 
     model_config = ConfigDict(
         extra="forbid",
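All of these fields are forwarded to Ollama as per-request model options. A minimal sketch with illustrative values; most of the sampling knobs matter more for generation than for embedding::

    from langchain_ollama import OllamaEmbeddings

    embeddings = OllamaEmbeddings(
        model="nomic-embed-text",
        num_ctx=8192,    # embed longer inputs than the 2048-token default
        num_thread=8,    # CPU threads Ollama may use for the request
        keep_alive=300,  # keep the model loaded for five minutes after the call
    )
    vectors = embeddings.embed_documents(["first document", "second document"])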
@@ -257,7 +260,7 @@ class OllamaEmbeddings(BaseModel, Embeddings):
 
     @model_validator(mode="after")
     def _set_clients(self) -> Self:
-        """Set clients to use for ollama."""
+        """Set clients to use for Ollama."""
        client_kwargs = self.client_kwargs or {}
 
         sync_client_kwargs = client_kwargs
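The ``mode="after"`` validator runs once the declared fields are populated, which is where the private httpx-backed clients get built. A standalone sketch of the same pattern, assuming pydantic v2 and the ``ollama`` Python client; ``OllamaBacked`` is a hypothetical class for illustration, not the library's implementation::

    from typing import Optional

    from ollama import AsyncClient, Client
    from pydantic import BaseModel, PrivateAttr, model_validator
    from typing_extensions import Self


    class OllamaBacked(BaseModel):
        base_url: Optional[str] = None
        client_kwargs: Optional[dict] = {}

        _client: Optional[Client] = PrivateAttr(default=None)
        _async_client: Optional[AsyncClient] = PrivateAttr(default=None)

        @model_validator(mode="after")
        def _set_clients(self) -> Self:
            # Runs after field validation, so the kwargs are already populated.
            kwargs = self.client_kwargs or {}
            self._client = Client(host=self.base_url, **kwargs)
            self._async_client = AsyncClient(host=self.base_url, **kwargs)
            return self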
@@ -61,26 +61,26 @@ class OllamaLLM(BaseLLM):
 
     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
-    (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
+    (default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)"""
 
     mirostat_eta: Optional[float] = None
     """Influences how quickly the algorithm responds to feedback
     from the generated text. A lower learning rate will result in
     slower adjustments, while a higher learning rate will make
-    the algorithm more responsive. (Default: 0.1)"""
+    the algorithm more responsive. (Default: ``0.1``)"""
 
     mirostat_tau: Optional[float] = None
     """Controls the balance between coherence and diversity
     of the output. A lower value will result in more focused and
-    coherent text. (Default: 5.0)"""
+    coherent text. (Default: ``5.0``)"""
 
     num_ctx: Optional[int] = None
     """Sets the size of the context window used to generate the
-    next token. (Default: 2048)"""
+    next token. (Default: ``2048``)"""
 
     num_gpu: Optional[int] = None
-    """The number of GPUs to use. On macOS it defaults to 1 to
-    enable metal support, 0 to disable."""
+    """The number of GPUs to use. On macOS it defaults to ``1`` to
+    enable metal support, ``0`` to disable."""
 
     num_thread: Optional[int] = None
     """Sets the number of threads to use during computation.
@@ -90,20 +90,20 @@ class OllamaLLM(BaseLLM):
 
     num_predict: Optional[int] = None
     """Maximum number of tokens to predict when generating text.
-    (Default: 128, -1 = infinite generation, -2 = fill context)"""
+    (Default: ``128``, ``-1`` = infinite generation, ``-2`` = fill context)"""
 
     repeat_last_n: Optional[int] = None
     """Sets how far back for the model to look back to prevent
-    repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
+    repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)"""
 
     repeat_penalty: Optional[float] = None
-    """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
-    will penalize repetitions more strongly, while a lower value (e.g., 0.9)
-    will be more lenient. (Default: 1.1)"""
+    """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``)
+    will penalize repetitions more strongly, while a lower value (e.g., ``0.9``)
+    will be more lenient. (Default: ``1.1``)"""
 
     temperature: Optional[float] = None
     """The temperature of the model. Increasing the temperature will
-    make the model answer more creatively. (Default: 0.8)"""
+    make the model answer more creatively. (Default: ``0.8``)"""
 
     seed: Optional[int] = None
     """Sets the random number seed to use for generation. Setting this
@@ -115,21 +115,21 @@ class OllamaLLM(BaseLLM):
 
     tfs_z: Optional[float] = None
     """Tail free sampling is used to reduce the impact of less probable
-    tokens from the output. A higher value (e.g., 2.0) will reduce the
-    impact more, while a value of 1.0 disables this setting. (default: 1)"""
+    tokens from the output. A higher value (e.g., ``2.0``) will reduce the
+    impact more, while a value of 1.0 disables this setting. (default: ``1``)"""
 
     top_k: Optional[int] = None
-    """Reduces the probability of generating nonsense. A higher value (e.g. 100)
-    will give more diverse answers, while a lower value (e.g. 10)
-    will be more conservative. (Default: 40)"""
+    """Reduces the probability of generating nonsense. A higher value (e.g. ``100``)
+    will give more diverse answers, while a lower value (e.g. ``10``)
+    will be more conservative. (Default: ``40``)"""
 
     top_p: Optional[float] = None
-    """Works together with top-k. A higher value (e.g., 0.95) will lead
-    to more diverse text, while a lower value (e.g., 0.5) will
-    generate more focused and conservative text. (Default: 0.9)"""
+    """Works together with top-k. A higher value (e.g., ``0.95``) will lead
+    to more diverse text, while a lower value (e.g., ``0.5``) will
+    generate more focused and conservative text. (Default: ``0.9``)"""
 
     format: Literal["", "json"] = ""
-    """Specify the format of the output (options: json)"""
+    """Specify the format of the output (options: ``'json'``)"""
 
     keep_alive: Optional[Union[int, str]] = None
     """How long the model will stay loaded into memory."""
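For completion-style use the same option fields apply, plus ``format`` for constrained JSON output. A minimal sketch with illustrative values; the model name is an assumption::

    from langchain_ollama import OllamaLLM

    llm = OllamaLLM(
        model="llama3",
        format="json",     # ask Ollama to emit valid JSON
        num_predict=256,   # cap the completion length
        temperature=0.2,
        seed=42,           # reproducible sampling for the same prompt
        keep_alive="10m",  # keep the model resident after the call
    )
    print(llm.invoke("Return a JSON object with keys 'city' and 'country'."))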
@@ -139,33 +139,35 @@ class OllamaLLM(BaseLLM):
 
     client_kwargs: Optional[dict] = {}
     """Additional kwargs to pass to the httpx clients.
 
     These arguments are passed to both synchronous and async clients.
-    Use sync_client_kwargs and async_client_kwargs to pass different arguments
+
+    Use ``sync_client_kwargs`` and ``async_client_kwargs`` to pass different arguments
     to synchronous and asynchronous clients.
 
     """
 
     async_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before passing to the HTTPX
+    """Additional kwargs to merge with ``client_kwargs`` before passing to the HTTPX
     AsyncClient.
 
     For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#asyncclient>`__.
 
     """
 
     sync_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before passing to the HTTPX Client.
+    """Additional kwargs to merge with ``client_kwargs`` before
+    passing to the HTTPX Client.
 
     For a full list of the params, see the `HTTPX documentation <https://www.python-httpx.org/api/#client>`__.
 
     """
 
     _client: Optional[Client] = PrivateAttr(default=None)
-    """
-    The client to use for making requests.
-    """
+    """The client to use for making requests."""
 
     _async_client: Optional[AsyncClient] = PrivateAttr(default=None)
-    """
-    The async client to use for making requests.
-    """
+    """The async client to use for making requests."""
 
     def _generate_params(
         self,
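Because separate sync and async clients are kept, the async entry points pick up ``async_client_kwargs`` automatically. A minimal sketch, assuming a running Ollama server; the header name and model are illustrative::

    import asyncio

    from langchain_ollama import OllamaLLM

    async def main() -> None:
        llm = OllamaLLM(
            model="llama3",
            async_client_kwargs={"headers": {"x-request-source": "docs-example"}},
        )
        # ainvoke() goes through the private async client set up above.
        print(await llm.ainvoke("Say hello in one word."))

    asyncio.run(main())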