diff --git a/libs/partners/ollama/langchain_ollama/_utils.py b/libs/partners/ollama/langchain_ollama/_utils.py
index f3cd6fe9a4d..3da9981621f 100644
--- a/libs/partners/ollama/langchain_ollama/_utils.py
+++ b/libs/partners/ollama/langchain_ollama/_utils.py
@@ -29,7 +29,10 @@ def validate_model(client: Client, model_name: str) -> None:
             )
             raise ValueError(msg)
     except ConnectError as e:
-        msg = "Failed to connect to Ollama. Please check that Ollama is downloaded, running and accessible. https://ollama.com/download"  # noqa: E501
+        msg = (
+            "Failed to connect to Ollama. Please check that Ollama is downloaded, "
+            "running and accessible. https://ollama.com/download"
+        )
         raise ValueError(msg) from e
     except ResponseError as e:
         msg = (
diff --git a/libs/partners/ollama/langchain_ollama/chat_models.py b/libs/partners/ollama/langchain_ollama/chat_models.py
index 446856b8d9f..0e9625890a8 100644
--- a/libs/partners/ollama/langchain_ollama/chat_models.py
+++ b/libs/partners/ollama/langchain_ollama/chat_models.py
@@ -482,7 +482,7 @@ class ChatOllama(BaseChatModel):
 
     repeat_last_n: Optional[int] = None
     """Sets how far back for the model to look back to prevent
-    repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = num_ctx)"""
+    repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)"""
 
     repeat_penalty: Optional[float] = None
     """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``)
diff --git a/libs/partners/ollama/langchain_ollama/embeddings.py b/libs/partners/ollama/langchain_ollama/embeddings.py
index ac5619a3b06..5f6c96a2997 100644
--- a/libs/partners/ollama/langchain_ollama/embeddings.py
+++ b/libs/partners/ollama/langchain_ollama/embeddings.py
@@ -21,12 +21,12 @@ class OllamaEmbeddings(BaseModel, Embeddings):
     """Ollama embedding model integration.
 
     Set up a local Ollama instance:
-        Install the Ollama package and set up a local Ollama instance
-        using the instructions here: https://github.com/ollama/ollama .
+        `Install the Ollama package <https://github.com/ollama/ollama>`__ and set up a
+        local Ollama instance.
 
         You will need to choose a model to serve.
 
-        You can view a list of available models via the model library (https://ollama.com/library).
+        You can view a list of available models via `the model library <https://ollama.com/library>`__.
 
         To fetch a model from the Ollama model library use ``ollama pull <name-of-model>``.
 
@@ -39,8 +39,8 @@ class OllamaEmbeddings(BaseModel, Embeddings):
         This will download the default tagged version of the model.
         Typically, the default points to the latest, smallest sized-parameter model.
 
-        * On Mac, the models will be downloaded to ~/.ollama/models
-        * On Linux (or WSL), the models will be stored at /usr/share/ollama/.ollama/models
+        * On Mac, the models will be downloaded to ``~/.ollama/models``
+        * On Linux (or WSL), the models will be stored at ``/usr/share/ollama/.ollama/models``
 
         You can specify the exact version of the model of
         interest as such ``ollama pull vicuna:13b-v1.5-16k-q4_0``.
@@ -132,6 +132,7 @@ class OllamaEmbeddings(BaseModel, Embeddings):
     """Whether to validate the model exists in ollama locally on initialization.
 
     .. versionadded:: 0.3.4
+
     """
 
     base_url: Optional[str] = None
@@ -139,60 +140,62 @@ class OllamaEmbeddings(BaseModel, Embeddings):
 
     client_kwargs: Optional[dict] = {}
     """Additional kwargs to pass to the httpx clients.
+
     These arguments are passed to both synchronous and async clients.
-    Use sync_client_kwargs and async_client_kwargs to pass different arguments
+
+    Use ``sync_client_kwargs`` and ``async_client_kwargs`` to pass different arguments
     to synchronous and asynchronous clients.
+
     """
 
     async_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before passing to the httpx
+    """Additional kwargs to merge with ``client_kwargs`` before passing to the httpx
     AsyncClient.
     For a full list of the params, see the `HTTPX documentation
     <https://www.python-httpx.org/api/#asyncclient>`__.
+
     """
 
     sync_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before passing to the HTTPX Client.
+    """Additional kwargs to merge with ``client_kwargs`` before
+    passing to the HTTPX Client.
     For a full list of the params, see the `HTTPX documentation
     <https://www.python-httpx.org/api/#client>`__.
+
     """
 
     _client: Optional[Client] = PrivateAttr(default=None)
-    """
-    The client to use for making requests.
-    """
+    """The client to use for making requests."""
 
     _async_client: Optional[AsyncClient] = PrivateAttr(default=None)
-    """
-    The async client to use for making requests.
-    """
+    """The async client to use for making requests."""
 
     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
-    (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
+    (default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)"""
 
     mirostat_eta: Optional[float] = None
     """Influences how quickly the algorithm responds to feedback
     from the generated text. A lower learning rate will result in
     slower adjustments, while a higher learning rate will make
-    the algorithm more responsive. (Default: 0.1)"""
+    the algorithm more responsive. (Default: ``0.1``)"""
 
     mirostat_tau: Optional[float] = None
     """Controls the balance between coherence and diversity
     of the output. A lower value will result in more focused and
-    coherent text. (Default: 5.0)"""
+    coherent text. (Default: ``5.0``)"""
 
     num_ctx: Optional[int] = None
     """Sets the size of the context window used to generate the
-    next token. (Default: 2048) """
+    next token. (Default: ``2048``) """
 
     num_gpu: Optional[int] = None
-    """The number of GPUs to use. On macOS it defaults to 1 to
-    enable metal support, 0 to disable."""
+    """The number of GPUs to use. On macOS it defaults to ``1`` to
+    enable metal support, ``0`` to disable."""
 
     keep_alive: Optional[int] = None
-    """controls how long the model will stay loaded into memory
-    following the request (default: 5m)
+    """Controls how long the model will stay loaded into memory
+    following the request (default: ``5m``)
     """
 
     num_thread: Optional[int] = None
@@ -203,34 +206,34 @@ class OllamaEmbeddings(BaseModel, Embeddings):
 
     repeat_last_n: Optional[int] = None
     """Sets how far back for the model to look back to prevent
-    repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
+    repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)"""
 
     repeat_penalty: Optional[float] = None
-    """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
-    will penalize repetitions more strongly, while a lower value (e.g., 0.9)
-    will be more lenient. (Default: 1.1)"""
+    """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``)
+    will penalize repetitions more strongly, while a lower value (e.g., ``0.9``)
+    will be more lenient. (Default: ``1.1``)"""
 
     temperature: Optional[float] = None
     """The temperature of the model. Increasing the temperature will
-    make the model answer more creatively. (Default: 0.8)"""
+    make the model answer more creatively. (Default: ``0.8``)"""
 
     stop: Optional[list[str]] = None
     """Sets the stop tokens to use."""
 
     tfs_z: Optional[float] = None
     """Tail free sampling is used to reduce the impact of less probable
-    tokens from the output. A higher value (e.g., 2.0) will reduce the
-    impact more, while a value of 1.0 disables this setting. (default: 1)"""
+    tokens from the output. A higher value (e.g., ``2.0``) will reduce the
+    impact more, while a value of ``1.0`` disables this setting. (default: ``1``)"""
 
     top_k: Optional[int] = None
-    """Reduces the probability of generating nonsense. A higher value (e.g. 100)
-    will give more diverse answers, while a lower value (e.g. 10)
-    will be more conservative. (Default: 40)"""
+    """Reduces the probability of generating nonsense. A higher value (e.g. ``100``)
+    will give more diverse answers, while a lower value (e.g. ``10``)
+    will be more conservative. (Default: ``40``)"""
 
     top_p: Optional[float] = None
-    """Works together with top-k. A higher value (e.g., 0.95) will lead
-    to more diverse text, while a lower value (e.g., 0.5) will
-    generate more focused and conservative text. (Default: 0.9)"""
+    """Works together with top-k. A higher value (e.g., ``0.95``) will lead
+    to more diverse text, while a lower value (e.g., ``0.5``) will
+    generate more focused and conservative text. (Default: ``0.9``)"""
 
     model_config = ConfigDict(
         extra="forbid",
@@ -257,7 +260,7 @@ class OllamaEmbeddings(BaseModel, Embeddings):
 
     @model_validator(mode="after")
     def _set_clients(self) -> Self:
-        """Set clients to use for ollama."""
+        """Set clients to use for Ollama."""
         client_kwargs = self.client_kwargs or {}
 
         sync_client_kwargs = client_kwargs
diff --git a/libs/partners/ollama/langchain_ollama/llms.py b/libs/partners/ollama/langchain_ollama/llms.py
index b433606340d..5ca2c961a2d 100644
--- a/libs/partners/ollama/langchain_ollama/llms.py
+++ b/libs/partners/ollama/langchain_ollama/llms.py
@@ -61,26 +61,26 @@ class OllamaLLM(BaseLLM):
 
     mirostat: Optional[int] = None
     """Enable Mirostat sampling for controlling perplexity.
-    (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
+    (default: ``0``, ``0`` = disabled, ``1`` = Mirostat, ``2`` = Mirostat 2.0)"""
 
     mirostat_eta: Optional[float] = None
     """Influences how quickly the algorithm responds to feedback
     from the generated text. A lower learning rate will result in
     slower adjustments, while a higher learning rate will make
-    the algorithm more responsive. (Default: 0.1)"""
+    the algorithm more responsive. (Default: ``0.1``)"""
 
     mirostat_tau: Optional[float] = None
     """Controls the balance between coherence and diversity
     of the output. A lower value will result in more focused and
-    coherent text. (Default: 5.0)"""
+    coherent text. (Default: ``5.0``)"""
 
     num_ctx: Optional[int] = None
     """Sets the size of the context window used to generate the
-    next token. (Default: 2048)"""
+    next token. (Default: ``2048``)"""
 
     num_gpu: Optional[int] = None
-    """The number of GPUs to use. On macOS it defaults to 1 to
-    enable metal support, 0 to disable."""
+    """The number of GPUs to use. On macOS it defaults to ``1`` to
+    enable metal support, ``0`` to disable."""
 
     num_thread: Optional[int] = None
     """Sets the number of threads to use during computation.
@@ -90,20 +90,20 @@ class OllamaLLM(BaseLLM):
 
     num_predict: Optional[int] = None
     """Maximum number of tokens to predict when generating text.
-    (Default: 128, -1 = infinite generation, -2 = fill context)"""
+    (Default: ``128``, ``-1`` = infinite generation, ``-2`` = fill context)"""
 
     repeat_last_n: Optional[int] = None
     """Sets how far back for the model to look back to prevent
-    repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
+    repetition. (Default: ``64``, ``0`` = disabled, ``-1`` = ``num_ctx``)"""
 
     repeat_penalty: Optional[float] = None
-    """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
-    will penalize repetitions more strongly, while a lower value (e.g., 0.9)
-    will be more lenient. (Default: 1.1)"""
+    """Sets how strongly to penalize repetitions. A higher value (e.g., ``1.5``)
+    will penalize repetitions more strongly, while a lower value (e.g., ``0.9``)
+    will be more lenient. (Default: ``1.1``)"""
 
     temperature: Optional[float] = None
     """The temperature of the model. Increasing the temperature will
-    make the model answer more creatively. (Default: 0.8)"""
+    make the model answer more creatively. (Default: ``0.8``)"""
 
     seed: Optional[int] = None
     """Sets the random number seed to use for generation. Setting this
@@ -115,21 +115,21 @@ class OllamaLLM(BaseLLM):
 
     tfs_z: Optional[float] = None
     """Tail free sampling is used to reduce the impact of less probable
-    tokens from the output. A higher value (e.g., 2.0) will reduce the
-    impact more, while a value of 1.0 disables this setting. (default: 1)"""
+    tokens from the output. A higher value (e.g., ``2.0``) will reduce the
+    impact more, while a value of 1.0 disables this setting. (default: ``1``)"""
 
     top_k: Optional[int] = None
-    """Reduces the probability of generating nonsense. A higher value (e.g. 100)
-    will give more diverse answers, while a lower value (e.g. 10)
-    will be more conservative. (Default: 40)"""
+    """Reduces the probability of generating nonsense. A higher value (e.g. ``100``)
+    will give more diverse answers, while a lower value (e.g. ``10``)
+    will be more conservative. (Default: ``40``)"""
 
     top_p: Optional[float] = None
-    """Works together with top-k. A higher value (e.g., 0.95) will lead
-    to more diverse text, while a lower value (e.g., 0.5) will
-    generate more focused and conservative text. (Default: 0.9)"""
+    """Works together with top-k. A higher value (e.g., ``0.95``) will lead
+    to more diverse text, while a lower value (e.g., ``0.5``) will
+    generate more focused and conservative text. (Default: ``0.9``)"""
 
     format: Literal["", "json"] = ""
-    """Specify the format of the output (options: json)"""
+    """Specify the format of the output (options: ``'json'``)"""
 
     keep_alive: Optional[Union[int, str]] = None
     """How long the model will stay loaded into memory."""
@@ -139,33 +139,35 @@ class OllamaLLM(BaseLLM):
 
     client_kwargs: Optional[dict] = {}
     """Additional kwargs to pass to the httpx clients.
+
    These arguments are passed to both synchronous and async clients.
-    Use sync_client_kwargs and async_client_kwargs to pass different arguments
+
+    Use ``sync_client_kwargs`` and ``async_client_kwargs`` to pass different arguments
     to synchronous and asynchronous clients.
+
     """
 
     async_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before passing to the HTTPX
+    """Additional kwargs to merge with ``client_kwargs`` before passing to the HTTPX
     AsyncClient.
     For a full list of the params, see the `HTTPX documentation
     <https://www.python-httpx.org/api/#asyncclient>`__.
+
     """
 
     sync_client_kwargs: Optional[dict] = {}
-    """Additional kwargs to merge with client_kwargs before passing to the HTTPX Client.
+    """Additional kwargs to merge with ``client_kwargs`` before
+    passing to the HTTPX Client.
     For a full list of the params, see the `HTTPX documentation
     <https://www.python-httpx.org/api/#client>`__.
+
     """
 
     _client: Optional[Client] = PrivateAttr(default=None)
-    """
-    The client to use for making requests.
-    """
+    """The client to use for making requests."""
 
     _async_client: Optional[AsyncClient] = PrivateAttr(default=None)
-    """
-    The async client to use for making requests.
-    """
+    """The async client to use for making requests."""
 
     def _generate_params(
         self,