[feat] Added backwards compatibility for OllamaEmbeddings initialization (migration from langchain_community.embeddings to langchain_ollama.embeddings) (#29296)
- [feat] **Added backwards compatibility for OllamaEmbeddings initialization (migration from `langchain_community.embeddings` to `langchain_ollama.embeddings`)**: "langchain_ollama"
- **Description:** Since `OllamaEmbeddings` from `langchain_community.embeddings` is deprecated, code is being migrated to `langchain_ollama.embeddings`. However, the new package does not offer backward compatibility for the initialization parameters of the `OllamaEmbeddings` object (see the import sketch just after this list).
- **Issue:** #29294
- **Dependencies:** None
- **Twitter handle:** @BaqarAbbas2001
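For concreteness, a minimal sketch of the import migration this PR concerns (assumes both packages are installed; the two imports are shown together for illustration only):

```python
# Before (deprecated): the community implementation, which accepted
# model options such as temperature and num_ctx at init time.
from langchain_community.embeddings import OllamaEmbeddings

# After: the dedicated partner package. Prior to this PR, passing those
# same option parameters here was rejected, since the class's pydantic
# model config uses extra="forbid" (see the diff below).
from langchain_ollama import OllamaEmbeddings
```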
## Additional Information
Previously, `OllamaEmbeddings` from `langchain_community.embeddings` supported the following options:
`libs/community/langchain_community/embeddings/ollama.py` (L125-L139 at commit `e9abe583b2`)
However, the new package (`from langchain_ollama import OllamaEmbeddings`) provides no way to set these options. I have added these parameters to resolve this issue.
This issue was also discussed in
https://github.com/langchain-ai/langchain/discussions/29113
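With this change, those options can be set on the new class again. A minimal usage sketch (the model name and option values are illustrative; a local Ollama server with the model pulled is assumed):

```python
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(
    model="llama3",   # illustrative; any pulled Ollama model works
    temperature=0.0,  # option fields restored by this PR
    num_ctx=4096,     # context window size (Ollama default: 2048)
    num_thread=8,     # e.g. the number of physical CPU cores
    top_k=40,
    top_p=0.9,
)

vector = embeddings.embed_query("LangChain now forwards Ollama options.")
print(len(vector))  # embedding dimensionality depends on the model
```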
This commit is contained in:
Parent: 7a95ffc775
Commit: f175319303
Diff of the `langchain_ollama` embeddings module:

```diff
@@ -1,9 +1,6 @@
 """Ollama embeddings models."""
 
-from typing import (
-    List,
-    Optional,
-)
+from typing import Any, Dict, List, Optional
 
 from langchain_core.embeddings import Embeddings
 from ollama import AsyncClient, Client
@@ -144,10 +141,89 @@ class OllamaEmbeddings(BaseModel, Embeddings):
     The async client to use for making requests.
     """
 
+    mirostat: Optional[int] = None
+    """Enable Mirostat sampling for controlling perplexity.
+    (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
+
+    mirostat_eta: Optional[float] = None
+    """Influences how quickly the algorithm responds to feedback
+    from the generated text. A lower learning rate will result in
+    slower adjustments, while a higher learning rate will make
+    the algorithm more responsive. (Default: 0.1)"""
+
+    mirostat_tau: Optional[float] = None
+    """Controls the balance between coherence and diversity
+    of the output. A lower value will result in more focused and
+    coherent text. (Default: 5.0)"""
+
+    num_ctx: Optional[int] = None
+    """Sets the size of the context window used to generate the
+    next token. (Default: 2048)"""
+
+    num_gpu: Optional[int] = None
+    """The number of GPUs to use. On macOS it defaults to 1 to
+    enable metal support, 0 to disable."""
+
+    num_thread: Optional[int] = None
+    """Sets the number of threads to use during computation.
+    By default, Ollama will detect this for optimal performance.
+    It is recommended to set this value to the number of physical
+    CPU cores your system has (as opposed to the logical number of cores)."""
+
+    repeat_last_n: Optional[int] = None
+    """Sets how far back for the model to look back to prevent
+    repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
+
+    repeat_penalty: Optional[float] = None
+    """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
+    will penalize repetitions more strongly, while a lower value (e.g., 0.9)
+    will be more lenient. (Default: 1.1)"""
+
+    temperature: Optional[float] = None
+    """The temperature of the model. Increasing the temperature will
+    make the model answer more creatively. (Default: 0.8)"""
+
+    stop: Optional[List[str]] = None
+    """Sets the stop tokens to use."""
+
+    tfs_z: Optional[float] = None
+    """Tail free sampling is used to reduce the impact of less probable
+    tokens from the output. A higher value (e.g., 2.0) will reduce the
+    impact more, while a value of 1.0 disables this setting. (default: 1)"""
+
+    top_k: Optional[int] = None
+    """Reduces the probability of generating nonsense. A higher value (e.g. 100)
+    will give more diverse answers, while a lower value (e.g. 10)
+    will be more conservative. (Default: 40)"""
+
+    top_p: Optional[float] = None
+    """Works together with top-k. A higher value (e.g., 0.95) will lead
+    to more diverse text, while a lower value (e.g., 0.5) will
+    generate more focused and conservative text. (Default: 0.9)"""
+
     model_config = ConfigDict(
         extra="forbid",
     )
 
+    @property
+    def _default_params(self) -> Dict[str, Any]:
+        """Get the default parameters for calling Ollama."""
+        return {
+            "mirostat": self.mirostat,
+            "mirostat_eta": self.mirostat_eta,
+            "mirostat_tau": self.mirostat_tau,
+            "num_ctx": self.num_ctx,
+            "num_gpu": self.num_gpu,
+            "num_thread": self.num_thread,
+            "repeat_last_n": self.repeat_last_n,
+            "repeat_penalty": self.repeat_penalty,
+            "temperature": self.temperature,
+            "stop": self.stop,
+            "tfs_z": self.tfs_z,
+            "top_k": self.top_k,
+            "top_p": self.top_p,
+        }
+
     @model_validator(mode="after")
     def _set_clients(self) -> Self:
         """Set clients to use for ollama."""
@@ -158,7 +234,9 @@ class OllamaEmbeddings(BaseModel, Embeddings):
 
     def embed_documents(self, texts: List[str]) -> List[List[float]]:
         """Embed search docs."""
-        embedded_docs = self._client.embed(self.model, texts)["embeddings"]
+        embedded_docs = self._client.embed(
+            self.model, texts, options=self._default_params
+        )["embeddings"]
         return embedded_docs
 
     def embed_query(self, text: str) -> List[float]:
```
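For context, a short sketch of what the `options=` plumbing above amounts to at the client level (illustrative model name and values; assumes a running Ollama server):

```python
from ollama import Client

client = Client()

# Roughly what embed_documents now does: the option fields gathered by
# _default_params are forwarded to the Ollama client's embed call.
options = {"temperature": 0.0, "num_ctx": 4096}
result = client.embed("llama3", ["first doc", "second doc"], options=options)
embedded_docs = result["embeddings"]  # one vector per input text
```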