Added embeddings support for ollama (#10124)

- Description: Added support for Ollama embeddings - Issue: the issue # it fixes (if applicable), - Dependencies: N/A - Tag maintainer: for a quicker response, tag the relevant maintainer (see below), - Twitter handle: @herrjemand cc https://github.com/jmorganca/ollama/issues/436
2025-09-15 14:36:54 +00:00 · 2023-09-15 12:42:39 +12:00
parent 48a4efc51a
commit 5e50b89164
4 changed files with 458 additions and 4 deletions
--- a/libs/langchain/langchain/embeddings/init.py
+++ b/libs/langchain/langchain/embeddings/init.py
@@ -49,6 +49,7 @@ from langchain.embeddings.modelscope_hub import ModelScopeEmbeddings
 from langchain.embeddings.mosaicml import MosaicMLInstructorEmbeddings
 from langchain.embeddings.nlpcloud import NLPCloudEmbeddings
 from langchain.embeddings.octoai_embeddings import OctoAIEmbeddings
+from langchain.embeddings.ollama import OllamaEmbeddings
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.embeddings.sagemaker_endpoint import SagemakerEndpointEmbeddings
 from langchain.embeddings.self_hosted import SelfHostedEmbeddings
@@ -106,6 +107,7 @@ __all__ = [
    "AwaEmbeddings",
    "HuggingFaceBgeEmbeddings",
    "ErnieEmbeddings",
+    "OllamaEmbeddings",
    "QianfanEmbeddingsEndpoint",
 ]

--- a/libs/langchain/langchain/embeddings/ollama.py
+++ b/libs/langchain/langchain/embeddings/ollama.py
@@ -0,0 +1,205 @@
+from typing import Any, Dict, List, Mapping, Optional
+
+import requests
+
+from langchain.embeddings.base import Embeddings
+from langchain.pydantic_v1 import BaseModel, Extra
+
+
+class OllamaEmbeddings(BaseModel, Embeddings):
+    """Ollama locally runs large language models.
+
+    To use, follow the instructions at https://ollama.ai/.
+
+    Example:
+        .. code-block:: python
+
+            from langchain.embeddings import OllamaEmbeddings
+            ollama_emb = OllamaEmbeddings(
+                model="llama:7b",
+            )
+            r1 = ollama_emb.embed_documents(
+                [
+                    "Alpha is the first letter of Greek alphabet",
+                    "Beta is the second letter of Greek alphabet",
+                ]
+            )
+            r2 = ollama_emb.embed_query(
+                "What is the second letter of Greek alphabet"
+            )
+
+    """
+
+    base_url: str = "http://localhost:11434"
+    """Base url the model is hosted under."""
+    model: str = "llama2"
+    """Model name to use."""
+
+    embed_instruction: str = "passage: "
+    """Instruction used to embed documents."""
+    query_instruction: str = "query: "
+    """Instruction used to embed the query."""
+
+    mirostat: Optional[int]
+    """Enable Mirostat sampling for controlling perplexity.
+    (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"""
+
+    mirostat_eta: Optional[float]
+    """Influences how quickly the algorithm responds to feedback
+    from the generated text. A lower learning rate will result in
+    slower adjustments, while a higher learning rate will make
+    the algorithm more responsive. (Default: 0.1)"""
+
+    mirostat_tau: Optional[float]
+    """Controls the balance between coherence and diversity
+    of the output. A lower value will result in more focused and
+    coherent text. (Default: 5.0)"""
+
+    num_ctx: Optional[int]
+    """Sets the size of the context window used to generate the
+    next token. (Default: 2048)	"""
+
+    num_gpu: Optional[int]
+    """The number of GPUs to use. On macOS it defaults to 1 to
+    enable metal support, 0 to disable."""
+
+    num_thread: Optional[int]
+    """Sets the number of threads to use during computation.
+    By default, Ollama will detect this for optimal performance.
+    It is recommended to set this value to the number of physical
+    CPU cores your system has (as opposed to the logical number of cores)."""
+
+    repeat_last_n: Optional[int]
+    """Sets how far back for the model to look back to prevent
+    repetition. (Default: 64, 0 = disabled, -1 = num_ctx)"""
+
+    repeat_penalty: Optional[float]
+    """Sets how strongly to penalize repetitions. A higher value (e.g., 1.5)
+    will penalize repetitions more strongly, while a lower value (e.g., 0.9)
+    will be more lenient. (Default: 1.1)"""
+
+    temperature: Optional[float]
+    """The temperature of the model. Increasing the temperature will
+    make the model answer more creatively. (Default: 0.8)"""
+
+    stop: Optional[List[str]]
+    """Sets the stop tokens to use."""
+
+    tfs_z: Optional[float]
+    """Tail free sampling is used to reduce the impact of less probable
+    tokens from the output. A higher value (e.g., 2.0) will reduce the
+    impact more, while a value of 1.0 disables this setting. (default: 1)"""
+
+    top_k: Optional[int]
+    """Reduces the probability of generating nonsense. A higher value (e.g. 100)
+    will give more diverse answers, while a lower value (e.g. 10)
+    will be more conservative. (Default: 40)"""
+
+    top_p: Optional[int]
+    """Works together with top-k. A higher value (e.g., 0.95) will lead
+    to more diverse text, while a lower value (e.g., 0.5) will
+    generate more focused and conservative text. (Default: 0.9)"""
+
+    @property
+    def _default_params(self) -> Dict[str, Any]:
+        """Get the default parameters for calling Ollama."""
+        return {
+            "model": self.model,
+            "options": {
+                "mirostat": self.mirostat,
+                "mirostat_eta": self.mirostat_eta,
+                "mirostat_tau": self.mirostat_tau,
+                "num_ctx": self.num_ctx,
+                "num_gpu": self.num_gpu,
+                "num_thread": self.num_thread,
+                "repeat_last_n": self.repeat_last_n,
+                "repeat_penalty": self.repeat_penalty,
+                "temperature": self.temperature,
+                "stop": self.stop,
+                "tfs_z": self.tfs_z,
+                "top_k": self.top_k,
+                "top_p": self.top_p,
+            },
+        }
+
+    model_kwargs: Optional[dict] = None
+    """Other model keyword args"""
+
+    @property
+    def _identifying_params(self) -> Mapping[str, Any]:
+        """Get the identifying parameters."""
+        return {**{"model": self.model}, **self._default_params}
+
+    class Config:
+        """Configuration for this pydantic object."""
+
+        extra = Extra.forbid
+
+    def _process_emb_response(self, input: str) -> List[float]:
+        """Process a response from the API.
+
+        Args:
+            response: The response from the API.
+
+        Returns:
+            The response as a dictionary.
+        """
+        headers = {
+            "Content-Type": "application/json",
+        }
+
+        try:
+            res = requests.post(
+                f"{self.base_url}/api/embeddings",
+                headers=headers,
+                json={"model": self.model, "prompt": input, **self._default_params},
+            )
+        except requests.exceptions.RequestException as e:
+            raise ValueError(f"Error raised by inference endpoint: {e}")
+
+        if res.status_code != 200:
+            raise ValueError(
+                "Error raised by inference API HTTP code: %s, %s"
+                % (res.status_code, res.text)
+            )
+        try:
+            t = res.json()
+            return t["embedding"]
+        except requests.exceptions.JSONDecodeError as e:
+            raise ValueError(
+                f"Error raised by inference API: {e}.\nResponse: {res.text}"
+            )
+
+    def _embed(self, input: List[str]) -> List[List[float]]:
+        embeddings_list: List[List[float]] = []
+        for prompt in input:
+            embeddings = self._process_emb_response(prompt)
+            embeddings_list.append(embeddings)
+
+        return embeddings_list
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Embed documents using a Ollama deployed embedding model.
+
+        Args:
+            texts: The list of texts to embed.
+
+        Returns:
+            List of embeddings, one for each text.
+        """
+        instruction_pairs = [f"{self.embed_instruction}{text}" for text in texts]
+        embeddings = self._embed(instruction_pairs)
+        return embeddings
+
+    def embed_query(self, text: str) -> List[float]:
+        """Embed a query using a Ollama deployed embedding model.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            Embeddings for the text.
+        """
+        instruction_pair = f"{self.query_instruction}{text}"
+        embedding = self._embed([instruction_pair])[0]
+        return embedding