feat: FastEmbed embedding provider (#13109)

## Description: This PR adds [Qdrant/FastEmbed](https://qdrant.github.io/fastembed/) as a local embeddings provider, along with associated tests and documentation. **Documentation preview:** https://langchain-git-fork-anush008-master-langchain.vercel.app/docs/integrations/text_embedding/fastembed --------- Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
2025-07-01 10:54:15 +00:00 · 2023-11-11 00:21:52 +05:30 · 2023-11-11 00:21:52 +05:30 · 52f34de9b7
commit 52f34de9b7
parent b0e8cbe0b3
5 changed files with 341 additions and 0 deletions
--- a/docs/docs/integrations/text_embedding/fastembed.ipynb
+++ b/docs/docs/integrations/text_embedding/fastembed.ipynb
@ -0,0 +1,154 @@
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Qdrant FastEmbed\n",
    "\n",
    "[FastEmbed](https://qdrant.github.io/fastembed/) is a lightweight, fast, Python library built for embedding generation. \n",
    "\n",
    "- Quantized model weights\n",
    "- ONNX Runtime, no PyTorch dependency\n",
    "- CPU-first design\n",
    "- Data-parallelism for encoding of large datasets."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "2a773d8d",
   "metadata": {},
   "source": [
    "## Dependencies\n",
    "\n",
    "To use FastEmbed with LangChain, install the `fastembed` Python package."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "91ea14ce-831d-409a-a88f-30353acdabd1",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "%pip install fastembed"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "426f1156",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "3f5dc9d7-65e3-4b5b-9086-3327d016cfe0",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from langchain.embeddings.fastembed import FastEmbedEmbeddings"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Instantiating FastEmbed\n",
    "   \n",
    "### Parameters\n",
    "- `model_name: str` (default: \"BAAI/bge-small-en-v1.5\")\n",
    "    > Name of the FastEmbedding model to use. You can find the list of supported models [here](https://qdrant.github.io/fastembed/examples/Supported_Models/).\n",
    "\n",
    "- `max_length: int` (default: 512)\n",
    "    > The maximum number of tokens. Unknown behavior for values > 512.\n",
    "\n",
    "- `cache_dir: Optional[str]`\n",
    "    > The path to the cache directory. Defaults to `local_cache` in the parent directory.\n",
    "\n",
    "- `threads: Optional[int]`\n",
    "    > The number of threads a single onnxruntime session can use. Defaults to None.\n",
    "\n",
    "- `doc_embed_type: Literal[\"default\", \"passage\"]` (default: \"default\")\n",
    "    > \"default\": Uses FastEmbed's default embedding method.\n",
    "    \n",
    "    > \"passage\": Prefixes the text with \"passage\" before embedding."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6fb585dd",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "embeddings = FastEmbedEmbeddings()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Usage\n",
    "\n",
    "### Generating document embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "document_embeddings = embeddings.embed_documents([\"This is a document\", \"This is some other document\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Generating query embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query_embeddings = embeddings.embed_query(\"This is a query\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
--- a/libs/langchain/langchain/embeddings/init.py
+++ b/libs/langchain/langchain/embeddings/init.py
@ -32,6 +32,7 @@ from langchain.embeddings.elasticsearch import ElasticsearchEmbeddings
 from langchain.embeddings.embaas import EmbaasEmbeddings
 from langchain.embeddings.ernie import ErnieEmbeddings
 from langchain.embeddings.fake import DeterministicFakeEmbedding, FakeEmbeddings
 from langchain.embeddings.fastembed import FastEmbedEmbeddings
 from langchain.embeddings.google_palm import GooglePalmEmbeddings
 from langchain.embeddings.gpt4all import GPT4AllEmbeddings
 from langchain.embeddings.gradient_ai import GradientEmbeddings
@ -77,6 +78,7 @@ __all__ = [
    "ClarifaiEmbeddings",
    "CohereEmbeddings",
    "ElasticsearchEmbeddings",
    "FastEmbedEmbeddings",
    "HuggingFaceEmbeddings",
    "HuggingFaceInferenceAPIEmbeddings",
    "GradientEmbeddings",
--- a/libs/langchain/langchain/embeddings/fastembed.py
+++ b/libs/langchain/langchain/embeddings/fastembed.py
@ -0,0 +1,108 @@
 from typing import Any, Dict, List, Literal, Optional
 import numpy as np
 from langchain.pydantic_v1 import BaseModel, Extra, root_validator
 from langchain.schema.embeddings import Embeddings
 class FastEmbedEmbeddings(BaseModel, Embeddings):
     """Qdrant FastEmbedding models.

     FastEmbed is a lightweight, fast, Python library built for embedding generation.
     See more documentation at:
     * https://github.com/qdrant/fastembed/
     * https://qdrant.github.io/fastembed/

     To use this class, you must install the `fastembed` Python package.

     `pip install fastembed`

     Example:
         from langchain.embeddings import FastEmbedEmbeddings
         fastembed = FastEmbedEmbeddings()
     """

     model_name: str = "BAAI/bge-small-en-v1.5"
     """Name of the FastEmbedding model to use.
     Defaults to "BAAI/bge-small-en-v1.5".
     Find the list of supported models at
     https://qdrant.github.io/fastembed/examples/Supported_Models/
     """

     max_length: int = 512
     """The maximum number of tokens. Defaults to 512.
     Unknown behavior for values > 512.
     """

     cache_dir: Optional[str] = None
     """The path to the cache directory.
     Defaults to `local_cache` in the parent directory.
     """

     threads: Optional[int] = None
     """The number of threads a single onnxruntime session can use.
     Defaults to None.
     """

     doc_embed_type: Literal["default", "passage"] = "default"
     """Type of embedding to use for documents.
     "default": Uses FastEmbed's default embedding method.
     "passage": Prefixes the text with "passage" before embedding.
     """

     _model: Any  # : :meta private:

     class Config:
         """Configuration for this pydantic object."""

         extra = Extra.forbid

     # skip_on_failure: do not build the model when field validation has
     # already failed; the validation error is reported instead.
     @root_validator(skip_on_failure=True)
     def validate_environment(cls, values: Dict) -> Dict:
         """Validate that FastEmbed has been installed and build the model.

         Args:
             values: The validated field values of the instance being created.

         Returns:
             The same values, with the constructed model stored under ``_model``.

         Raises:
             ImportError: If the `fastembed` package cannot be imported.
         """
         # Only the import is guarded, so that an ImportError raised while
         # constructing the model is not misreported as a missing package.
         try:
             from fastembed.embedding import FlagEmbedding
         except ImportError as ie:
             raise ImportError(
                 "Could not import 'fastembed' Python package. "
                 "Please install it with `pip install fastembed`."
             ) from ie

         values["_model"] = FlagEmbedding(
             model_name=values.get("model_name"),
             max_length=values.get("max_length"),
             cache_dir=values.get("cache_dir"),
             threads=values.get("threads"),
         )
         return values

     def embed_documents(self, texts: List[str]) -> List[List[float]]:
         """Generate embeddings for documents using FastEmbed.

         Uses the model's ``passage_embed`` method when ``doc_embed_type`` is
         "passage", and its ``embed`` method otherwise.

         Args:
             texts: The list of texts to embed.

         Returns:
             List of embeddings, one for each text.
         """
         embeddings: List[np.ndarray]
         if self.doc_embed_type == "passage":
             embeddings = self._model.passage_embed(texts)
         else:
             embeddings = self._model.embed(texts)
         return [e.tolist() for e in embeddings]

     def embed_query(self, text: str) -> List[float]:
         """Generate query embeddings using FastEmbed.

         Args:
             text: The text to embed.

         Returns:
             Embeddings for the text.
         """
         # query_embed yields results lazily; a single query needs only the first.
         query_embeddings: np.ndarray = next(self._model.query_embed(text))
         return query_embeddings.tolist()
--- a/libs/langchain/tests/integration_tests/embeddings/test_fastembed.py
+++ b/libs/langchain/tests/integration_tests/embeddings/test_fastembed.py
@ -0,0 +1,76 @@
 """Test FastEmbed embeddings."""
 import pytest
 from langchain.embeddings.fastembed import FastEmbedEmbeddings
@pytest.mark.parametrize(
    "model_name", ["sentence-transformers/all-MiniLM-L6-v2", "BAAI/bge-small-en-v1.5"]
)
@pytest.mark.parametrize("max_length", [50, 512])
@pytest.mark.parametrize("doc_embed_type", ["default", "passage"])
@pytest.mark.parametrize("threads", [0, 10])
def test_fastembed_embedding_documents(
    model_name: str, max_length: int, doc_embed_type: str, threads: int
) -> None:
    """Embed two short documents and check the count and vector width."""
    embedder = FastEmbedEmbeddings(
        model_name=model_name,
        max_length=max_length,
        doc_embed_type=doc_embed_type,
        threads=threads,
    )
    vectors = embedder.embed_documents(["foo bar", "bar foo"])
    # One vector per input document.
    assert len(vectors) == 2
    # Both parametrized models are expected to produce 384-dimensional vectors.
    first_vector = vectors[0]
    assert len(first_vector) == 384
@pytest.mark.parametrize(
    "model_name", ["sentence-transformers/all-MiniLM-L6-v2", "BAAI/bge-small-en-v1.5"]
)
@pytest.mark.parametrize("max_length", [50, 512])
def test_fastembed_embedding_query(model_name: str, max_length: int) -> None:
    """Embed a single query string and check the vector width."""
    embedder = FastEmbedEmbeddings(model_name=model_name, max_length=max_length)
    query_vector = embedder.embed_query("foo bar")
    # Both parametrized models are expected to produce 384-dimensional vectors.
    assert len(query_vector) == 384
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model_name", ["sentence-transformers/all-MiniLM-L6-v2", "BAAI/bge-small-en-v1.5"]
)
@pytest.mark.parametrize("max_length", [50, 512])
@pytest.mark.parametrize("doc_embed_type", ["default", "passage"])
@pytest.mark.parametrize("threads", [0, 10])
async def test_fastembed_async_embedding_documents(
    model_name: str, max_length: int, doc_embed_type: str, threads: int
) -> None:
    """Embed two short documents via the async API and check count and width."""
    embedder = FastEmbedEmbeddings(
        model_name=model_name,
        max_length=max_length,
        doc_embed_type=doc_embed_type,
        threads=threads,
    )
    vectors = await embedder.aembed_documents(["foo bar", "bar foo"])
    # One vector per input document.
    assert len(vectors) == 2
    # Both parametrized models are expected to produce 384-dimensional vectors.
    first_vector = vectors[0]
    assert len(first_vector) == 384
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "model_name", ["sentence-transformers/all-MiniLM-L6-v2", "BAAI/bge-small-en-v1.5"]
)
@pytest.mark.parametrize("max_length", [50, 512])
async def test_fastembed_async_embedding_query(
    model_name: str, max_length: int
) -> None:
    """Embed a single query string via the async API and check the vector width."""
    embedder = FastEmbedEmbeddings(model_name=model_name, max_length=max_length)
    query_vector = await embedder.aembed_query("foo bar")
    # Both parametrized models are expected to produce 384-dimensional vectors.
    assert len(query_vector) == 384
--- a/libs/langchain/tests/unit_tests/embeddings/test_imports.py
+++ b/libs/langchain/tests/unit_tests/embeddings/test_imports.py
@ -7,6 +7,7 @@ EXPECTED_ALL = [
    "ClarifaiEmbeddings",
    "CohereEmbeddings",
    "ElasticsearchEmbeddings",
    "FastEmbedEmbeddings",
    "HuggingFaceEmbeddings",
    "HuggingFaceInferenceAPIEmbeddings",
    "GradientEmbeddings",