qdrant: New sparse embeddings provider interface - PART 1 (#24015)

## Description

This PR introduces a new sparse embedding provider interface to work
with the new Qdrant implementation that will follow this PR.

Additionally, an implementation of this interface is provided with
https://github.com/qdrant/fastembed.

This PR will be followed by
https://github.com/Anush008/langchain/pull/3.
This commit is contained in:
Anush
2024-07-12 05:37:25 +05:30
committed by GitHub
parent 1132fb801b
commit 7790d67f94
8 changed files with 828 additions and 8 deletions

View File

@@ -0,0 +1,36 @@
from abc import ABC, abstractmethod
from typing import List
from langchain_core.runnables.config import run_in_executor
from pydantic import BaseModel, Field
class SparseVector(BaseModel, extra="forbid"):
    """Sparse vector stored as two parallel lists: ``indices`` and ``values``.

    The model is declared with ``extra="forbid"``, so only the two fields
    below are part of its schema. The uniqueness and equal-length
    requirements are stated in the field descriptions; this class defines
    no validator that enforces them.
    """

    # Integer indices of the vector's entries (required field).
    indices: List[int] = Field(
        ...,
        description="indices must be unique",
    )

    # Float values paired with `indices` (required field).
    values: List[float] = Field(
        ...,
        description="values and indices must be the same length",
    )
class SparseEmbeddings(ABC):
    """Abstract base class for sparse embedding models used with Qdrant.

    Concrete subclasses must provide ``embed_documents`` and ``embed_query``.
    The ``a``-prefixed coroutines have default implementations that pass the
    matching synchronous method to ``run_in_executor``; subclasses may
    override them.
    """

    @abstractmethod
    def embed_documents(self, texts: List[str]) -> List[SparseVector]:
        """Compute sparse embeddings for a list of document texts.

        Args:
            texts: The documents to embed.

        Returns:
            The sparse vectors produced for ``texts``.
        """

    @abstractmethod
    def embed_query(self, text: str) -> SparseVector:
        """Compute the sparse embedding for a single query string.

        Args:
            text: The query to embed.

        Returns:
            The sparse vector produced for ``text``.
        """

    async def aembed_documents(self, texts: List[str]) -> List[SparseVector]:
        """Async counterpart of ``embed_documents``.

        Awaits ``run_in_executor`` with ``None`` as the executor argument,
        ``self.embed_documents`` as the callable, and ``texts`` as its input.
        """
        document_vectors = await run_in_executor(None, self.embed_documents, texts)
        return document_vectors

    async def aembed_query(self, text: str) -> SparseVector:
        """Async counterpart of ``embed_query``.

        Awaits ``run_in_executor`` with ``None`` as the executor argument,
        ``self.embed_query`` as the callable, and ``text`` as its input.
        """
        query_vector = await run_in_executor(None, self.embed_query, text)
        return query_vector