mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-12 21:11:43 +00:00
qdrant: New sparse embeddings provider interface - PART 1 (#24015)
## Description This PR introduces a new sparse embedding provider interface to work with the new Qdrant implementation that will follow this PR. Additionally, an implementation of this interface is provided with https://github.com/qdrant/fastembed. This PR will be followed by https://github.com/Anush008/langchain/pull/3.
This commit is contained in:
36
libs/partners/qdrant/langchain_qdrant/sparse_embeddings.py
Normal file
36
libs/partners/qdrant/langchain_qdrant/sparse_embeddings.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List
|
||||
|
||||
from langchain_core.runnables.config import run_in_executor
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class SparseVector(BaseModel, extra="forbid"):
    """Sparse vector structure"""

    # `extra="forbid"` is handed to pydantic as model configuration, so
    # unknown field names are rejected instead of being silently accepted.
    #
    # Both fields are required (`...` as the default). The constraints
    # spelled out in the descriptions are metadata only: nothing in this
    # class checks uniqueness or matching lengths.

    # Presumably the positions of the stored entries -- confirm with callers.
    indices: List[int] = Field(
        ...,
        description="indices must be unique",
    )

    # Presumably the value stored at each entry of `indices`, pairwise.
    values: List[float] = Field(
        ...,
        description="values and indices must be the same length",
    )
|
||||
|
||||
|
||||
class SparseEmbeddings(ABC):
    """Abstract base for sparse embedding models used with Qdrant.

    Concrete subclasses must implement the two synchronous methods,
    ``embed_documents`` and ``embed_query``. The asynchronous counterparts
    defined here simply forward to those through ``run_in_executor``, so a
    subclass only overrides them when it has a native async implementation.
    """

    @abstractmethod
    def embed_documents(self, texts: List[str]) -> List[SparseVector]:
        """Produce one sparse vector for each text in ``texts``.

        Args:
            texts: The document texts to embed.

        Returns:
            A list of ``SparseVector`` objects.
        """

    @abstractmethod
    def embed_query(self, text: str) -> SparseVector:
        """Produce the sparse vector for a single query string.

        Args:
            text: The query text to embed.

        Returns:
            The ``SparseVector`` for ``text``.
        """

    async def aembed_documents(self, texts: List[str]) -> List[SparseVector]:
        """Awaitable form of ``embed_documents``.

        Forwards ``texts`` to the synchronous ``embed_documents`` via
        ``run_in_executor`` (called with ``None`` as its first argument)
        and returns whatever that call produces.
        """
        sync_embed = self.embed_documents
        vectors = await run_in_executor(None, sync_embed, texts)
        return vectors

    async def aembed_query(self, text: str) -> SparseVector:
        """Awaitable form of ``embed_query``.

        Forwards ``text`` to the synchronous ``embed_query`` via
        ``run_in_executor`` (called with ``None`` as its first argument)
        and returns whatever that call produces.
        """
        sync_embed = self.embed_query
        vector = await run_in_executor(None, sync_embed, text)
        return vector
|
Reference in New Issue
Block a user