mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-19 11:08:55 +00:00
## Description

This PR introduces a new sparse embedding provider interface to work with the new Qdrant implementation that will follow this PR. Additionally, an implementation of this interface is provided using https://github.com/qdrant/fastembed. This PR will be followed by https://github.com/Anush008/langchain/pull/3.
37 lines
1.1 KiB
Python
37 lines
1.1 KiB
Python
from abc import ABC, abstractmethod
|
|
from typing import List
|
|
|
|
from langchain_core.runnables.config import run_in_executor
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
class SparseVector(BaseModel, extra="forbid"):
    """Sparse vector structure: integer ``indices`` paired with float ``values``.

    The model is declared with ``extra="forbid"``. Both fields are required.
    The constraints on the two lists (unique indices, equal lengths) are
    stated only in the field descriptions; this class defines no validator
    for them.
    """

    # Required list of integer indices.
    indices: List[int] = Field(
        default=...,
        description="indices must be unique",
    )
    # Required list of float values.
    values: List[float] = Field(
        default=...,
        description="values and indices must be the same length",
    )
|
|
|
|
|
|
class SparseEmbeddings(ABC):
    """Abstract interface for sparse embedding models to use with Qdrant.

    Concrete subclasses must implement the synchronous ``embed_documents``
    and ``embed_query`` methods. The ``aembed_*`` coroutines come with default
    implementations that pass the matching synchronous method to
    ``run_in_executor``.
    """

    @abstractmethod
    def embed_documents(self, texts: List[str]) -> List[SparseVector]:
        """Embed a list of search documents.

        Args:
            texts: The document texts to embed.

        Returns:
            A list of ``SparseVector`` objects.
        """

    @abstractmethod
    def embed_query(self, text: str) -> SparseVector:
        """Embed a single query text.

        Args:
            text: The query text to embed.

        Returns:
            The ``SparseVector`` for the query.
        """

    async def aembed_documents(self, texts: List[str]) -> List[SparseVector]:
        """Asynchronously embed a list of search documents.

        Delegates to ``embed_documents`` through ``run_in_executor``, passing
        ``None`` as its first argument.

        Args:
            texts: The document texts to embed.

        Returns:
            Whatever ``embed_documents`` returns for ``texts``.
        """
        pending = run_in_executor(None, self.embed_documents, texts)
        return await pending

    async def aembed_query(self, text: str) -> SparseVector:
        """Asynchronously embed a single query text.

        Delegates to ``embed_query`` through ``run_in_executor``, passing
        ``None`` as its first argument.

        Args:
            text: The query text to embed.

        Returns:
            Whatever ``embed_query`` returns for ``text``.
        """
        pending = run_in_executor(None, self.embed_query, text)
        return await pending
|