qdrant: new Qdrant implementation (#24164)

This commit is contained in:
Anush 2024-07-12 08:22:02 +05:30 committed by GitHub
parent 35784d1c33
commit 7014d07cab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 1082 additions and 35 deletions

View File

@@ -1,5 +1,13 @@
from langchain_qdrant.fastembed_sparse import FastEmbedSparse
from langchain_qdrant.qdrant import QdrantVectorStore, RetrievalMode
from langchain_qdrant.sparse_embeddings import SparseEmbeddings, SparseVector
from langchain_qdrant.vectorstores import Qdrant
__all__ = ["Qdrant", "SparseEmbeddings", "SparseVector", "FastEmbedSparse"]
__all__ = [
"Qdrant",
"QdrantVectorStore",
"SparseEmbeddings",
"SparseVector",
"FastEmbedSparse",
"RetrievalMode",
]

File diff suppressed because it is too large. Load Diff

View File

@@ -23,6 +23,7 @@ from typing import (
)
import numpy as np
from langchain_core._api.deprecation import deprecated
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.runnables.config import run_in_executor
@@ -65,6 +66,7 @@ def sync_call_fallback(method: Callable) -> Callable:
return wrapper
@deprecated(since="0.1.2", alternative="QdrantVectorStore", removal="0.5.0")
class Qdrant(VectorStore):
"""`Qdrant` vector store.

View File

@@ -207,13 +207,13 @@ test = ["pytest (>=6)"]
[[package]]
name = "fastembed"
version = "0.3.1"
version = "0.3.3"
description = "Fast, light, accurate library built for retrieval embedding generation"
optional = true
python-versions = "<3.13,>=3.8.0"
files = [
{file = "fastembed-0.3.1-py3-none-any.whl", hash = "sha256:5e9fecf6031a010eff75c02986f556d0635102210c0b1765e784cfca07df8975"},
{file = "fastembed-0.3.1.tar.gz", hash = "sha256:01a52c5c9cdfe2504dfa56b3c1b032d1ad88d621de144fc9a571371a79df6457"},
{file = "fastembed-0.3.3-py3-none-any.whl", hash = "sha256:1282a44dac187bbb0765b7e05a0612f1a7eb28c0c65ccb10b2b963467807fef6"},
{file = "fastembed-0.3.3.tar.gz", hash = "sha256:2735777d8c4462a92ec289ad99534242deb2073cede7d4ad3b37d070994d767f"},
]
[package.dependencies]
@@ -622,7 +622,7 @@ files = [
[[package]]
name = "langchain-core"
version = "0.2.11"
version = "0.2.16"
description = "Building applications with LLMs through composability"
optional = false
python-versions = ">=3.8.1,<4.0"
@@ -1530,13 +1530,13 @@ files = [
[[package]]
name = "qdrant-client"
version = "1.10.0"
version = "1.10.1"
description = "Client library for the Qdrant vector search engine"
optional = false
python-versions = ">=3.8"
files = [
{file = "qdrant_client-1.10.0-py3-none-any.whl", hash = "sha256:423c2586709ccf3db20850cd85c3d18954692a8faff98367dfa9dc82ab7f91d9"},
{file = "qdrant_client-1.10.0.tar.gz", hash = "sha256:47c4f7abfab152fb7e5e4902ab0e2e9e33483c49ea5e80128ccd0295f342cf9b"},
{file = "qdrant_client-1.10.1-py3-none-any.whl", hash = "sha256:b9fb8fe50dd168d92b2998be7c6135d5a229b3a3258ad158cc69c8adf9ff1810"},
{file = "qdrant_client-1.10.1.tar.gz", hash = "sha256:2284c8c5bb1defb0d9dbacb07d16f344972f395f4f2ed062318476a7951fd84c"},
]
[package.dependencies]
@@ -1932,4 +1932,4 @@ fastembed = ["fastembed"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0"
content-hash = "192e50ec7768ff659cfe7d2bbc75497dbeba3ee9f1e245d12f2eb3cdf7fe07d9"
content-hash = "a73d49a493892731f3fccc06217626d20b8114bc83ad107bc7336eabca479988"

View File

@@ -23,9 +23,9 @@ disallow_untyped_defs = true
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
langchain-core = ">=0.1.52,<0.3"
qdrant-client = "^1.9.0"
qdrant-client = "^1.10.1"
pydantic = "^2.7.4"
fastembed = { version = "^0.3.1", python = ">=3.8.1,<3.13", optional = true}
fastembed = { version = "^0.3.3", python = ">=3.8.1,<3.13", optional = true}
[tool.poetry.extras]
fastembed = [ "fastembed"]

View File

@@ -31,29 +31,7 @@ def assert_documents_equals(actual: List[Document], expected: List[Document]):
assert actual_doc.metadata == expected_doc.metadata
class FakeEmbeddings(Embeddings):
"""Fake embeddings functionality for testing."""
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Return simple embeddings.
Embeddings encode each text as its index."""
return [[float(1.0)] * 9 + [float(i)] for i in range(len(texts))]
async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
return self.embed_documents(texts)
def embed_query(self, text: str) -> List[float]:
"""Return constant query embeddings.
Embeddings are identical to embed_documents(texts)[0].
Distance to each text will be that text's index,
as it was passed to embed_documents."""
return [float(1.0)] * 9 + [float(0.0)]
async def aembed_query(self, text: str) -> List[float]:
return self.embed_query(text)
class ConsistentFakeEmbeddings(FakeEmbeddings):
class ConsistentFakeEmbeddings(Embeddings):
"""Fake embeddings which remember all the texts seen so far to return consistent
vectors for the same texts."""

View File

@@ -3,6 +3,8 @@ import pytest
from langchain_qdrant import FastEmbedSparse
pytest.importorskip("fastembed", reason="'fastembed' package is not installed")
@pytest.mark.parametrize(
"model_name", ["Qdrant/bm25", "Qdrant/bm42-all-minilm-l6-v2-attentions"]

View File

@@ -1,6 +1,13 @@
from langchain_qdrant import __all__
EXPECTED_ALL = ["Qdrant", "SparseEmbeddings", "SparseVector", "FastEmbedSparse"]
EXPECTED_ALL = [
"Qdrant",
"QdrantVectorStore",
"SparseEmbeddings",
"SparseVector",
"FastEmbedSparse",
"RetrievalMode",
]
def test_all_imports() -> None: