qdrant: new Qdrant implementation (#24164)

This commit is contained in:
Anush 2024-07-12 08:22:02 +05:30 committed by GitHub
parent 35784d1c33
commit 7014d07cab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 1082 additions and 35 deletions

View File

@@ -1,5 +1,13 @@
from langchain_qdrant.fastembed_sparse import FastEmbedSparse from langchain_qdrant.fastembed_sparse import FastEmbedSparse
from langchain_qdrant.qdrant import QdrantVectorStore, RetrievalMode
from langchain_qdrant.sparse_embeddings import SparseEmbeddings, SparseVector from langchain_qdrant.sparse_embeddings import SparseEmbeddings, SparseVector
from langchain_qdrant.vectorstores import Qdrant from langchain_qdrant.vectorstores import Qdrant
__all__ = ["Qdrant", "SparseEmbeddings", "SparseVector", "FastEmbedSparse"] __all__ = [
"Qdrant",
"QdrantVectorStore",
"SparseEmbeddings",
"SparseVector",
"FastEmbedSparse",
"RetrievalMode",
]

File diff suppressed because it is too large Load Diff

View File

@@ -23,6 +23,7 @@ from typing import (
) )
import numpy as np import numpy as np
from langchain_core._api.deprecation import deprecated
from langchain_core.documents import Document from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings from langchain_core.embeddings import Embeddings
from langchain_core.runnables.config import run_in_executor from langchain_core.runnables.config import run_in_executor
@@ -65,6 +66,7 @@ def sync_call_fallback(method: Callable) -> Callable:
return wrapper return wrapper
@deprecated(since="0.1.2", alternative="QdrantVectorStore", removal="0.5.0")
class Qdrant(VectorStore): class Qdrant(VectorStore):
"""`Qdrant` vector store. """`Qdrant` vector store.

View File

@@ -207,13 +207,13 @@ test = ["pytest (>=6)"]
[[package]] [[package]]
name = "fastembed" name = "fastembed"
version = "0.3.1" version = "0.3.3"
description = "Fast, light, accurate library built for retrieval embedding generation" description = "Fast, light, accurate library built for retrieval embedding generation"
optional = true optional = true
python-versions = "<3.13,>=3.8.0" python-versions = "<3.13,>=3.8.0"
files = [ files = [
{file = "fastembed-0.3.1-py3-none-any.whl", hash = "sha256:5e9fecf6031a010eff75c02986f556d0635102210c0b1765e784cfca07df8975"}, {file = "fastembed-0.3.3-py3-none-any.whl", hash = "sha256:1282a44dac187bbb0765b7e05a0612f1a7eb28c0c65ccb10b2b963467807fef6"},
{file = "fastembed-0.3.1.tar.gz", hash = "sha256:01a52c5c9cdfe2504dfa56b3c1b032d1ad88d621de144fc9a571371a79df6457"}, {file = "fastembed-0.3.3.tar.gz", hash = "sha256:2735777d8c4462a92ec289ad99534242deb2073cede7d4ad3b37d070994d767f"},
] ]
[package.dependencies] [package.dependencies]
@@ -622,7 +622,7 @@ files = [
[[package]] [[package]]
name = "langchain-core" name = "langchain-core"
version = "0.2.11" version = "0.2.16"
description = "Building applications with LLMs through composability" description = "Building applications with LLMs through composability"
optional = false optional = false
python-versions = ">=3.8.1,<4.0" python-versions = ">=3.8.1,<4.0"
@@ -1530,13 +1530,13 @@ files = [
[[package]] [[package]]
name = "qdrant-client" name = "qdrant-client"
version = "1.10.0" version = "1.10.1"
description = "Client library for the Qdrant vector search engine" description = "Client library for the Qdrant vector search engine"
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
files = [ files = [
{file = "qdrant_client-1.10.0-py3-none-any.whl", hash = "sha256:423c2586709ccf3db20850cd85c3d18954692a8faff98367dfa9dc82ab7f91d9"}, {file = "qdrant_client-1.10.1-py3-none-any.whl", hash = "sha256:b9fb8fe50dd168d92b2998be7c6135d5a229b3a3258ad158cc69c8adf9ff1810"},
{file = "qdrant_client-1.10.0.tar.gz", hash = "sha256:47c4f7abfab152fb7e5e4902ab0e2e9e33483c49ea5e80128ccd0295f342cf9b"}, {file = "qdrant_client-1.10.1.tar.gz", hash = "sha256:2284c8c5bb1defb0d9dbacb07d16f344972f395f4f2ed062318476a7951fd84c"},
] ]
[package.dependencies] [package.dependencies]
@@ -1932,4 +1932,4 @@ fastembed = ["fastembed"]
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = ">=3.8.1,<4.0" python-versions = ">=3.8.1,<4.0"
content-hash = "192e50ec7768ff659cfe7d2bbc75497dbeba3ee9f1e245d12f2eb3cdf7fe07d9" content-hash = "a73d49a493892731f3fccc06217626d20b8114bc83ad107bc7336eabca479988"

View File

@@ -23,9 +23,9 @@ disallow_untyped_defs = true
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = ">=3.8.1,<4.0" python = ">=3.8.1,<4.0"
langchain-core = ">=0.1.52,<0.3" langchain-core = ">=0.1.52,<0.3"
qdrant-client = "^1.9.0" qdrant-client = "^1.10.1"
pydantic = "^2.7.4" pydantic = "^2.7.4"
fastembed = { version = "^0.3.1", python = ">=3.8.1,<3.13", optional = true} fastembed = { version = "^0.3.3", python = ">=3.8.1,<3.13", optional = true}
[tool.poetry.extras] [tool.poetry.extras]
fastembed = [ "fastembed"] fastembed = [ "fastembed"]

View File

@@ -31,29 +31,7 @@ def assert_documents_equals(actual: List[Document], expected: List[Document]):
assert actual_doc.metadata == expected_doc.metadata assert actual_doc.metadata == expected_doc.metadata
class FakeEmbeddings(Embeddings): class ConsistentFakeEmbeddings(Embeddings):
"""Fake embeddings functionality for testing."""
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Return simple embeddings.
Embeddings encode each text as its index."""
return [[float(1.0)] * 9 + [float(i)] for i in range(len(texts))]
async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
return self.embed_documents(texts)
def embed_query(self, text: str) -> List[float]:
"""Return constant query embeddings.
Embeddings are identical to embed_documents(texts)[0].
Distance to each text will be that text's index,
as it was passed to embed_documents."""
return [float(1.0)] * 9 + [float(0.0)]
async def aembed_query(self, text: str) -> List[float]:
return self.embed_query(text)
class ConsistentFakeEmbeddings(FakeEmbeddings):
"""Fake embeddings which remember all the texts seen so far to return consistent """Fake embeddings which remember all the texts seen so far to return consistent
vectors for the same texts.""" vectors for the same texts."""

View File

@@ -3,6 +3,8 @@ import pytest
from langchain_qdrant import FastEmbedSparse from langchain_qdrant import FastEmbedSparse
pytest.importorskip("fastembed", reason="'fastembed' package is not installed")
@pytest.mark.parametrize( @pytest.mark.parametrize(
"model_name", ["Qdrant/bm25", "Qdrant/bm42-all-minilm-l6-v2-attentions"] "model_name", ["Qdrant/bm25", "Qdrant/bm42-all-minilm-l6-v2-attentions"]

View File

@@ -1,6 +1,13 @@
from langchain_qdrant import __all__ from langchain_qdrant import __all__
EXPECTED_ALL = ["Qdrant", "SparseEmbeddings", "SparseVector", "FastEmbedSparse"] EXPECTED_ALL = [
"Qdrant",
"QdrantVectorStore",
"SparseEmbeddings",
"SparseVector",
"FastEmbedSparse",
"RetrievalMode",
]
def test_all_imports() -> None: def test_all_imports() -> None: