This commit is contained in:
Mohammad Mohtashim 2025-07-29 05:29:56 +05:00 committed by GitHub
commit 7ef32d6fbe
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 1308 additions and 38 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,116 @@
import uuid
import pytest
from qdrant_client import AsyncQdrantClient, models
from langchain_qdrant import QdrantVectorStore, RetrievalMode
from tests.integration_tests.common import (
ConsistentFakeEmbeddings,
ConsistentFakeSparseEmbeddings,
)
from tests.integration_tests.fixtures import qdrant_locations, retrieval_modes
@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
async def test_async_add_texts_basic(
    location: str, retrieval_mode: RetrievalMode
) -> None:
    """Add two batches of texts asynchronously, then check count and search.

    Runs once per combination of Qdrant location and retrieval mode.
    """
    name = uuid.uuid4().hex
    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        retrieval_mode=retrieval_mode,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
        collection_name=name,
        client_options={"location": location},
    )

    # Every batch must hand back one ID per inserted text.
    first_batch_ids = await store.aadd_texts(["foo", "bar"])
    assert len(first_batch_ids) == 2

    second_batch_ids = await store.aadd_texts(["baz", "qux"])
    assert len(second_batch_ids) == 2

    # Both batches must have landed in the same collection.
    client = store.client
    assert isinstance(client, AsyncQdrantClient)
    counted = await client.count(name)
    assert counted.count == 4

    # Searching for an inserted text must return that exact text.
    hits = await store.asimilarity_search("foo", k=1)
    assert len(hits) == 1
    top_hit = hits[0]
    assert top_hit.page_content == "foo"
@pytest.mark.parametrize("location", qdrant_locations())
async def test_async_add_texts_with_filters(location: str) -> None:
    """Insert texts with metadata and search using a filter on the color field."""
    name = uuid.uuid4().hex
    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        collection_name=name,
        client_options={"location": location},
    )

    # Keep each text next to its metadata, then split into parallel lists.
    samples = [
        ("Red apple", {"color": "red", "type": "fruit"}),
        ("Blue apple", {"color": "blue", "type": "fruit"}),
        ("Green apple", {"color": "green", "type": "fruit"}),
    ]
    texts = [text for text, _ in samples]
    metadatas = [meta for _, meta in samples]
    await store.aadd_texts(texts, metadatas=metadatas)

    # Restrict the search to points whose metadata color is "red".
    red_only = models.FieldCondition(
        key="metadata.color", match=models.MatchValue(value="red")
    )
    hits = await store.asimilarity_search(
        "apple", k=3, filter=models.Filter(must=[red_only])
    )

    # k=3 was requested, but the filter must leave exactly one match.
    assert len(hits) == 1
    hit = hits[0]
    assert hit.page_content == "Red apple"
    assert hit.metadata["color"] == "red"
@pytest.mark.parametrize("location", qdrant_locations())
async def test_async_add_texts_with_custom_ids(location: str) -> None:
    """Insert texts under caller-supplied IDs and fetch them back by those IDs."""
    name = uuid.uuid4().hex
    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        collection_name=name,
        client_options={"location": location},
    )

    custom_ids = [
        "fa38d572-4c31-4579-aedc-1960d79df6df",
        "cdc1aa36-d6ab-4fb2-8a94-56674fd27484",
    ]
    texts = ["First document", "Second document"]

    # The IDs passed in must come back unchanged and in the same order.
    returned_ids = await store.aadd_texts(texts, ids=custom_ids)
    assert returned_ids == custom_ids

    # Both documents must be retrievable; order of the result is not checked.
    fetched = await store.aget_by_ids(custom_ids)
    assert len(fetched) == 2
    fetched_contents = [document.page_content for document in fetched]
    for expected in ("First document", "Second document"):
        assert expected in fetched_contents

View File

@ -0,0 +1,124 @@
import uuid
import pytest
from langchain_core.documents import Document
from qdrant_client import AsyncQdrantClient
from langchain_qdrant import QdrantVectorStore, RetrievalMode
from tests.integration_tests.common import (
ConsistentFakeEmbeddings,
ConsistentFakeSparseEmbeddings,
assert_documents_equals,
)
from tests.integration_tests.fixtures import qdrant_locations, retrieval_modes
@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
async def test_async_vectorstore_from_texts(
    location: str, retrieval_mode: RetrievalMode
) -> None:
    """Build a store asynchronously, add two texts, and verify the point count."""
    name = uuid.uuid4().hex
    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        retrieval_mode=retrieval_mode,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
        collection_name=name,
        client_options={"location": location},
    )

    texts = ["Lorem ipsum dolor sit amet", "Ipsum dolor sit amet"]
    await store.aadd_texts(texts)

    # Count through the underlying client, which must be the async variant.
    client = store.client
    assert isinstance(client, AsyncQdrantClient)
    counted = await client.count(name)
    assert counted.count == 2
@pytest.mark.parametrize("location", qdrant_locations())
async def test_async_qdrant_similarity_search(location: str) -> None:
    """Search for an inserted text and expect it as the single result."""
    name = uuid.uuid4().hex
    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        collection_name=name,
        client_options={"location": location},
    )
    await store.aadd_texts(["foo", "bar", "baz"])

    hits = await store.asimilarity_search("foo", k=1)
    assert len(hits) == 1

    # Compare via the shared test helper instead of direct list equality.
    wanted = [Document(page_content="foo")]
    assert_documents_equals(actual=hits, expected=wanted)
@pytest.mark.parametrize("location", qdrant_locations())
async def test_async_qdrant_delete(location: str) -> None:
    """Insert three points, delete one by ID, and verify the count drops to two."""
    name = uuid.uuid4().hex
    ids = [
        "fa38d572-4c31-4579-aedc-1960d79df6df",
        "cdc1aa36-d6ab-4fb2-8a94-56674fd27484",
        "b4c1aa36-d6ab-4fb2-8a94-56674fd27485",
    ]
    texts = ["foo", "bar", "baz"]

    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        collection_name=name,
        client_options={"location": location},
    )
    await store.aadd_texts(texts, ids=ids)

    client = store.client
    assert isinstance(client, AsyncQdrantClient)

    # Baseline: all three points are present before deleting anything.
    before = await client.count(name)
    assert before.count == 3

    # Remove only the second point; adelete must report success with True.
    second_id = ids[1]
    deleted = await store.adelete([second_id])
    assert deleted is True

    # Exactly one point must be gone.
    after = await client.count(name)
    assert after.count == 2
@pytest.mark.parametrize("location", qdrant_locations())
async def test_async_qdrant_add_documents(location: str) -> None:
    """Add Document objects asynchronously and check the IDs and point count."""
    name = uuid.uuid4().hex
    # Three documents with page numbers 1..3 in their metadata.
    documents = [
        Document(page_content=content, metadata={"page": page})
        for page, content in enumerate(("foo", "bar", "baz"), start=1)
    ]

    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        collection_name=name,
        client_options={"location": location},
    )

    # One string ID is expected per document.
    ids = await store.aadd_documents(documents)
    assert len(ids) == 3
    for doc_id in ids:
        assert isinstance(doc_id, str)

    client = store.client
    assert isinstance(client, AsyncQdrantClient)

    counted = await client.count(name)
    assert counted.count == 3

View File

@ -0,0 +1,114 @@
import uuid
import pytest
from langchain_core.documents import Document
from langchain_qdrant import QdrantVectorStore, RetrievalMode
from tests.integration_tests.common import ConsistentFakeEmbeddings
from tests.integration_tests.fixtures import qdrant_locations
@pytest.mark.parametrize("location", qdrant_locations())
async def test_async_max_marginal_relevance_search_basic(location: str) -> None:
    """Run an async MMR text search and sanity-check size, types and top hit."""
    name = uuid.uuid4().hex
    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        retrieval_mode=RetrievalMode.DENSE,  # dense mode is requested for MMR
        collection_name=name,
        client_options={"location": location},
    )
    await store.aadd_texts(
        ["apple", "banana", "cherry", "apple pie", "apple juice"]
    )

    hits = await store.amax_marginal_relevance_search(
        "apple", k=3, fetch_k=5
    )

    # At most k results, each of them a Document.
    assert len(hits) <= 3
    for hit in hits:
        assert isinstance(hit, Document)

    # The leading result must mention the query term (case-insensitive).
    leading_text = hits[0].page_content.lower()
    assert "apple" in leading_text
@pytest.mark.parametrize("location", qdrant_locations())
async def test_async_max_marginal_relevance_search_by_vector(location: str) -> None:
    """Run an async MMR search from a precomputed query vector."""
    name = uuid.uuid4().hex
    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        retrieval_mode=RetrievalMode.DENSE,
        collection_name=name,
        client_options={"location": location},
    )
    await store.aadd_texts(["apple", "banana", "cherry", "apple pie"])

    # Embed the query with a separate embeddings instance.
    query_vector = ConsistentFakeEmbeddings().embed_query("apple")

    hits = await store.amax_marginal_relevance_search_by_vector(
        query_vector, k=2, fetch_k=4
    )

    # At most k results, each of them a Document.
    assert len(hits) <= 2
    for hit in hits:
        assert isinstance(hit, Document)
@pytest.mark.parametrize("location", qdrant_locations())
async def test_async_max_marginal_relevance_search_with_score_by_vector(
    location: str,
) -> None:
    """Run an async scored MMR search from a query vector and check each pair."""
    name = uuid.uuid4().hex
    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        retrieval_mode=RetrievalMode.DENSE,
        collection_name=name,
        client_options={"location": location},
    )
    await store.aadd_texts(
        ["apple", "banana", "cherry", "apple pie", "apple juice"]
    )

    # Embed the query with a separate embeddings instance.
    query_vector = ConsistentFakeEmbeddings().embed_query("apple")

    scored_hits = await store.amax_marginal_relevance_search_with_score_by_vector(
        query_vector, k=3, fetch_k=5
    )
    assert len(scored_hits) <= 3

    # Each entry is a (Document, float) pair with a non-negative score.
    for document, relevance in scored_hits:
        assert isinstance(document, Document)
        assert isinstance(relevance, float)
        assert relevance >= 0.0
@pytest.mark.parametrize("location", qdrant_locations())
async def test_async_max_marginal_relevance_search_empty_collection(
    location: str,
) -> None:
    """An async MMR search on a collection with no points must return nothing."""
    name = uuid.uuid4().hex
    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        retrieval_mode=RetrievalMode.DENSE,
        collection_name=name,
        client_options={"location": location},
    )

    # No texts are added before searching.
    hits = await store.amax_marginal_relevance_search(
        "anything", k=5, fetch_k=10
    )
    assert len(hits) == 0

View File

@ -0,0 +1,128 @@
import uuid
import pytest
from langchain_core.documents import Document
from qdrant_client import models
from langchain_qdrant import QdrantVectorStore, RetrievalMode
from tests.integration_tests.common import (
ConsistentFakeEmbeddings,
ConsistentFakeSparseEmbeddings,
)
from tests.integration_tests.fixtures import qdrant_locations, retrieval_modes
@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
async def test_async_similarity_search_basic(
    location: str, retrieval_mode: RetrievalMode
) -> None:
    """Run an async similarity search for every location and retrieval mode."""
    name = uuid.uuid4().hex
    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        retrieval_mode=retrieval_mode,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
        collection_name=name,
        client_options={"location": location},
    )
    await store.aadd_texts(["apple", "banana", "cherry", "date"])

    hits = await store.asimilarity_search("apple", k=2)

    # At most k results, each of them a Document.
    assert len(hits) <= 2
    for hit in hits:
        assert isinstance(hit, Document)

    # A text queried verbatim is expected to rank first.
    best = hits[0]
    assert best.page_content == "apple"
@pytest.mark.parametrize("location", qdrant_locations())
@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
async def test_async_similarity_search_with_score(
    location: str, retrieval_mode: RetrievalMode
) -> None:
    """Run an async scored similarity search and validate the returned pairs."""
    name = uuid.uuid4().hex
    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        retrieval_mode=retrieval_mode,
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
        collection_name=name,
        client_options={"location": location},
    )
    await store.aadd_texts(
        ["red apple", "green apple", "blue car", "yellow banana"]
    )

    scored_hits = await store.asimilarity_search_with_score("apple", k=3)
    assert len(scored_hits) <= 3

    # Each entry is a (Document, float) pair with a non-negative score.
    for document, relevance in scored_hits:
        assert isinstance(document, Document)
        assert isinstance(relevance, float)
        assert relevance >= 0.0

    # At least one hit must mention "apple"; ranking order is not checked.
    hit_texts = [document.page_content for document, _ in scored_hits]
    assert any("apple" in text for text in hit_texts)
@pytest.mark.parametrize("location", qdrant_locations())
async def test_async_similarity_search_empty_collection(location: str) -> None:
    """An async similarity search on a collection with no points returns nothing."""
    name = uuid.uuid4().hex
    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        collection_name=name,
        client_options={"location": location},
    )

    # No texts are added before searching.
    hits = await store.asimilarity_search("anything", k=5)
    assert len(hits) == 0
@pytest.mark.parametrize("location", qdrant_locations())
async def test_async_similarity_search_with_consistency(location: str) -> None:
    """Run async similarity search with several read-consistency settings."""
    name = uuid.uuid4().hex
    store = await QdrantVectorStore.aconstruct_instance(
        embedding=ConsistentFakeEmbeddings(),
        collection_name=name,
        client_options={"location": location},
    )
    await store.aadd_texts(["test document"])

    # An integer factor first, then the MAJORITY and ALL enum members.
    consistency_levels = (
        1,
        models.ReadConsistencyType.MAJORITY,
        models.ReadConsistencyType.ALL,
    )
    for level in consistency_levels:
        hits = await store.asimilarity_search("test", k=1, consistency=level)
        # An empty result is tolerated; any hit must be the one stored text.
        assert len(hits) <= 1
        if hits:
            assert hits[0].page_content == "test document"

View File

@ -137,7 +137,9 @@ def test_qdrant_add_texts_stores_ids(
batch_size=batch_size,
)
assert 3 == vec_store.client.count(collection_name).count
stored_ids = [point.id for point in vec_store.client.scroll(collection_name)[0]]
assert 3 == vec_store.sync_client.count(collection_name).count
stored_ids = [
point.id for point in vec_store.sync_client.scroll(collection_name)[0]
]
assert set(ids) == set(stored_ids)
assert 3 == len(vec_store.get_by_ids(ids))

View File

@ -48,4 +48,4 @@ def test_qdrant_from_existing_collection_uses_same_collection(
)
qdrant.add_texts(["baz", "bar"])
assert 3 == qdrant.client.count(collection_name).count
assert 3 == qdrant.sync_client.count(collection_name).count

View File

@ -30,7 +30,7 @@ def test_vectorstore_from_texts(location: str, retrieval_mode: RetrievalMode) ->
sparse_embedding=ConsistentFakeSparseEmbeddings(),
)
assert 2 == vec_store.client.count(collection_name).count
assert 2 == vec_store.sync_client.count(collection_name).count
@pytest.mark.parametrize("batch_size", [1, 64])
@ -66,8 +66,10 @@ def test_qdrant_from_texts_stores_ids(
sparse_vector_name=sparse_vector_name,
)
assert 2 == vec_store.client.count(collection_name).count
stored_ids = [point.id for point in vec_store.client.retrieve(collection_name, ids)]
assert 2 == vec_store.sync_client.count(collection_name).count
stored_ids = [
point.id for point in vec_store.sync_client.retrieve(collection_name, ids)
]
assert set(ids) == set(stored_ids)
@ -97,16 +99,20 @@ def test_qdrant_from_texts_stores_embeddings_as_named_vectors(
sparse_embedding=ConsistentFakeSparseEmbeddings(),
)
assert 5 == vec_store.client.count(collection_name).count
assert 5 == vec_store.sync_client.count(collection_name).count
if retrieval_mode in retrieval_modes(sparse=False):
assert all(
(vector_name in point.vector or isinstance(point.vector, list)) # type: ignore
for point in vec_store.client.scroll(collection_name, with_vectors=True)[0]
for point in vec_store.sync_client.scroll(
collection_name, with_vectors=True
)[0]
)
if retrieval_mode in retrieval_modes(dense=False):
assert all(
sparse_vector_name in point.vector # type: ignore
for point in vec_store.client.scroll(collection_name, with_vectors=True)[0]
for point in vec_store.sync_client.scroll(
collection_name, with_vectors=True
)[0]
)
@ -149,7 +155,7 @@ def test_qdrant_from_texts_reuses_same_collection(
sparse_embedding=sparse_embeddings,
)
assert 7 == vec_store.client.count(collection_name).count
assert 7 == vec_store.sync_client.count(collection_name).count
@pytest.mark.parametrize("location", qdrant_locations(use_in_memory=False))
@ -302,7 +308,7 @@ def test_qdrant_from_texts_recreates_collection_on_force_recreate(
force_recreate=True,
)
assert 2 == vec_store.client.count(collection_name).count
assert 2 == vec_store.sync_client.count(collection_name).count
@pytest.mark.parametrize("location", qdrant_locations())
@ -379,7 +385,7 @@ def test_from_texts_passed_optimizers_config_and_on_disk_payload(
sparse_embedding=ConsistentFakeSparseEmbeddings(),
)
collection_info = vec_store.client.get_collection(collection_name)
collection_info = vec_store.sync_client.get_collection(collection_name)
assert collection_info.config.params.vectors[vector_name].on_disk is True # type: ignore
assert collection_info.config.optimizer_config.memmap_threshold == 1000
assert collection_info.config.params.on_disk_payload is True