Merge d0b2f702c3 into 0e287763cd

2025-08-14 07:07:34 +00:00 · 2025-07-29 05:29:56 +05:00 · 2025-07-29 05:29:56 +05:00 · 7ef32d6fbe
commit 7ef32d6fbe
parent 0e287763cd d0b2f702c3
8 changed files with 1308 additions and 38 deletions
--- a/libs/partners/qdrant/langchain_qdrant/qdrant.py
+++ b/libs/partners/qdrant/langchain_qdrant/qdrant.py
--- a/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_add_texts.py
+++ b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_add_texts.py
@ -0,0 +1,116 @@
+import uuid
+
+import pytest
+from qdrant_client import AsyncQdrantClient, models
+
+from langchain_qdrant import QdrantVectorStore, RetrievalMode
+from tests.integration_tests.common import (
+    ConsistentFakeEmbeddings,
+    ConsistentFakeSparseEmbeddings,
+)
+from tests.integration_tests.fixtures import qdrant_locations, retrieval_modes
+
+
+@pytest.mark.parametrize("location", qdrant_locations())
+@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
+async def test_async_add_texts_basic(
+    location: str, retrieval_mode: RetrievalMode
+) -> None:
+    """Add two batches with ``aadd_texts``, then check the count and a search."""
+    name = uuid.uuid4().hex
+
+    store = await QdrantVectorStore.aconstruct_instance(
+        embedding=ConsistentFakeEmbeddings(),
+        retrieval_mode=retrieval_mode,
+        sparse_embedding=ConsistentFakeSparseEmbeddings(),
+        collection_name=name,
+        client_options={"location": location},
+    )
+
+    # First batch: one ID is expected back per inserted text.
+    first_batch = ["foo", "bar"]
+    first_ids = await store.aadd_texts(first_batch)
+    assert len(first_ids) == 2
+
+    # Second batch goes into the same collection.
+    second_batch = ["baz", "qux"]
+    second_ids = await store.aadd_texts(second_batch)
+    assert len(second_ids) == 2
+
+    # Both batches together should account for four stored points.
+    client = store.client
+    assert isinstance(client, AsyncQdrantClient)
+    stored = await client.count(name)
+    assert stored.count == 4
+
+    # Searching for an inserted text should return that text as the only hit.
+    hits = await store.asimilarity_search("foo", k=1)
+    assert len(hits) == 1
+    assert hits[0].page_content == "foo"
+
+
+@pytest.mark.parametrize("location", qdrant_locations())
+async def test_async_add_texts_with_filters(location: str) -> None:
+    """Store texts with metadata and search them through a payload filter.
+
+    Only the document whose ``color`` metadata is ``"red"`` should come back.
+    """
+    name = uuid.uuid4().hex
+
+    store = await QdrantVectorStore.aconstruct_instance(
+        embedding=ConsistentFakeEmbeddings(),
+        collection_name=name,
+        client_options={"location": location},
+    )
+
+    # Derive "<Color> apple" texts and their metadata from one list of colors.
+    colors = ["red", "blue", "green"]
+    texts = [f"{color.capitalize()} apple" for color in colors]
+    metadatas = [{"color": color, "type": "fruit"} for color in colors]
+
+    await store.aadd_texts(texts, metadatas=metadatas)
+
+    # Match only points whose ``metadata.color`` field equals "red".
+    is_red = models.FieldCondition(
+        key="metadata.color", match=models.MatchValue(value="red")
+    )
+    only_red = models.Filter(must=[is_red])
+
+    # k equals the number of stored texts, so the filter alone limits the hits.
+    hits = await store.asimilarity_search("apple", k=3, filter=only_red)
+
+    assert len(hits) == 1
+    red_doc = hits[0]
+    assert red_doc.page_content == "Red apple"
+    assert red_doc.metadata["color"] == "red"
+
+
+@pytest.mark.parametrize("location", qdrant_locations())
+async def test_async_add_texts_with_custom_ids(location: str) -> None:
+    """Check caller-supplied IDs are returned unchanged and usable for lookup."""
+    name = uuid.uuid4().hex
+
+    store = await QdrantVectorStore.aconstruct_instance(
+        embedding=ConsistentFakeEmbeddings(),
+        collection_name=name,
+        client_options={"location": location},
+    )
+
+    # Fixed UUID strings, one per text.
+    custom_ids = [
+        "fa38d572-4c31-4579-aedc-1960d79df6df",
+        "cdc1aa36-d6ab-4fb2-8a94-56674fd27484",
+    ]
+    texts = ["First document", "Second document"]
+
+    # ``aadd_texts`` should echo back exactly the IDs it was given.
+    returned_ids = await store.aadd_texts(texts, ids=custom_ids)
+    assert returned_ids == custom_ids
+
+    # Fetching by those IDs must yield both documents; order is not checked.
+    fetched = await store.aget_by_ids(custom_ids)
+    assert len(fetched) == 2
+
+    found = {doc.page_content for doc in fetched}
+    assert "First document" in found
+    assert "Second document" in found
--- a/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_from_texts.py
+++ b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_from_texts.py
@ -0,0 +1,124 @@
+import uuid
+
+import pytest
+from langchain_core.documents import Document
+from qdrant_client import AsyncQdrantClient
+
+from langchain_qdrant import QdrantVectorStore, RetrievalMode
+from tests.integration_tests.common import (
+    ConsistentFakeEmbeddings,
+    ConsistentFakeSparseEmbeddings,
+    assert_documents_equals,
+)
+from tests.integration_tests.fixtures import qdrant_locations, retrieval_modes
+
+
+@pytest.mark.parametrize("location", qdrant_locations())
+@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
+async def test_async_vectorstore_from_texts(
+    location: str, retrieval_mode: RetrievalMode
+) -> None:
+    """Build a store asynchronously, add two texts and check both are stored."""
+    name = uuid.uuid4().hex
+
+    store = await QdrantVectorStore.aconstruct_instance(
+        embedding=ConsistentFakeEmbeddings(),
+        retrieval_mode=retrieval_mode,
+        sparse_embedding=ConsistentFakeSparseEmbeddings(),
+        collection_name=name,
+        client_options={"location": location},
+    )
+
+    # Insert through the async API.
+    texts = ["Lorem ipsum dolor sit amet", "Ipsum dolor sit amet"]
+    await store.aadd_texts(texts)
+
+    # Count the stored points directly through the store's async client.
+    client = store.client
+    assert isinstance(client, AsyncQdrantClient)
+    assert (await client.count(name)).count == 2
+
+
+@pytest.mark.parametrize("location", qdrant_locations())
+async def test_async_qdrant_similarity_search(location: str) -> None:
+    """Check ``asimilarity_search`` returns the stored text matching the query."""
+    name = uuid.uuid4().hex
+
+    store = await QdrantVectorStore.aconstruct_instance(
+        embedding=ConsistentFakeEmbeddings(),
+        collection_name=name,
+        client_options={"location": location},
+    )
+
+    await store.aadd_texts(["foo", "bar", "baz"])
+
+    # With k=1 exactly one document, the one for "foo", should be returned.
+    hits = await store.asimilarity_search("foo", k=1)
+    assert len(hits) == 1
+    expected = [Document(page_content="foo")]
+    assert_documents_equals(actual=hits, expected=expected)
+
+
+@pytest.mark.parametrize("location", qdrant_locations())
+async def test_async_qdrant_delete(location: str) -> None:
+    """Delete one of three stored texts by ID and check the point count drops."""
+    name = uuid.uuid4().hex
+    ids = [
+        "fa38d572-4c31-4579-aedc-1960d79df6df",
+        "cdc1aa36-d6ab-4fb2-8a94-56674fd27484",
+        "b4c1aa36-d6ab-4fb2-8a94-56674fd27485",
+    ]
+    texts = ["foo", "bar", "baz"]
+
+    store = await QdrantVectorStore.aconstruct_instance(
+        embedding=ConsistentFakeEmbeddings(),
+        collection_name=name,
+        client_options={"location": location},
+    )
+
+    await store.aadd_texts(texts, ids=ids)
+
+    client = store.client
+    assert isinstance(client, AsyncQdrantClient)
+
+    # Baseline: every text is present before anything is deleted.
+    before = await client.count(name)
+    assert before.count == 3
+
+    # Remove the point stored under the second ID; ``adelete`` must return True.
+    deleted = await store.adelete(ids[1:2])
+    assert deleted is True
+
+    # Exactly one point should be gone afterwards.
+    after = await client.count(name)
+    assert after.count == 2
+
+
+@pytest.mark.parametrize("location", qdrant_locations())
+async def test_async_qdrant_add_documents(location: str) -> None:
+    """Add ``Document`` objects via ``aadd_documents`` and check they are stored."""
+    name = uuid.uuid4().hex
+
+    # Three documents carrying page numbers 1..3 as metadata.
+    docs = [
+        Document(page_content=text, metadata={"page": page})
+        for page, text in enumerate(["foo", "bar", "baz"], start=1)
+    ]
+
+    store = await QdrantVectorStore.aconstruct_instance(
+        embedding=ConsistentFakeEmbeddings(),
+        collection_name=name,
+        client_options={"location": location},
+    )
+
+    # One string ID is expected back per document.
+    ids = await store.aadd_documents(docs)
+    assert len(ids) == 3
+    for id_ in ids:
+        assert isinstance(id_, str)
+
+    client = store.client
+    assert isinstance(client, AsyncQdrantClient)
+
+    # The collection itself should now report three points.
+    assert (await client.count(name)).count == 3
--- a/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_mmr.py
+++ b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_mmr.py
@ -0,0 +1,114 @@
+import uuid
+
+import pytest
+from langchain_core.documents import Document
+
+from langchain_qdrant import QdrantVectorStore, RetrievalMode
+from tests.integration_tests.common import ConsistentFakeEmbeddings
+from tests.integration_tests.fixtures import qdrant_locations
+
+
+@pytest.mark.parametrize("location", qdrant_locations())
+async def test_async_max_marginal_relevance_search_basic(location: str) -> None:
+    """Run async MMR search by text and check the size and type of the results."""
+    collection_name = uuid.uuid4().hex
+
+    vec_store = await QdrantVectorStore.aconstruct_instance(
+        embedding=ConsistentFakeEmbeddings(),
+        retrieval_mode=RetrievalMode.DENSE,  # MMR only works with dense
+        collection_name=collection_name,
+        client_options={"location": location},
+    )
+
+    texts = ["apple", "banana", "cherry", "apple pie", "apple juice"]
+    await vec_store.aadd_texts(texts)
+
+    results = await vec_store.amax_marginal_relevance_search("apple", k=3, fetch_k=5)
+
+    # Require a non-empty result so an empty list fails here, not at results[0].
+    assert 0 < len(results) <= 3
+    assert all(isinstance(doc, Document) for doc in results)
+
+    # The top hit should be one of the texts that mention "apple".
+    assert "apple" in results[0].page_content.lower()
+
+
+@pytest.mark.parametrize("location", qdrant_locations())
+async def test_async_max_marginal_relevance_search_by_vector(location: str) -> None:
+    """Run async MMR search from a precomputed query vector."""
+    name = uuid.uuid4().hex
+
+    store = await QdrantVectorStore.aconstruct_instance(
+        embedding=ConsistentFakeEmbeddings(),
+        retrieval_mode=RetrievalMode.DENSE,
+        collection_name=name,
+        client_options={"location": location},
+    )
+
+    await store.aadd_texts(["apple", "banana", "cherry", "apple pie"])
+
+    # Embed the query with a separate, fresh fake-embeddings instance.
+    query_vector = ConsistentFakeEmbeddings().embed_query("apple")
+
+    hits = await store.amax_marginal_relevance_search_by_vector(
+        query_vector, k=2, fetch_k=4
+    )
+
+    # At most k documents may come back, each as a ``Document``.
+    assert len(hits) <= 2
+    for doc in hits:
+        assert isinstance(doc, Document)
+
+
+@pytest.mark.parametrize("location", qdrant_locations())
+async def test_async_max_marginal_relevance_search_with_score_by_vector(
+    location: str,
+) -> None:
+    """Run async MMR search by vector and validate the (document, score) pairs."""
+    name = uuid.uuid4().hex
+
+    store = await QdrantVectorStore.aconstruct_instance(
+        embedding=ConsistentFakeEmbeddings(),
+        retrieval_mode=RetrievalMode.DENSE,
+        collection_name=name,
+        client_options={"location": location},
+    )
+
+    corpus = ["apple", "banana", "cherry", "apple pie", "apple juice"]
+    await store.aadd_texts(corpus)
+
+    # Embed the query with a separate, fresh fake-embeddings instance.
+    query_vector = ConsistentFakeEmbeddings().embed_query("apple")
+
+    # Scored variant of MMR search, driven by the raw vector.
+    scored_hits = await store.amax_marginal_relevance_search_with_score_by_vector(
+        query_vector, k=3, fetch_k=5
+    )
+
+    # At most k pairs, each a ``Document`` with a non-negative float score.
+    assert len(scored_hits) <= 3
+    for doc, score in scored_hits:
+        assert isinstance(doc, Document)
+        assert isinstance(score, float) and score >= 0.0
+
+
+@pytest.mark.parametrize("location", qdrant_locations())
+async def test_async_max_marginal_relevance_search_empty_collection(
+    location: str,
+) -> None:
+    """Check async MMR search on a collection to which no texts were added.
+
+    Nothing is inserted before searching, so no documents should come back.
+    """
+    name = uuid.uuid4().hex
+
+    store = await QdrantVectorStore.aconstruct_instance(
+        embedding=ConsistentFakeEmbeddings(),
+        retrieval_mode=RetrievalMode.DENSE,
+        collection_name=name,
+        client_options={"location": location},
+    )
+
+    hits = await store.amax_marginal_relevance_search("anything", k=5, fetch_k=10)
+
+    assert len(hits) == 0
--- a/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_search.py
+++ b/libs/partners/qdrant/tests/integration_tests/async_api/test_qdrant_vector_store_search.py
@ -0,0 +1,128 @@
+import uuid
+
+import pytest
+from langchain_core.documents import Document
+from qdrant_client import models
+
+from langchain_qdrant import QdrantVectorStore, RetrievalMode
+from tests.integration_tests.common import (
+    ConsistentFakeEmbeddings,
+    ConsistentFakeSparseEmbeddings,
+)
+from tests.integration_tests.fixtures import qdrant_locations, retrieval_modes
+
+
+@pytest.mark.parametrize("location", qdrant_locations())
+@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
+async def test_async_similarity_search_basic(
+    location: str, retrieval_mode: RetrievalMode
+) -> None:
+    """Search for a stored text and check it is returned as the top hit."""
+    collection_name = uuid.uuid4().hex
+
+    vec_store = await QdrantVectorStore.aconstruct_instance(
+        embedding=ConsistentFakeEmbeddings(),
+        retrieval_mode=retrieval_mode,
+        sparse_embedding=ConsistentFakeSparseEmbeddings(),
+        collection_name=collection_name,
+        client_options={"location": location},
+    )
+
+    texts = ["apple", "banana", "cherry", "date"]
+    await vec_store.aadd_texts(texts)
+
+    results = await vec_store.asimilarity_search("apple", k=2)
+
+    # Require a non-empty result so an empty list fails here, not at results[0].
+    assert 0 < len(results) <= 2
+    assert all(isinstance(doc, Document) for doc in results)
+    assert results[0].page_content == "apple"  # Should be most similar to itself
+
+
+@pytest.mark.parametrize("location", qdrant_locations())
+@pytest.mark.parametrize("retrieval_mode", retrieval_modes())
+async def test_async_similarity_search_with_score(
+    location: str, retrieval_mode: RetrievalMode
+) -> None:
+    """Check ``asimilarity_search_with_score`` yields (document, score) pairs."""
+    name = uuid.uuid4().hex
+
+    store = await QdrantVectorStore.aconstruct_instance(
+        embedding=ConsistentFakeEmbeddings(),
+        retrieval_mode=retrieval_mode,
+        sparse_embedding=ConsistentFakeSparseEmbeddings(),
+        collection_name=name,
+        client_options={"location": location},
+    )
+
+    corpus = ["red apple", "green apple", "blue car", "yellow banana"]
+    await store.aadd_texts(corpus)
+
+    scored_hits = await store.asimilarity_search_with_score("apple", k=3)
+
+    # At most k pairs, each a ``Document`` with a non-negative float score.
+    assert len(scored_hits) <= 3
+    for doc, score in scored_hits:
+        assert isinstance(doc, Document)
+        assert isinstance(score, float)
+        assert score >= 0.0
+
+    # At least one hit (not necessarily the first) must mention "apple".
+    matching = [doc for doc, _ in scored_hits if "apple" in doc.page_content]
+    assert matching
+
+
+@pytest.mark.parametrize("location", qdrant_locations())
+async def test_async_similarity_search_empty_collection(location: str) -> None:
+    """Check async similarity search yields nothing when no texts were added."""
+    name = uuid.uuid4().hex
+
+    store = await QdrantVectorStore.aconstruct_instance(
+        embedding=ConsistentFakeEmbeddings(),
+        collection_name=name,
+        client_options={"location": location},
+    )
+
+    # Nothing was inserted, so no documents should come back.
+    hits = await store.asimilarity_search("anything", k=5)
+
+    assert len(hits) == 0
+
+
+@pytest.mark.parametrize("location", qdrant_locations())
+async def test_async_similarity_search_with_consistency(location: str) -> None:
+    """Run async similarity search with several ``consistency`` settings.
+
+    One document is stored, then the same query is issued with an integer
+    consistency factor and with the ``MAJORITY`` and ``ALL`` read-consistency
+    types. Each search may return at most one hit, and any hit must be the
+    stored document.
+    """
+    name = uuid.uuid4().hex
+
+    store = await QdrantVectorStore.aconstruct_instance(
+        embedding=ConsistentFakeEmbeddings(),
+        collection_name=name,
+        client_options={"location": location},
+    )
+
+    # A single document is stored; only the ``consistency`` argument varies.
+    stored_text = "test document"
+    await store.aadd_texts([stored_text])
+
+    # Checked in order: an integer factor, then majority, then all.
+    consistency_levels = (
+        1,
+        models.ReadConsistencyType.MAJORITY,
+        models.ReadConsistencyType.ALL,
+    )
+
+    for level in consistency_levels:
+        hits = await store.asimilarity_search("test", k=1, consistency=level)
+
+        # NOTE(review): the ``if hits`` guard lets a search that returns nothing
+        # pass; presumably one hit is always expected here - confirm and
+        # tighten to ``len(hits) == 1`` if so.
+        assert len(hits) <= 1
+        if hits:
+            assert hits[0].page_content == stored_text
--- a/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_add_texts.py
+++ b/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_add_texts.py
@ -137,7 +137,9 @@ def test_qdrant_add_texts_stores_ids(
        batch_size=batch_size,
    )

-    assert 3 == vec_store.client.count(collection_name).count
-    stored_ids = [point.id for point in vec_store.client.scroll(collection_name)[0]]
+    assert 3 == vec_store.sync_client.count(collection_name).count
+    stored_ids = [
+        point.id for point in vec_store.sync_client.scroll(collection_name)[0]
+    ]
    assert set(ids) == set(stored_ids)
    assert 3 == len(vec_store.get_by_ids(ids))
--- a/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_from_existing.py
+++ b/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_from_existing.py
@ -48,4 +48,4 @@ def test_qdrant_from_existing_collection_uses_same_collection(
    )
    qdrant.add_texts(["baz", "bar"])

-    assert 3 == qdrant.client.count(collection_name).count
+    assert 3 == qdrant.sync_client.count(collection_name).count
--- a/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_from_texts.py
+++ b/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_from_texts.py
@ -30,7 +30,7 @@ def test_vectorstore_from_texts(location: str, retrieval_mode: RetrievalMode) ->
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
    )

-    assert 2 == vec_store.client.count(collection_name).count
+    assert 2 == vec_store.sync_client.count(collection_name).count


@pytest.mark.parametrize("batch_size", [1, 64])
@ -66,8 +66,10 @@ def test_qdrant_from_texts_stores_ids(
        sparse_vector_name=sparse_vector_name,
    )

-    assert 2 == vec_store.client.count(collection_name).count
-    stored_ids = [point.id for point in vec_store.client.retrieve(collection_name, ids)]
+    assert 2 == vec_store.sync_client.count(collection_name).count
+    stored_ids = [
+        point.id for point in vec_store.sync_client.retrieve(collection_name, ids)
+    ]
    assert set(ids) == set(stored_ids)


@ -97,16 +99,20 @@ def test_qdrant_from_texts_stores_embeddings_as_named_vectors(
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
    )

-    assert 5 == vec_store.client.count(collection_name).count
+    assert 5 == vec_store.sync_client.count(collection_name).count
    if retrieval_mode in retrieval_modes(sparse=False):
        assert all(
            (vector_name in point.vector or isinstance(point.vector, list))  # type: ignore
-            for point in vec_store.client.scroll(collection_name, with_vectors=True)[0]
+            for point in vec_store.sync_client.scroll(
+                collection_name, with_vectors=True
+            )[0]
        )
    if retrieval_mode in retrieval_modes(dense=False):
        assert all(
            sparse_vector_name in point.vector  # type: ignore
-            for point in vec_store.client.scroll(collection_name, with_vectors=True)[0]
+            for point in vec_store.sync_client.scroll(
+                collection_name, with_vectors=True
+            )[0]
        )


@ -149,7 +155,7 @@ def test_qdrant_from_texts_reuses_same_collection(
        sparse_embedding=sparse_embeddings,
    )

-    assert 7 == vec_store.client.count(collection_name).count
+    assert 7 == vec_store.sync_client.count(collection_name).count


@pytest.mark.parametrize("location", qdrant_locations(use_in_memory=False))
@ -302,7 +308,7 @@ def test_qdrant_from_texts_recreates_collection_on_force_recreate(
        force_recreate=True,
    )

-    assert 2 == vec_store.client.count(collection_name).count
+    assert 2 == vec_store.sync_client.count(collection_name).count


@pytest.mark.parametrize("location", qdrant_locations())
@ -379,7 +385,7 @@ def test_from_texts_passed_optimizers_config_and_on_disk_payload(
        sparse_embedding=ConsistentFakeSparseEmbeddings(),
    )

-    collection_info = vec_store.client.get_collection(collection_name)
+    collection_info = vec_store.sync_client.get_collection(collection_name)
    assert collection_info.config.params.vectors[vector_name].on_disk is True  # type: ignore
    assert collection_info.config.optimizer_config.memmap_threshold == 1000
    assert collection_info.config.params.on_disk_payload is True