Update qdrant interface (#3971)

Hello
1) Passing `embedding_function` as a callable seems to be outdated; the common interface is to pass an `Embeddings` instance.
2) At the moment `Qdrant.add_texts` is designed to be used with `embeddings.embed_query`, which (a) is slow and (b) causes ambiguity because of point 1. It should be used with `embeddings.embed_documents` instead.
This PR solves both problems and also provides some new tests.
2025-09-12 00:11:17 +00:00 · 2023-05-06 03:46:40 +04:00
parent 76ed41f48a
commit 2324f19c85
2 changed files with 157 additions and 8 deletions
--- a/tests/integration_tests/vectorstores/test_qdrant.py
+++ b/tests/integration_tests/vectorstores/test_qdrant.py
@@ -1,7 +1,10 @@
 """Test Qdrant functionality."""
+from typing import Callable, Optional
+
 import pytest

 from langchain.docstore.document import Document
+from langchain.embeddings.base import Embeddings
 from langchain.vectorstores import Qdrant
 from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings

@@ -29,6 +32,22 @@ def test_qdrant(content_payload_key: str, metadata_payload_key: str) -> None:
    assert output == [Document(page_content="foo")]


+def test_qdrant_add_documents() -> None:
+    """Test end to end construction and search."""
+    texts = ["foo", "bar", "baz"]
+    docsearch: Qdrant = Qdrant.from_texts(texts, FakeEmbeddings(), location=":memory:")
+
+    new_texts = ["foobar", "foobaz"]
+    docsearch.add_documents([Document(page_content=content) for content in new_texts])
+    output = docsearch.similarity_search("foobar", k=1)
+    # FakeEmbeddings returns the same query embedding as the first document embedding
+    # computed in `embedding.embed_documents`. Since embed_documents is called twice,
+    # the "foo" embedding is the same as the "foobar" embedding, so either may match.
+    assert output == [Document(page_content="foobar")] or output == [
+        Document(page_content="foo")
+    ]
+
+
@pytest.mark.parametrize(
    ["content_payload_key", "metadata_payload_key"],
    [
@@ -98,3 +117,51 @@ def test_qdrant_max_marginal_relevance_search(
        Document(page_content="foo", metadata={"page": 0}),
        Document(page_content="bar", metadata={"page": 1}),
    ]
+
+
+@pytest.mark.parametrize(
+    ["embeddings", "embedding_function"],
+    [
+        (FakeEmbeddings(), None),
+        (FakeEmbeddings().embed_query, None),
+        (None, FakeEmbeddings().embed_query),
+    ],
+)
+def test_qdrant_embedding_interface(
+    embeddings: Optional[Embeddings], embedding_function: Optional[Callable]
+) -> None:
+    from qdrant_client import QdrantClient
+
+    client = QdrantClient(":memory:")
+    collection_name = "test"
+
+    Qdrant(
+        client,
+        collection_name,
+        embeddings=embeddings,
+        embedding_function=embedding_function,
+    )
+
+
+@pytest.mark.parametrize(
+    ["embeddings", "embedding_function"],
+    [
+        (FakeEmbeddings(), FakeEmbeddings().embed_query),
+        (None, None),
+    ],
+)
+def test_qdrant_embedding_interface_raises(
+    embeddings: Optional[Embeddings], embedding_function: Optional[Callable]
+) -> None:
+    from qdrant_client import QdrantClient
+
+    client = QdrantClient(":memory:")
+    collection_name = "test"
+
+    with pytest.raises(ValueError):
+        Qdrant(
+            client,
+            collection_name,
+            embeddings=embeddings,
+            embedding_function=embedding_function,
+        )