qdrant: Add similarity_search_with_score_by_vector() function to the QdrantVectorStore (#29641)

Added the `similarity_search_with_score_by_vector()` function to the `QdrantVectorStore` class. It is required when we want to query multiple times with the same embeddings. It was present in the now-deprecated original `Qdrant` vectorstore implementation, but was absent from the new one. It is also implemented in a number of other `VectorStore` implementations. I have added tests for this new function. Note that I also argued in this discussion that it should be part of the general `VectorStore` interface: https://github.com/langchain-ai/langchain/discussions/29638. Co-authored-by: Erick Friis <erick@langchain.dev>
2025-09-09 06:53:59 +00:00 · 2025-02-07 01:55:58 +01:00
parent 488cb4a739
commit 3645181d0e
2 changed files with 72 additions and 8 deletions
--- a/libs/partners/qdrant/langchain_qdrant/qdrant.py
+++ b/libs/partners/qdrant/langchain_qdrant/qdrant.py
@@ -568,7 +568,7 @@ class QdrantVectorStore(VectorStore):
            for result in results
        ]

-    def similarity_search_by_vector(
+    def similarity_search_with_score_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
@@ -578,11 +578,11 @@ class QdrantVectorStore(VectorStore):
        score_threshold: Optional[float] = None,
        consistency: Optional[models.ReadConsistency] = None,
        **kwargs: Any,
-    ) -> List[Document]:
+    ) -> List[tuple[Document, float]]:
        """Return docs most similar to embedding vector.

        Returns:
-            List of Documents most similar to the query.
+            List of Documents most similar to the query and distance for each.
        """
        qdrant_filter = filter

@@ -609,15 +609,46 @@ class QdrantVectorStore(VectorStore):
        ).points

        return [
-            self._document_from_point(
-                result,
-                self.collection_name,
-                self.content_payload_key,
-                self.metadata_payload_key,
+            (
+                self._document_from_point(
+                    result,
+                    self.collection_name,
+                    self.content_payload_key,
+                    self.metadata_payload_key,
+                ),
+                result.score,
            )
            for result in results
        ]

+    def similarity_search_by_vector(
+        self,
+        embedding: List[float],
+        k: int = 4,
+        filter: Optional[models.Filter] = None,
+        search_params: Optional[models.SearchParams] = None,
+        offset: int = 0,
+        score_threshold: Optional[float] = None,
+        consistency: Optional[models.ReadConsistency] = None,
+        **kwargs: Any,
+    ) -> List[Document]:
+        """Return docs most similar to embedding vector.
+
+        Returns:
+            List of Documents most similar to the query.
+        """
+        results = self.similarity_search_with_score_by_vector(
+            embedding,
+            k,
+            filter=filter,
+            search_params=search_params,
+            offset=offset,
+            score_threshold=score_threshold,
+            consistency=consistency,
+            **kwargs,
+        )
+        return list(map(itemgetter(0), results))
+
    def max_marginal_relevance_search(
        self,
        query: str,