diff --git a/libs/partners/qdrant/langchain_qdrant/qdrant.py b/libs/partners/qdrant/langchain_qdrant/qdrant.py
index b21dae2efe2..964bf209f92 100644
--- a/libs/partners/qdrant/langchain_qdrant/qdrant.py
+++ b/libs/partners/qdrant/langchain_qdrant/qdrant.py
@@ -568,7 +568,7 @@ class QdrantVectorStore(VectorStore):
             for result in results
         ]
 
-    def similarity_search_by_vector(
+    def similarity_search_with_score_by_vector(
         self,
         embedding: List[float],
         k: int = 4,
@@ -578,11 +578,11 @@ class QdrantVectorStore(VectorStore):
         score_threshold: Optional[float] = None,
         consistency: Optional[models.ReadConsistency] = None,
         **kwargs: Any,
-    ) -> List[Document]:
+    ) -> List[tuple[Document, float]]:
         """Return docs most similar to embedding vector.
 
         Returns:
-            List of Documents most similar to the query.
+            List of (Document, score) tuples most similar to the embedding vector.
         """
         qdrant_filter = filter
 
@@ -609,15 +609,46 @@ class QdrantVectorStore(VectorStore):
         ).points
 
         return [
-            self._document_from_point(
-                result,
-                self.collection_name,
-                self.content_payload_key,
-                self.metadata_payload_key,
+            (
+                self._document_from_point(
+                    result,
+                    self.collection_name,
+                    self.content_payload_key,
+                    self.metadata_payload_key,
+                ),
+                result.score,
             )
             for result in results
         ]
 
+    def similarity_search_by_vector(
+        self,
+        embedding: List[float],
+        k: int = 4,
+        filter: Optional[models.Filter] = None,
+        search_params: Optional[models.SearchParams] = None,
+        offset: int = 0,
+        score_threshold: Optional[float] = None,
+        consistency: Optional[models.ReadConsistency] = None,
+        **kwargs: Any,
+    ) -> List[Document]:
+        """Return docs most similar to embedding vector.
+
+        Returns:
+            List of Documents most similar to the embedding vector.
+        """
+        results = self.similarity_search_with_score_by_vector(
+            embedding,
+            k,
+            filter=filter,
+            search_params=search_params,
+            offset=offset,
+            score_threshold=score_threshold,
+            consistency=consistency,
+            **kwargs,
+        )
+        return list(map(itemgetter(0), results))
+
     def max_marginal_relevance_search(
         self,
         query: str,
diff --git a/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_search.py b/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_search.py
index 9525c5c17d1..162c819afdd 100644
--- a/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_search.py
+++ b/libs/partners/qdrant/tests/integration_tests/qdrant_vector_store/test_search.py
@@ -66,6 +66,39 @@ def test_similarity_search_by_vector(
     assert_documents_equals(output, [Document(page_content="foo")])
 
 
+@pytest.mark.parametrize("location", qdrant_locations())
+@pytest.mark.parametrize("content_payload_key", [QdrantVectorStore.CONTENT_KEY, "foo"])
+@pytest.mark.parametrize(
+    "metadata_payload_key", [QdrantVectorStore.METADATA_KEY, "bar"]
+)
+@pytest.mark.parametrize("vector_name", ["", "my-vector"])
+@pytest.mark.parametrize("batch_size", [1, 64])
+def test_similarity_search_with_score_by_vector(
+    location: str,
+    content_payload_key: str,
+    metadata_payload_key: str,
+    vector_name: str,
+    batch_size: int,
+) -> None:
+    """Test end to end construction and scored search by embedding vector."""
+    texts = ["foo", "bar", "baz"]
+    docsearch = QdrantVectorStore.from_texts(
+        texts,
+        ConsistentFakeEmbeddings(),
+        location=location,
+        content_payload_key=content_payload_key,
+        metadata_payload_key=metadata_payload_key,
+        batch_size=batch_size,
+        vector_name=vector_name,
+    )
+    embeddings = ConsistentFakeEmbeddings().embed_query("foo")
+    output = docsearch.similarity_search_with_score_by_vector(embeddings, k=1)
+    assert len(output) == 1
+    document, score = output[0]
+    assert_documents_equals([document], [Document(page_content="foo")])
+    assert score >= 0
+
+
 @pytest.mark.parametrize("location", qdrant_locations())
 @pytest.mark.parametrize(
     "metadata_payload_key", [QdrantVectorStore.METADATA_KEY, "bar"]