qdrant: Add similarity_search_with_score_by_vector() function to the QdrantVectorStore (#29641)

Added `similarity_search_with_score_by_vector()` function to the
`QdrantVectorStore` class.

It is required when we want to query multiple times with the same
embeddings. It was present in the now-deprecated original `Qdrant`
vectorstore implementation, but was absent from the new one. It is also
implemented in a number of other `VectorStore` implementations.

I have added tests for this new function.

Note that I also argued in this discussion that it should be part of the
general `VectorStore`
https://github.com/langchain-ai/langchain/discussions/29638

Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
Vincent Emonet
2025-02-07 01:55:58 +01:00
committed by GitHub
parent 488cb4a739
commit 3645181d0e
2 changed files with 72 additions and 8 deletions

View File

@@ -568,7 +568,7 @@ class QdrantVectorStore(VectorStore):
for result in results
]
def similarity_search_by_vector(
def similarity_search_with_score_by_vector(
self,
embedding: List[float],
k: int = 4,
@@ -578,11 +578,11 @@ class QdrantVectorStore(VectorStore):
score_threshold: Optional[float] = None,
consistency: Optional[models.ReadConsistency] = None,
**kwargs: Any,
) -> List[Document]:
) -> List[tuple[Document, float]]:
"""Return docs most similar to embedding vector.
Returns:
List of Documents most similar to the query.
List of Documents most similar to the query and distance for each.
"""
qdrant_filter = filter
@@ -609,15 +609,46 @@ class QdrantVectorStore(VectorStore):
).points
return [
self._document_from_point(
result,
self.collection_name,
self.content_payload_key,
self.metadata_payload_key,
(
self._document_from_point(
result,
self.collection_name,
self.content_payload_key,
self.metadata_payload_key,
),
result.score,
)
for result in results
]
def similarity_search_by_vector(
    self,
    embedding: List[float],
    k: int = 4,
    filter: Optional[models.Filter] = None,
    search_params: Optional[models.SearchParams] = None,
    offset: int = 0,
    score_threshold: Optional[float] = None,
    consistency: Optional[models.ReadConsistency] = None,
    **kwargs: Any,
) -> List[Document]:
    """Return docs most similar to embedding vector.

    Thin wrapper around ``similarity_search_with_score_by_vector``: every
    argument is forwarded unchanged and the score attached to each result
    is dropped.

    Returns:
        List of Documents most similar to the query.
    """
    scored_docs = self.similarity_search_with_score_by_vector(
        embedding,
        k,
        filter=filter,
        search_params=search_params,
        offset=offset,
        score_threshold=score_threshold,
        consistency=consistency,
        **kwargs,
    )
    # Each entry is a (document, score) pair; keep only the document.
    return [pair[0] for pair in scored_docs]
def max_marginal_relevance_search(
self,
query: str,