mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-21 14:18:52 +00:00
qdrant: Add similarity_search_with_score_by_vector()
function to the QdrantVectorStore
(#29641)
Added `similarity_search_with_score_by_vector()` function to the `QdrantVectorStore` class. It is required when we want to query multiple times with the same embeddings. It was present in the now-deprecated original `Qdrant` vectorstore implementation, but was absent from the new one. It is also implemented in a number of other `VectorStore` implementations. I have added tests for this new function. Note that I also argued in this discussion that it should be part of the general `VectorStore`: https://github.com/langchain-ai/langchain/discussions/29638 Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
parent
488cb4a739
commit
3645181d0e
@ -568,7 +568,7 @@ class QdrantVectorStore(VectorStore):
|
|||||||
for result in results
|
for result in results
|
||||||
]
|
]
|
||||||
|
|
||||||
def similarity_search_by_vector(
|
def similarity_search_with_score_by_vector(
|
||||||
self,
|
self,
|
||||||
embedding: List[float],
|
embedding: List[float],
|
||||||
k: int = 4,
|
k: int = 4,
|
||||||
@ -578,11 +578,11 @@ class QdrantVectorStore(VectorStore):
|
|||||||
score_threshold: Optional[float] = None,
|
score_threshold: Optional[float] = None,
|
||||||
consistency: Optional[models.ReadConsistency] = None,
|
consistency: Optional[models.ReadConsistency] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[Document]:
|
) -> List[tuple[Document, float]]:
|
||||||
"""Return docs most similar to embedding vector.
|
"""Return docs most similar to embedding vector.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of Documents most similar to the query.
|
List of Documents most similar to the query and distance for each.
|
||||||
"""
|
"""
|
||||||
qdrant_filter = filter
|
qdrant_filter = filter
|
||||||
|
|
||||||
@ -609,15 +609,46 @@ class QdrantVectorStore(VectorStore):
|
|||||||
).points
|
).points
|
||||||
|
|
||||||
return [
|
return [
|
||||||
self._document_from_point(
|
(
|
||||||
result,
|
self._document_from_point(
|
||||||
self.collection_name,
|
result,
|
||||||
self.content_payload_key,
|
self.collection_name,
|
||||||
self.metadata_payload_key,
|
self.content_payload_key,
|
||||||
|
self.metadata_payload_key,
|
||||||
|
),
|
||||||
|
result.score,
|
||||||
)
|
)
|
||||||
for result in results
|
for result in results
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def similarity_search_by_vector(
|
||||||
|
self,
|
||||||
|
embedding: List[float],
|
||||||
|
k: int = 4,
|
||||||
|
filter: Optional[models.Filter] = None,
|
||||||
|
search_params: Optional[models.SearchParams] = None,
|
||||||
|
offset: int = 0,
|
||||||
|
score_threshold: Optional[float] = None,
|
||||||
|
consistency: Optional[models.ReadConsistency] = None,
|
||||||
|
**kwargs: Any,
|
||||||
|
) -> List[Document]:
|
||||||
|
"""Return docs most similar to embedding vector.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of Documents most similar to the query.
|
||||||
|
"""
|
||||||
|
results = self.similarity_search_with_score_by_vector(
|
||||||
|
embedding,
|
||||||
|
k,
|
||||||
|
filter=filter,
|
||||||
|
search_params=search_params,
|
||||||
|
offset=offset,
|
||||||
|
score_threshold=score_threshold,
|
||||||
|
consistency=consistency,
|
||||||
|
**kwargs,
|
||||||
|
)
|
||||||
|
return list(map(itemgetter(0), results))
|
||||||
|
|
||||||
def max_marginal_relevance_search(
|
def max_marginal_relevance_search(
|
||||||
self,
|
self,
|
||||||
query: str,
|
query: str,
|
||||||
|
@ -66,6 +66,39 @@ def test_similarity_search_by_vector(
|
|||||||
assert_documents_equals(output, [Document(page_content="foo")])
|
assert_documents_equals(output, [Document(page_content="foo")])
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("location", qdrant_locations())
|
||||||
|
@pytest.mark.parametrize("content_payload_key", [QdrantVectorStore.CONTENT_KEY, "foo"])
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"metadata_payload_key", [QdrantVectorStore.METADATA_KEY, "bar"]
|
||||||
|
)
|
||||||
|
@pytest.mark.parametrize("vector_name", ["", "my-vector"])
|
||||||
|
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||||
|
def test_similarity_search_with_score_by_vector(
|
||||||
|
location: str,
|
||||||
|
content_payload_key: str,
|
||||||
|
metadata_payload_key: str,
|
||||||
|
vector_name: str,
|
||||||
|
batch_size: int,
|
||||||
|
) -> None:
|
||||||
|
"""Test end to end construction and search."""
|
||||||
|
texts = ["foo", "bar", "baz"]
|
||||||
|
docsearch = QdrantVectorStore.from_texts(
|
||||||
|
texts,
|
||||||
|
ConsistentFakeEmbeddings(),
|
||||||
|
location=location,
|
||||||
|
content_payload_key=content_payload_key,
|
||||||
|
metadata_payload_key=metadata_payload_key,
|
||||||
|
batch_size=batch_size,
|
||||||
|
vector_name=vector_name,
|
||||||
|
)
|
||||||
|
embeddings = ConsistentFakeEmbeddings().embed_query("foo")
|
||||||
|
output = docsearch.similarity_search_with_score_by_vector(embeddings, k=1)
|
||||||
|
assert len(output) == 1
|
||||||
|
document, score = output[0]
|
||||||
|
assert_documents_equals([document], [Document(page_content="foo")])
|
||||||
|
assert score >= 0
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("location", qdrant_locations())
|
@pytest.mark.parametrize("location", qdrant_locations())
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"metadata_payload_key", [QdrantVectorStore.METADATA_KEY, "bar"]
|
"metadata_payload_key", [QdrantVectorStore.METADATA_KEY, "bar"]
|
||||||
|
Loading…
Reference in New Issue
Block a user