From 621419f71e1910600d4b02e07a0e1fcdab230d60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20=C5=81ukawski?= Date: Mon, 6 Nov 2023 16:36:59 +0100 Subject: [PATCH] Fix normalizing the cosine distance in Qdrant (#12934) The Qdrant vector store was normalizing the cosine score incorrectly, returning `0.0` for the best match instead of `1.0`. Internally, Qdrant returns a cosine score from `-1.0` (worst match) to `1.0` (best match); the new formula, `(score + 1.0) / 2.0`, maps that range onto `[0.0, 1.0]`. --- libs/langchain/langchain/vectorstores/qdrant.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libs/langchain/langchain/vectorstores/qdrant.py b/libs/langchain/langchain/vectorstores/qdrant.py index b364c991539..b85edabcdc2 100644 --- a/libs/langchain/langchain/vectorstores/qdrant.py +++ b/libs/langchain/langchain/vectorstores/qdrant.py @@ -1837,6 +1837,11 @@ class Qdrant(VectorStore): ) return qdrant + @staticmethod + def _cosine_relevance_score_fn(distance: float) -> float: + """Normalize the distance to a score on a scale [0, 1].""" + return (distance + 1.0) / 2.0 + def _select_relevance_score_fn(self) -> Callable[[float], float]: """ The 'correct' relevance function