From c5e50c40c939c8ee6c383ecbf9cadf0d3192edde Mon Sep 17 00:00:00 2001 From: Sidchat95 <122575389+Sidchat95@users.noreply.github.com> Date: Thu, 13 Jul 2023 00:30:47 -0500 Subject: [PATCH] Fix Document Similarity Check with passed Threshold (#6845) Convert the similarity obtained in the similarity_search_with_score_by_vector method to a relevance score before comparing it to the passed threshold. This is because the passed threshold is a number between 0 and 1 and is already in the relevance_score_fn format. As of now, the function is comparing two different scoring parameters, which does not work. Dependencies: None Issue: Different scores being compared in similarity_search_with_score_by_vector method in FAISS. Tag maintainer: @hwchase17 --------- Co-authored-by: Bagatur --- langchain/vectorstores/faiss.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/langchain/vectorstores/faiss.py b/langchain/vectorstores/faiss.py index eb3df5685c7..ff328184247 100644 --- a/langchain/vectorstores/faiss.py +++ b/langchain/vectorstores/faiss.py @@ -698,6 +698,9 @@ class FAISS(VectorStore): **kwargs: Any, ) -> List[Tuple[Document, float]]: """Return docs and their similarity scores on a scale from 0 to 1.""" + # Pop score threshold so that only relevancy scores, not raw scores, are + # filtered. + score_threshold = kwargs.pop("score_threshold", None) relevance_score_fn = self._select_relevance_score_fn() if relevance_score_fn is None: raise ValueError( @@ -711,4 +714,13 @@ class FAISS(VectorStore): fetch_k=fetch_k, **kwargs, ) - return [(doc, relevance_score_fn(score)) for doc, score in docs_and_scores] + docs_and_rel_scores = [ + (doc, relevance_score_fn(score)) for doc, score in docs_and_scores + ] + if score_threshold is not None: + docs_and_rel_scores = [ + (doc, similarity) + for doc, similarity in docs_and_rel_scores + if similarity >= score_threshold + ] + return docs_and_rel_scores