From b65102bdb23e131ea0fb9daee7a40c5d13dada78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=96=8C=28Bin=20Wang=29?= Date: Thu, 20 Jul 2023 02:20:52 +1200 Subject: [PATCH] fix: pgvector search_type of similarity_score_threshold not working (#7771) - Description: VectorStoreRetriever->similarity_score_threshold with search_type of "similarity_score_threshold" does not work because of the following two minor issues. - Issue: 1. In line 237 of `vectorstores/base.py`, "score_threshold" is passed to `_similarity_search_with_relevance_scores` via kwargs, but score_threshold is not a valid argument of this method. As a fix, score_threshold is popped from kwargs before calling `_similarity_search_with_relevance_scores`. 2. In lines 596 to 607 of `vectorstores/pgvector.py`, distance_strategy is checked against the string in the Enum. However, self.distance_strategy resolves to the distance_strategy property defined at line 316, where the callable function is passed. To solve this issue, self.distance_strategy is changed to self._distance_strategy to avoid calling the property method. - Dependencies: No, - Tag maintainer: @rlancemartin, @eyurtsev, - Twitter handle: No --------- Co-authored-by: Bin Wang --- langchain/vectorstores/base.py | 3 ++- langchain/vectorstores/pgvector.py | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/langchain/vectorstores/base.py b/langchain/vectorstores/base.py index d529e253abf..a09ee25759c 100644 --- a/langchain/vectorstores/base.py +++ b/langchain/vectorstores/base.py @@ -234,6 +234,8 @@ class VectorStore(ABC): Returns: List of Tuples of (doc, similarity_score) """ + score_threshold = kwargs.pop("score_threshold", None) + docs_and_similarities = self._similarity_search_with_relevance_scores( query, k=k, **kwargs ) @@ -246,7 +248,6 @@ class VectorStore(ABC): f" 0 and 1, got {docs_and_similarities}" ) - score_threshold = kwargs.get("score_threshold") if score_threshold is not None: docs_and_similarities = [ (doc, 
similarity) diff --git a/langchain/vectorstores/pgvector.py b/langchain/vectorstores/pgvector.py index b0958b78fa5..a2969b9fa0f 100644 --- a/langchain/vectorstores/pgvector.py +++ b/langchain/vectorstores/pgvector.py @@ -593,15 +593,15 @@ class PGVector(VectorStore): # Default strategy is to rely on distance strategy provided # in vectorstore constructor - if self.distance_strategy == DistanceStrategy.COSINE: + if self._distance_strategy == DistanceStrategy.COSINE: return self._cosine_relevance_score_fn - elif self.distance_strategy == DistanceStrategy.EUCLIDEAN: + elif self._distance_strategy == DistanceStrategy.EUCLIDEAN: return self._euclidean_relevance_score_fn - elif self.distance_strategy == DistanceStrategy.MAX_INNER_PRODUCT: + elif self._distance_strategy == DistanceStrategy.MAX_INNER_PRODUCT: return self._max_inner_product_relevance_score_fn else: raise ValueError( "No supported normalization function" - f" for distance_strategy of {self.distance_strategy}." + f" for distance_strategy of {self._distance_strategy}." "Consider providing relevance_score_fn to PGVector constructor." )