Support score_threshold in SupabaseVectorStore similarity search (#14439)

Description: Add support for setting the `score_threshold` for
similarity search in SupabaseVectorStore.

This pull request addresses issue #14438

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
codehound42 2024-01-03 01:47:05 +01:00 committed by GitHub
parent d4a98e4e04
commit 8aa921d3a4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,6 +1,7 @@
from __future__ import annotations
import uuid
import warnings
from itertools import repeat
from typing import (
TYPE_CHECKING,
@ -204,7 +205,7 @@ class SupabaseVectorStore(VectorStore):
) -> List[Tuple[Document, float]]:
vector = self._embedding.embed_query(query)
return self.similarity_search_by_vector_with_relevance_scores(
vector, k=k, filter=filter
vector, k=k, filter=filter, **kwargs
)
def match_args(
@ -221,6 +222,7 @@ class SupabaseVectorStore(VectorStore):
k: int,
filter: Optional[Dict[str, Any]] = None,
postgrest_filter: Optional[str] = None,
score_threshold: Optional[float] = None,
) -> List[Tuple[Document, float]]:
match_documents_params = self.match_args(query, filter)
query_builder = self._client.rpc(self.query_name, match_documents_params)
@ -246,6 +248,18 @@ class SupabaseVectorStore(VectorStore):
if search.get("content")
]
if score_threshold is not None:
match_result = [
(doc, similarity)
for doc, similarity in match_result
if similarity >= score_threshold
]
if len(match_result) == 0:
warnings.warn(
"No relevant docs were retrieved using the relevance score"
f" threshold {score_threshold}"
)
return match_result
def similarity_search_by_vector_returning_embeddings(