feat: add function similarity_search_limit_score to vectorstores.redis (#1950)

# Description
***
Add the methods `similarity_search_limit_score` and
`similarity_search_with_score` to the Redis vector store.

# How to use
***
```python
rds = Redis.from_existing_index(embeddings,
redis_url="redis://localhost:6379", index_name='link')

rds.similarity_search_limit_score(query, k=3, score_threshold=0.2)

rds.similarity_search_with_score(query, k=3)
```

---------

Co-authored-by: Peter <peter.shi@alephf.com>
This commit is contained in:
Peter Shi 2023-03-28 06:05:09 +08:00 committed by GitHub
parent 953e58d004
commit 024efb09f8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -4,7 +4,7 @@ from __future__ import annotations
import json import json
import logging import logging
import uuid import uuid
from typing import Any, Callable, Iterable, List, Mapping, Optional from typing import Any, Callable, Iterable, List, Mapping, Optional, Tuple
import numpy as np import numpy as np
from redis.client import Redis as RedisType from redis.client import Redis as RedisType
@ -86,6 +86,48 @@ class Redis(VectorStore):
def similarity_search( def similarity_search(
self, query: str, k: int = 4, **kwargs: Any self, query: str, k: int = 4, **kwargs: Any
) -> List[Document]: ) -> List[Document]:
docs_and_scores = self.similarity_search_with_score(query, k=k)
return [doc for doc, _ in docs_and_scores]
def similarity_search_limit_score(
self, query: str, k: int = 4, score_threshold: float = 0.2, **kwargs: Any
) -> List[Document]:
"""Return the documents most similar to the query whose score is below a limit.
Runs ``similarity_search_with_score`` for ``k`` results and then drops every
hit whose score is not strictly less than ``score_threshold``. The filter is
applied after the search, so fewer than ``k`` documents may be returned.
Args:
query (str): The query text for which to find similar documents.
k (int): The number of candidates fetched before filtering. Default is 4.
score_threshold (float): Exclusive upper bound on the score a document
may have to be kept. Defaults to 0.2.
A lower score is treated as a closer match (presumably the score is
a cosine distance -- confirm against the index definition).
**kwargs: Accepted for interface compatibility; not used by this method.
Returns:
List[Document]: The documents whose score is below ``score_threshold``.
The scores themselves are not part of the returned value.
Note:
If there are no documents that satisfy the score_threshold value,
an empty list is returned.
"""
# Fetch the k nearest candidates together with their scores.
docs_and_scores = self.similarity_search_with_score(query, k=k)
# Keep only hits strictly below the threshold; the scores are discarded.
return [doc for doc, score in docs_and_scores if score < score_threshold]
def similarity_search_with_score(
self, query: str, k: int = 4
) -> List[Tuple[Document, float]]:
"""Return docs most similar to query.
Args:
query: Text to look up documents similar to.
k: Number of Documents to return. Defaults to 4.
Returns:
List of Documents most similar to the query and score for each
"""
try: try:
from redis.commands.search.query import Query from redis.commands.search.query import Query
except ImportError: except ImportError:
@ -120,12 +162,17 @@ class Redis(VectorStore):
# perform vector search # perform vector search
results = self.client.ft(self.index_name).search(redis_query, params_dict) results = self.client.ft(self.index_name).search(redis_query, params_dict)
documents = [ docs = [
Document(page_content=result.content, metadata=json.loads(result.metadata)) (
Document(
page_content=result.content, metadata=json.loads(result.metadata)
),
float(result.vector_score),
)
for result in results.docs for result in results.docs
] ]
return documents return docs
@classmethod @classmethod
def from_texts( def from_texts(