mirror of
https://github.com/hwchase17/langchain.git
synced 2026-02-21 06:33:41 +00:00
core: deprecate vectorstore relevance scoring
This commit is contained in:
@@ -18,6 +18,7 @@ and retrieve the data that are 'most similar' to the embedded query.
|
||||
|
||||
Embeddings, Document
|
||||
""" # noqa: E501
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
@@ -39,6 +40,7 @@ from typing import (
|
||||
TypeVar,
|
||||
)
|
||||
|
||||
from langchain_core._api.deprecation import deprecated
|
||||
from langchain_core.embeddings import Embeddings
|
||||
from langchain_core.pydantic_v1 import Field, root_validator
|
||||
from langchain_core.retrievers import BaseRetriever
|
||||
@@ -184,6 +186,11 @@ class VectorStore(ABC):
|
||||
) -> List[Document]:
|
||||
"""Return docs most similar to query."""
|
||||
|
||||
@deprecated(
|
||||
since="0.1.43",
|
||||
removal="0.2",
|
||||
alternative="euclidean_distance_to_relevance_score",
|
||||
)
|
||||
@staticmethod
|
||||
def _euclidean_relevance_score_fn(distance: float) -> float:
|
||||
"""Return a similarity score on a scale [0, 1]."""
|
||||
@@ -199,12 +206,22 @@ class VectorStore(ABC):
|
||||
# to a similarity function (0 to 1)
|
||||
return 1.0 - distance / math.sqrt(2)
|
||||
|
||||
@deprecated(
|
||||
since="0.1.43",
|
||||
removal="0.2",
|
||||
alternative="cosine_to_relevance_score",
|
||||
)
|
||||
@staticmethod
|
||||
def _cosine_relevance_score_fn(distance: float) -> float:
|
||||
"""Normalize the distance to a score on a scale [0, 1]."""
|
||||
|
||||
return 1.0 - distance
|
||||
|
||||
@deprecated(
|
||||
since="0.1.43",
|
||||
removal="0.2",
|
||||
message="Max inner product relevance scoring is not recommended.",
|
||||
)
|
||||
@staticmethod
|
||||
def _max_inner_product_relevance_score_fn(distance: float) -> float:
|
||||
"""Normalize the distance to a score on a scale [0, 1]."""
|
||||
@@ -213,6 +230,15 @@ class VectorStore(ABC):
|
||||
|
||||
return -1.0 * distance
|
||||
|
||||
@deprecated(
|
||||
since="0.1.43",
|
||||
removal="0.2",
|
||||
message=(
|
||||
"Relevance scoring is no longer implemented on the VectorStore - "
|
||||
"please use external helper methods such as "
|
||||
"euclidean_distance_to_relevance_score instead."
|
||||
),
|
||||
)
|
||||
def _select_relevance_score_fn(self) -> Callable[[float], float]:
|
||||
"""
|
||||
The 'correct' relevance function
|
||||
@@ -244,6 +270,16 @@ class VectorStore(ABC):
|
||||
None, self.similarity_search_with_score, *args, **kwargs
|
||||
)
|
||||
|
||||
@deprecated(
|
||||
since="0.1.43",
|
||||
removal="0.2",
|
||||
message=(
|
||||
"Relevance scoring is no longer implemented on the VectorStore - "
|
||||
"please use retrieve documents with similarity_search_with_score"
|
||||
" and convert scores with external helper methods such as "
|
||||
"euclidean_distance_to_relevance_score instead."
|
||||
),
|
||||
)
|
||||
def _similarity_search_with_relevance_scores(
|
||||
self,
|
||||
query: str,
|
||||
@@ -271,6 +307,16 @@ class VectorStore(ABC):
|
||||
docs_and_scores = self.similarity_search_with_score(query, k, **kwargs)
|
||||
return [(doc, relevance_score_fn(score)) for doc, score in docs_and_scores]
|
||||
|
||||
@deprecated(
|
||||
since="0.1.43",
|
||||
removal="0.2",
|
||||
message=(
|
||||
"Relevance scoring is no longer implemented on the VectorStore - "
|
||||
"please use retrieve documents with similarity_search_with_score"
|
||||
" and convert scores with external helper methods such as "
|
||||
"euclidean_distance_to_relevance_score instead."
|
||||
),
|
||||
)
|
||||
async def _asimilarity_search_with_relevance_scores(
|
||||
self,
|
||||
query: str,
|
||||
@@ -298,6 +344,16 @@ class VectorStore(ABC):
|
||||
docs_and_scores = await self.asimilarity_search_with_score(query, k, **kwargs)
|
||||
return [(doc, relevance_score_fn(score)) for doc, score in docs_and_scores]
|
||||
|
||||
@deprecated(
|
||||
since="0.1.43",
|
||||
removal="0.2",
|
||||
message=(
|
||||
"Relevance scoring is no longer implemented on the VectorStore - "
|
||||
"please use retrieve documents with similarity_search_with_score"
|
||||
" and convert scores with external helper methods such as "
|
||||
"euclidean_distance_to_relevance_score instead."
|
||||
),
|
||||
)
|
||||
def similarity_search_with_relevance_scores(
|
||||
self,
|
||||
query: str,
|
||||
@@ -345,6 +401,16 @@ class VectorStore(ABC):
|
||||
)
|
||||
return docs_and_similarities
|
||||
|
||||
@deprecated(
|
||||
since="0.1.43",
|
||||
removal="0.2",
|
||||
message=(
|
||||
"Relevance scoring is no longer implemented on the VectorStore - "
|
||||
"please use retrieve documents with similarity_search_with_score"
|
||||
" and convert scores with external helper methods such as "
|
||||
"euclidean_distance_to_relevance_score instead."
|
||||
),
|
||||
)
|
||||
async def asimilarity_search_with_relevance_scores(
|
||||
self,
|
||||
query: str,
|
||||
@@ -740,3 +806,27 @@ class VectorStoreRetriever(BaseRetriever):
|
||||
) -> List[str]:
|
||||
"""Add documents to vectorstore."""
|
||||
return await self.vectorstore.aadd_documents(documents, **kwargs)
|
||||
|
||||
|
||||
def euclidean_distance_to_relevance_score(score: float) -> float:
    """Convert a euclidean distance into a relevance score on a scale [0, 1].

    Assumptions:
        - The embeddings are normalized to length 1 (unit normed).
        - Each component of the vectors is in [-1, 1].
        - Input distances are therefore in [0, 2], where 0 is most similar
          and 2 is most dissimilar.

    Args:
        score: Euclidean distance between two embeddings.

    Returns:
        ``1 - score / 2``: 1.0 for a distance of 0 and 0.0 for a distance of 2.
        The input is not clamped, so a distance outside [0, 2] produces a
        result outside [0, 1].
    """
    # Linear rescale of the assumed [0, 2] distance range, flipped so that a
    # smaller distance yields a larger relevance.
    return 1 - (score / 2)
|
||||
|
||||
|
||||
def cosine_to_relevance_score(score: float) -> float:
    """Convert a cosine similarity into a relevance score on a scale [0, 1].

    Assumptions:
        - Input cosine scores are in [-1, 1], where 1 is most similar and -1
          is most dissimilar.

    Args:
        score: Cosine similarity between two embeddings.

    Returns:
        ``(score + 1) / 2``: 1.0 for a similarity of 1 and 0.0 for a
        similarity of -1. The input is not clamped, so a value outside
        [-1, 1] produces a result outside [0, 1].
    """
    # Shift the assumed [-1, 1] range to [0, 2], then halve it to reach [0, 1].
    return (score + 1) / 2
|
||||
|
||||
Reference in New Issue
Block a user