# Decorator order matters: ``@staticmethod`` must be outermost so that
# ``deprecated`` receives the plain function and the attribute stored on the
# class is still a static method. With the order reversed, ``deprecated`` is
# handed a ``staticmethod`` object, which is not callable and has no
# ``__name__`` before Python 3.10. Assuming ``deprecated`` returns an ordinary
# function wrapper (TODO confirm), the wrapper would also bind ``self`` when
# looked up on an instance.
@staticmethod
@deprecated(
    since="0.1.43",
    removal="0.2",
    alternative="cosine_to_relevance_score",
)
def _cosine_relevance_score_fn(distance: float) -> float:
    """Normalize the distance to a score on a scale [0, 1].

    Args:
        distance: Distance value to convert.

    Returns:
        ``1.0 - distance``. The result is not clamped.
    """
    # NOTE(review): the advertised alternative, ``cosine_to_relevance_score``,
    # takes a cosine *similarity* and computes ``(score + 1) / 2``; it is not
    # a drop-in replacement for this distance-based formula.
    return 1.0 - distance
+ ), + ) def _select_relevance_score_fn(self) -> Callable[[float], float]: """ The 'correct' relevance function @@ -244,6 +270,16 @@ class VectorStore(ABC): None, self.similarity_search_with_score, *args, **kwargs ) + @deprecated( + since="0.1.43", + removal="0.2", + message=( + "Relevance scoring is no longer implemented on the VectorStore - " + "please use retrieve documents with similarity_search_with_score" + " and convert scores with external helper methods such as " + "euclidean_distance_to_relevance_score instead." + ), + ) def _similarity_search_with_relevance_scores( self, query: str, @@ -271,6 +307,16 @@ class VectorStore(ABC): docs_and_scores = self.similarity_search_with_score(query, k, **kwargs) return [(doc, relevance_score_fn(score)) for doc, score in docs_and_scores] + @deprecated( + since="0.1.43", + removal="0.2", + message=( + "Relevance scoring is no longer implemented on the VectorStore - " + "please use retrieve documents with similarity_search_with_score" + " and convert scores with external helper methods such as " + "euclidean_distance_to_relevance_score instead." + ), + ) async def _asimilarity_search_with_relevance_scores( self, query: str, @@ -298,6 +344,16 @@ class VectorStore(ABC): docs_and_scores = await self.asimilarity_search_with_score(query, k, **kwargs) return [(doc, relevance_score_fn(score)) for doc, score in docs_and_scores] + @deprecated( + since="0.1.43", + removal="0.2", + message=( + "Relevance scoring is no longer implemented on the VectorStore - " + "please use retrieve documents with similarity_search_with_score" + " and convert scores with external helper methods such as " + "euclidean_distance_to_relevance_score instead." 
def _clamp_to_unit_interval(value: float) -> float:
    """Clamp ``value`` into [0, 1], letting NaN pass through unchanged."""
    # ``value`` is deliberately the first argument of both calls: ``max`` and
    # ``min`` keep their first argument unless a later one compares strictly
    # greater/smaller, and every comparison against NaN is false, so a NaN
    # input is returned as NaN instead of being silently turned into 0 or 1.
    return min(max(value, 0.0), 1.0)


def euclidean_distance_to_relevance_score(score: float) -> float:
    """Convert a Euclidean distance into a relevance score on a scale [0, 1].

    Assumptions:
        - The embeddings are normalized to length 1 (unit normed).
        - Each component of the vector is in [-1, 1].
        - Input distances are therefore in [0, 2], where 0 is most similar
          and 2 is most dissimilar.

    Args:
        score: Euclidean distance between two embeddings.

    Returns:
        ``1 - score / 2``, clamped to [0, 1] so that inputs slightly outside
        the assumed range (for example from floating-point round-off) still
        produce a valid relevance score. NaN is returned unchanged.
    """
    return _clamp_to_unit_interval(1 - (score / 2))


def cosine_to_relevance_score(score: float) -> float:
    """Convert a cosine similarity into a relevance score on a scale [0, 1].

    Assumptions:
        - Input cosine scores are in [-1, 1], where 1 is most similar and -1
          is most dissimilar.

    Args:
        score: Cosine *similarity* (not cosine distance) between two
            embeddings.

    Returns:
        ``(score + 1) / 2``, clamped to [0, 1] so that inputs slightly outside
        the assumed range (for example from floating-point round-off) still
        produce a valid relevance score. NaN is returned unchanged.
    """
    return _clamp_to_unit_interval((score + 1) / 2)