mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-10 23:41:28 +00:00
Add similarity_search_with_normalized_similarities (#2916)
Add a method that exposes a similarity search with corresponding normalized similarity scores. Implement only for FAISS now. ### Motivation: Some memory definitions combine `relevance` with other scores, like recency , importance, etc. While many (but not all) of the `VectorStore`'s expose a `similarity_search_with_score` method, they don't all interpret the units of that score (depends on the distance metric and whether or not the the embeddings are normalized). This PR proposes a `similarity_search_with_normalized_similarities` method that lets consumers of the vector store not have to worry about the metric and embedding scale. *Most providers default to euclidean distance, with Pinecone being one exception (defaults to cosine _similarity_).* --------- Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
"""Test FAISS functionality."""
|
||||
import math
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
@@ -109,3 +110,37 @@ def test_faiss_local_save_load() -> None:
|
||||
docsearch.save_local(temp_file.name)
|
||||
new_docsearch = FAISS.load_local(temp_file.name, FakeEmbeddings())
|
||||
assert new_docsearch.index is not None
|
||||
|
||||
|
||||
def test_faiss_similarity_search_with_relevance_scores() -> None:
|
||||
"""Test the similarity search with normalized similarities."""
|
||||
texts = ["foo", "bar", "baz"]
|
||||
docsearch = FAISS.from_texts(
|
||||
texts,
|
||||
FakeEmbeddings(),
|
||||
normalize_score_fn=lambda score: 1.0 - score / math.sqrt(2),
|
||||
)
|
||||
outputs = docsearch.similarity_search_with_relevance_scores("foo", k=1)
|
||||
output, score = outputs[0]
|
||||
assert output == Document(page_content="foo")
|
||||
assert score == 1.0
|
||||
|
||||
|
||||
def test_faiss_invalid_normalize_fn() -> None:
|
||||
"""Test the similarity search with normalized similarities."""
|
||||
texts = ["foo", "bar", "baz"]
|
||||
docsearch = FAISS.from_texts(
|
||||
texts, FakeEmbeddings(), normalize_score_fn=lambda _: 2.0
|
||||
)
|
||||
with pytest.raises(
|
||||
ValueError, match="Normalized similarity scores must be between 0 and 1"
|
||||
):
|
||||
docsearch.similarity_search_with_relevance_scores("foo", k=1)
|
||||
|
||||
|
||||
def test_missing_normalize_score_fn() -> None:
|
||||
"""Test doesn't perform similarity search without a normalize score function."""
|
||||
with pytest.raises(ValueError):
|
||||
texts = ["foo", "bar", "baz"]
|
||||
faiss_instance = FAISS.from_texts(texts, FakeEmbeddings())
|
||||
faiss_instance.similarity_search_with_relevance_scores("foo", k=2)
|
||||
|
Reference in New Issue
Block a user