Add similarity_search_with_normalized_similarities (#2916)

Add a method that exposes a similarity search with corresponding normalized similarity scores. Implement only for FAISS now. ### Motivation: Some memory definitions combine `relevance` with other scores, like recency, importance, etc. While many (but not all) of the `VectorStore`s expose a `similarity_search_with_score` method, they don't all interpret the units of that score the same way (it depends on the distance metric and whether or not the embeddings are normalized). This PR proposes a `similarity_search_with_normalized_similarities` method that lets consumers of the vector store not have to worry about the metric and embedding scale. *Most providers default to euclidean distance, with Pinecone being one exception (defaults to cosine _similarity_).* --------- Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
2025-09-10 23:41:28 +00:00 · 2023-04-15 21:06:08 -07:00
parent b9db20481f
commit 4ffc58e07b
3 changed files with 120 additions and 4 deletions
--- a/tests/integration_tests/vectorstores/test_faiss.py
+++ b/tests/integration_tests/vectorstores/test_faiss.py
@@ -1,4 +1,5 @@
 """Test FAISS functionality."""
+import math
 import tempfile

 import pytest
@@ -109,3 +110,37 @@ def test_faiss_local_save_load() -> None:
        docsearch.save_local(temp_file.name)
        new_docsearch = FAISS.load_local(temp_file.name, FakeEmbeddings())
    assert new_docsearch.index is not None
+
+
+def test_faiss_similarity_search_with_relevance_scores() -> None:
+    """Test that querying an indexed text returns it with a relevance of 1.0."""
+    def to_relevance(score: float) -> float:
+        return 1.0 - score / math.sqrt(2)
+
+    store = FAISS.from_texts(
+        ["foo", "bar", "baz"], FakeEmbeddings(), normalize_score_fn=to_relevance
+    )
+    results = store.similarity_search_with_relevance_scores("foo", k=1)
+    top_doc, top_score = results[0]
+    assert top_doc == Document(page_content="foo")
+    assert top_score == 1.0
+
+
+def test_faiss_invalid_normalize_fn() -> None:
+    """Test that a normalize function returning an out-of-range score is rejected."""
+    corpus = ["foo", "bar", "baz"]
+    # 2.0 lies outside the [0, 1] range named in the expected error message.
+    expected_error = "Normalized similarity scores must be between 0 and 1"
+    store = FAISS.from_texts(
+        corpus, FakeEmbeddings(), normalize_score_fn=lambda _: 2.0
+    )
+    with pytest.raises(ValueError, match=expected_error):
+        store.similarity_search_with_relevance_scores("foo", k=1)
+
+
+def test_missing_normalize_score_fn() -> None:
+    """Test that relevance search fails without a normalize score function."""
+    # Build the store outside the raises block so only the search may raise.
+    faiss_instance = FAISS.from_texts(["foo", "bar", "baz"], FakeEmbeddings())
+    with pytest.raises(ValueError):
+        faiss_instance.similarity_search_with_relevance_scores("foo", k=2)