Mirror of https://github.com/hwchase17/langchain.git (synced 2025-06-24 15:43:54 +00:00)
Chroma: add vector search with scores (#6864)
- Description: Adds to the Chroma integration the option to run a similarity search by an embedding vector with relevance scores; also fixes two minor typos.
- Issue: The "lambda_mult" typo is related to #4861
- Maintainers: @rlancemartin, @eyurtsev
This commit is contained in:
parent 576880abc5
commit fed64ae060
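In effect, the commit gives Chroma a vector-first counterpart to similarity_search_with_score: instead of passing a raw query string and letting the store embed it, the caller supplies a pre-computed embedding and gets back (Document, score) pairs, where the score is a cosine distance and lower means more similar. A minimal usage sketch, mirroring the integration test added below; the OpenAIEmbeddings choice and the collection/text names are illustrative assumptions, not part of this change:

    from langchain.embeddings import OpenAIEmbeddings
    from langchain.vectorstores import Chroma

    # Any Embeddings implementation works here; OpenAIEmbeddings is just one
    # example and requires API credentials.
    embeddings = OpenAIEmbeddings()

    # Illustrative collection and texts, mirroring the integration test below.
    texts = ["foo", "bar", "baz"]
    docsearch = Chroma.from_texts(
        collection_name="demo_collection",  # hypothetical name
        texts=texts,
        embedding=embeddings,
        metadatas=[{"page": str(i)} for i in range(len(texts))],
    )

    # Text-based search: the store embeds the query string itself.
    results = docsearch.similarity_search_with_score("foo", k=1)

    # Vector-based search (added in this commit): embed the query yourself,
    # then search by the resulting vector. Scores are cosine distances, so
    # lower means more similar.
    embedded_query = embeddings.embed_query("foo")
    results = docsearch.similarity_search_by_vector_with_relevance_scores(
        embedding=embedded_query, k=1
    )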
@@ -181,7 +181,7 @@ class Chroma(VectorStore):
     ) -> List[Document]:
         """Return docs most similar to embedding vector.
         Args:
-            embedding (str): Embedding to look up documents similar to.
+            embedding (List[float]): Embedding to look up documents similar to.
             k (int): Number of Documents to return. Defaults to 4.
             filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
         Returns:
@@ -192,6 +192,31 @@ class Chroma(VectorStore):
         )
         return _results_to_docs(results)
 
+    def similarity_search_by_vector_with_relevance_scores(
+        self,
+        embedding: List[float],
+        k: int = DEFAULT_K,
+        filter: Optional[Dict[str, str]] = None,
+        **kwargs: Any,
+    ) -> List[Tuple[Document, float]]:
+        """
+        Return docs most similar to embedding vector and similarity score.
+
+        Args:
+            embedding (List[float]): Embedding to look up documents similar to.
+            k (int): Number of Documents to return. Defaults to 4.
+            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
+
+        Returns:
+            List[Tuple[Document, float]]: List of documents most similar to
+                the query text and cosine distance in float for each.
+                Lower score represents more similarity.
+        """
+        results = self.__query_collection(
+            query_embeddings=embedding, n_results=k, where=filter
+        )
+        return _results_to_docs_and_scores(results)
+
     def similarity_search_with_score(
         self,
         query: str,
@@ -309,7 +334,7 @@ class Chroma(VectorStore):
 
         embedding = self._embedding_function.embed_query(query)
         docs = self.max_marginal_relevance_search_by_vector(
-            embedding, k, fetch_k, lambda_mul=lambda_mult, filter=filter
+            embedding, k, fetch_k, lambda_mult=lambda_mult, filter=filter
         )
         return docs
 
@@ -58,6 +58,25 @@ def test_chroma_with_metadatas_with_scores() -> None:
     assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)]
 
 
+def test_chroma_with_metadatas_with_scores_using_vector() -> None:
+    """Test end to end construction and scored search, using embedding vector."""
+    texts = ["foo", "bar", "baz"]
+    metadatas = [{"page": str(i)} for i in range(len(texts))]
+    embeddings = FakeEmbeddings()
+
+    docsearch = Chroma.from_texts(
+        collection_name="test_collection",
+        texts=texts,
+        embedding=embeddings,
+        metadatas=metadatas,
+    )
+    embedded_query = embeddings.embed_query("foo")
+    output = docsearch.similarity_search_by_vector_with_relevance_scores(
+        embedding=embedded_query, k=1
+    )
+    assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)]
+
+
 def test_chroma_search_filter() -> None:
     """Test end to end construction and search with metadata filtering."""
     texts = ["far", "bar", "baz"]