Add Chroma.similarity_search_by_vector_with_relevance_scores.

Summary of the changes carried by this patch:

* langchain/vectorstores/chroma.py
  - The similarity-search-by-vector docstring now documents `embedding`
    as List[float] instead of str.
  - New method similarity_search_by_vector_with_relevance_scores: queries
    the collection with a caller-supplied embedding and converts the
    result with _results_to_docs_and_scores.
  - The call to max_marginal_relevance_search_by_vector now passes the
    keyword `lambda_mult` (previously spelled `lambda_mul`).
* tests/integration_tests/vectorstores/test_chroma.py
  - New integration test exercising the vector-based scored search.

NOTE(review): the new method is named "...relevance_scores" while its
docstring describes the float as a cosine distance where lower means more
similar. The metric actually used is not visible in this patch; confirm
it against the collection configuration before relying on the wording.

diff --git a/langchain/vectorstores/chroma.py b/langchain/vectorstores/chroma.py
index 6ca60def7dc..47d3b4a1498 100644
--- a/langchain/vectorstores/chroma.py
+++ b/langchain/vectorstores/chroma.py
@@ -181,7 +181,7 @@ class Chroma(VectorStore):
     ) -> List[Document]:
         """Return docs most similar to embedding vector.
         Args:
-            embedding (str): Embedding to look up documents similar to.
+            embedding (List[float]): Embedding to look up documents similar to.
             k (int): Number of Documents to return. Defaults to 4.
             filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
         Returns:
@@ -192,6 +192,31 @@ class Chroma(VectorStore):
         )
         return _results_to_docs(results)
 
+    def similarity_search_by_vector_with_relevance_scores(
+        self,
+        embedding: List[float],
+        k: int = DEFAULT_K,
+        filter: Optional[Dict[str, str]] = None,
+        **kwargs: Any,
+    ) -> List[Tuple[Document, float]]:
+        """
+        Return docs most similar to embedding vector and similarity score.
+
+        Args:
+            embedding (List[float]): Embedding to look up documents similar to.
+            k (int): Number of Documents to return. Defaults to 4.
+            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
+
+        Returns:
+            List[Tuple[Document, float]]: List of documents most similar to
+            the query embedding and cosine distance in float for each.
+            Lower score represents more similarity.
+        """
+        results = self.__query_collection(
+            query_embeddings=embedding, n_results=k, where=filter
+        )
+        return _results_to_docs_and_scores(results)
+
     def similarity_search_with_score(
         self,
         query: str,
@@ -309,7 +334,7 @@ class Chroma(VectorStore):
 
         embedding = self._embedding_function.embed_query(query)
         docs = self.max_marginal_relevance_search_by_vector(
-            embedding, k, fetch_k, lambda_mul=lambda_mult, filter=filter
+            embedding, k, fetch_k, lambda_mult=lambda_mult, filter=filter
         )
         return docs
 
diff --git a/tests/integration_tests/vectorstores/test_chroma.py b/tests/integration_tests/vectorstores/test_chroma.py
index 652f8bcb3e6..f19164e5207 100644
--- a/tests/integration_tests/vectorstores/test_chroma.py
+++ b/tests/integration_tests/vectorstores/test_chroma.py
@@ -58,6 +58,25 @@ def test_chroma_with_metadatas_with_scores() -> None:
     assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)]
 
 
+def test_chroma_with_metadatas_with_scores_using_vector() -> None:
+    """Test end to end construction and scored search, using embedding vector."""
+    texts = ["foo", "bar", "baz"]
+    metadatas = [{"page": str(i)} for i in range(len(texts))]
+    embeddings = FakeEmbeddings()
+
+    docsearch = Chroma.from_texts(
+        collection_name="test_collection",
+        texts=texts,
+        embedding=embeddings,
+        metadatas=metadatas,
+    )
+    embedded_query = embeddings.embed_query("foo")
+    output = docsearch.similarity_search_by_vector_with_relevance_scores(
+        embedding=embedded_query, k=1
+    )
+    assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)]
+
+
 def test_chroma_search_filter() -> None:
     """Test end to end construction and search with metadata filtering."""
     texts = ["far", "bar", "baz"]