Mirror of https://github.com/hwchase17/langchain.git (synced 2025-06-24 15:43:54 +00:00)
Chroma: add vector search with scores (#6864)
- Description: Adds to the Chroma integration the option to run a similarity search by an embedding vector with relevance scores; also fixes two minor typos.
- Issue: The "lambda_mult" typo is related to #4861
- Maintainers: @rlancemartin, @eyurtsev
This commit is contained in:
parent 576880abc5
commit fed64ae060
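In effect, the commit gives Chroma a vector-first counterpart to similarity_search_with_score: instead of passing a raw query string and letting the store embed it, the caller supplies a pre-computed embedding and gets back (Document, score) pairs, where the score is a cosine distance and lower means more similar. A minimal usage sketch, mirroring the integration test added below; the OpenAIEmbeddings choice and the collection/text names are illustrative assumptions, not part of this change:

    from langchain.embeddings import OpenAIEmbeddings
    from langchain.vectorstores import Chroma

    # Any Embeddings implementation works here; OpenAIEmbeddings is just one
    # example and requires API credentials.
    embeddings = OpenAIEmbeddings()

    # Illustrative collection and texts, mirroring the integration test below.
    texts = ["foo", "bar", "baz"]
    docsearch = Chroma.from_texts(
        collection_name="demo_collection",  # hypothetical name
        texts=texts,
        embedding=embeddings,
        metadatas=[{"page": str(i)} for i in range(len(texts))],
    )

    # Text-based search: the store embeds the query string itself.
    results = docsearch.similarity_search_with_score("foo", k=1)

    # Vector-based search (added in this commit): embed the query yourself,
    # then search by the resulting vector. Scores are cosine distances, so
    # lower means more similar.
    embedded_query = embeddings.embed_query("foo")
    results = docsearch.similarity_search_by_vector_with_relevance_scores(
        embedding=embedded_query, k=1
    )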
@@ -181,7 +181,7 @@ class Chroma(VectorStore):
     ) -> List[Document]:
         """Return docs most similar to embedding vector.
         Args:
-            embedding (str): Embedding to look up documents similar to.
+            embedding (List[float]): Embedding to look up documents similar to.
             k (int): Number of Documents to return. Defaults to 4.
             filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
         Returns:
@@ -192,6 +192,31 @@ class Chroma(VectorStore):
         )
         return _results_to_docs(results)
 
+    def similarity_search_by_vector_with_relevance_scores(
+        self,
+        embedding: List[float],
+        k: int = DEFAULT_K,
+        filter: Optional[Dict[str, str]] = None,
+        **kwargs: Any,
+    ) -> List[Tuple[Document, float]]:
+        """
+        Return docs most similar to embedding vector and similarity score.
+
+        Args:
+            embedding (List[float]): Embedding to look up documents similar to.
+            k (int): Number of Documents to return. Defaults to 4.
+            filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
+
+        Returns:
+            List[Tuple[Document, float]]: List of documents most similar to
+                the query text and cosine distance in float for each.
+                Lower score represents more similarity.
+        """
+        results = self.__query_collection(
+            query_embeddings=embedding, n_results=k, where=filter
+        )
+        return _results_to_docs_and_scores(results)
+
     def similarity_search_with_score(
         self,
         query: str,
@@ -309,7 +334,7 @@ class Chroma(VectorStore):
 
         embedding = self._embedding_function.embed_query(query)
         docs = self.max_marginal_relevance_search_by_vector(
-            embedding, k, fetch_k, lambda_mul=lambda_mult, filter=filter
+            embedding, k, fetch_k, lambda_mult=lambda_mult, filter=filter
         )
         return docs
 
@@ -58,6 +58,25 @@ def test_chroma_with_metadatas_with_scores() -> None:
     assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)]
 
 
+def test_chroma_with_metadatas_with_scores_using_vector() -> None:
+    """Test end to end construction and scored search, using embedding vector."""
+    texts = ["foo", "bar", "baz"]
+    metadatas = [{"page": str(i)} for i in range(len(texts))]
+    embeddings = FakeEmbeddings()
+
+    docsearch = Chroma.from_texts(
+        collection_name="test_collection",
+        texts=texts,
+        embedding=embeddings,
+        metadatas=metadatas,
+    )
+    embedded_query = embeddings.embed_query("foo")
+    output = docsearch.similarity_search_by_vector_with_relevance_scores(
+        embedding=embedded_query, k=1
+    )
+    assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)]
+
+
 def test_chroma_search_filter() -> None:
     """Test end to end construction and search with metadata filtering."""
     texts = ["far", "bar", "baz"]