mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-24 07:35:18 +00:00
Chroma: add vector search with scores (#6864)
- Description: Adds to the Chroma integration the option to run a similarity search by embedding vector with relevance scores, and fixes two minor typos.
- Issue: The "lambda_mult" typo is related to #4861.
- Maintainer: @rlancemartin, @eyurtsev
This commit is contained in:
parent
576880abc5
commit
fed64ae060
@ -181,7 +181,7 @@ class Chroma(VectorStore):
|
|||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
"""Return docs most similar to embedding vector.
|
"""Return docs most similar to embedding vector.
|
||||||
Args:
|
Args:
|
||||||
embedding (str): Embedding to look up documents similar to.
|
embedding (List[float]): Embedding to look up documents similar to.
|
||||||
k (int): Number of Documents to return. Defaults to 4.
|
k (int): Number of Documents to return. Defaults to 4.
|
||||||
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
||||||
Returns:
|
Returns:
|
||||||
@ -192,6 +192,31 @@ class Chroma(VectorStore):
|
|||||||
)
|
)
|
||||||
return _results_to_docs(results)
|
return _results_to_docs(results)
|
||||||
|
|
||||||
|
def similarity_search_by_vector_with_relevance_scores(
    self,
    embedding: List[float],
    k: int = DEFAULT_K,
    filter: Optional[Dict[str, str]] = None,
    **kwargs: Any,
) -> List[Tuple[Document, float]]:
    """Look up documents by embedding vector and return them with scores.

    The embedding is passed straight to the collection query; no text is
    embedded by this method.

    Args:
        embedding (List[float]): Embedding to look up documents similar to.
        k (int): Number of Documents to return. Defaults to DEFAULT_K.
        filter (Optional[Dict[str, str]]): Filter by metadata.
            Defaults to None.
        **kwargs (Any): Accepted for signature compatibility; not used by
            this method.

    Returns:
        List[Tuple[Document, float]]: One ``(document, score)`` pair per
        match. The score is the distance reported by the collection query
        (described upstream as cosine distance -- NOTE(review): confirm
        against the collection's configured metric). A lower score
        represents more similarity.
    """
    # Query by raw vector; the result conversion is delegated to the
    # module-level helper that pairs each document with its score.
    raw_hits = self.__query_collection(
        n_results=k,
        where=filter,
        query_embeddings=embedding,
    )
    scored_documents = _results_to_docs_and_scores(raw_hits)
    return scored_documents
|
||||||
|
|
||||||
def similarity_search_with_score(
|
def similarity_search_with_score(
|
||||||
self,
|
self,
|
||||||
query: str,
|
query: str,
|
||||||
@ -309,7 +334,7 @@ class Chroma(VectorStore):
|
|||||||
|
|
||||||
embedding = self._embedding_function.embed_query(query)
|
embedding = self._embedding_function.embed_query(query)
|
||||||
docs = self.max_marginal_relevance_search_by_vector(
|
docs = self.max_marginal_relevance_search_by_vector(
|
||||||
embedding, k, fetch_k, lambda_mul=lambda_mult, filter=filter
|
embedding, k, fetch_k, lambda_mult=lambda_mult, filter=filter
|
||||||
)
|
)
|
||||||
return docs
|
return docs
|
||||||
|
|
||||||
|
@ -58,6 +58,25 @@ def test_chroma_with_metadatas_with_scores() -> None:
|
|||||||
assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)]
|
assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)]
|
||||||
|
|
||||||
|
|
||||||
|
def test_chroma_with_metadatas_with_scores_using_vector() -> None:
    """Test end-to-end construction and scored search driven by a raw vector.

    Builds a collection from three texts with per-text ``page`` metadata,
    embeds the query "foo" with the same fake embeddings object, and checks
    that the top hit is the "foo" document with a score of 0.0.
    """
    corpus = ["foo", "bar", "baz"]
    page_metadata = [{"page": str(index)} for index, _ in enumerate(corpus)]
    fake_embedder = FakeEmbeddings()

    store = Chroma.from_texts(
        collection_name="test_collection",
        texts=corpus,
        embedding=fake_embedder,
        metadatas=page_metadata,
    )

    # Embed the query up front so the search goes through the by-vector path.
    query_vector = fake_embedder.embed_query("foo")
    hits = store.similarity_search_by_vector_with_relevance_scores(
        embedding=query_vector, k=1
    )

    expected = [(Document(page_content="foo", metadata={"page": "0"}), 0.0)]
    assert hits == expected
|
||||||
|
|
||||||
|
|
||||||
def test_chroma_search_filter() -> None:
|
def test_chroma_search_filter() -> None:
|
||||||
"""Test end to end construction and search with metadata filtering."""
|
"""Test end to end construction and search with metadata filtering."""
|
||||||
texts = ["far", "bar", "baz"]
|
texts = ["far", "bar", "baz"]
|
||||||
|
Loading…
Reference in New Issue
Block a user