mirror of
https://github.com/hwchase17/langchain.git
synced 2025-05-31 20:19:43 +00:00
add where_document filter for chroma (#10214)
- Description: add a `where_document` filter parameter to Chroma
- Issue: [10082](https://github.com/langchain-ai/langchain/issues/10082)
- Dependencies: none
- Tag maintainer: for a quicker response, tag the relevant maintainer (see below)
- Twitter handle: no

@hwchase17

---------

Co-authored-by: Jeremy Lai <jeremy_lai@wiwynn.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
7203c97e8f
commit
e93240f023
@ -142,6 +142,7 @@ class Chroma(VectorStore):
|
||||
query_embeddings: Optional[List[List[float]]] = None,
|
||||
n_results: int = 4,
|
||||
where: Optional[Dict[str, str]] = None,
|
||||
where_document: Optional[Dict[str, str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
"""Query the chroma collection."""
|
||||
@ -157,6 +158,7 @@ class Chroma(VectorStore):
|
||||
query_embeddings=query_embeddings,
|
||||
n_results=n_results,
|
||||
where=where,
|
||||
where_document=where_document,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@ -264,6 +266,7 @@ class Chroma(VectorStore):
|
||||
embedding: List[float],
|
||||
k: int = DEFAULT_K,
|
||||
filter: Optional[Dict[str, str]] = None,
|
||||
where_document: Optional[Dict[str, str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
"""Return docs most similar to embedding vector.
|
||||
@ -275,7 +278,10 @@ class Chroma(VectorStore):
|
||||
List of Documents most similar to the query vector.
|
||||
"""
|
||||
results = self.__query_collection(
|
||||
query_embeddings=embedding, n_results=k, where=filter
|
||||
query_embeddings=embedding,
|
||||
n_results=k,
|
||||
where=filter,
|
||||
where_document=where_document,
|
||||
)
|
||||
return _results_to_docs(results)
|
||||
|
||||
@ -284,6 +290,7 @@ class Chroma(VectorStore):
|
||||
embedding: List[float],
|
||||
k: int = DEFAULT_K,
|
||||
filter: Optional[Dict[str, str]] = None,
|
||||
where_document: Optional[Dict[str, str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[Tuple[Document, float]]:
|
||||
"""
|
||||
@ -300,7 +307,10 @@ class Chroma(VectorStore):
|
||||
Lower score represents more similarity.
|
||||
"""
|
||||
results = self.__query_collection(
|
||||
query_embeddings=embedding, n_results=k, where=filter
|
||||
query_embeddings=embedding,
|
||||
n_results=k,
|
||||
where=filter,
|
||||
where_document=where_document,
|
||||
)
|
||||
return _results_to_docs_and_scores(results)
|
||||
|
||||
@ -309,6 +319,7 @@ class Chroma(VectorStore):
|
||||
query: str,
|
||||
k: int = DEFAULT_K,
|
||||
filter: Optional[Dict[str, str]] = None,
|
||||
where_document: Optional[Dict[str, str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[Tuple[Document, float]]:
|
||||
"""Run similarity search with Chroma with distance.
|
||||
@ -325,12 +336,18 @@ class Chroma(VectorStore):
|
||||
"""
|
||||
if self._embedding_function is None:
|
||||
results = self.__query_collection(
|
||||
query_texts=[query], n_results=k, where=filter
|
||||
query_texts=[query],
|
||||
n_results=k,
|
||||
where=filter,
|
||||
where_document=where_document,
|
||||
)
|
||||
else:
|
||||
query_embedding = self._embedding_function.embed_query(query)
|
||||
results = self.__query_collection(
|
||||
query_embeddings=[query_embedding], n_results=k, where=filter
|
||||
query_embeddings=[query_embedding],
|
||||
n_results=k,
|
||||
where=filter,
|
||||
where_document=where_document,
|
||||
)
|
||||
|
||||
return _results_to_docs_and_scores(results)
|
||||
@ -374,6 +391,7 @@ class Chroma(VectorStore):
|
||||
fetch_k: int = 20,
|
||||
lambda_mult: float = 0.5,
|
||||
filter: Optional[Dict[str, str]] = None,
|
||||
where_document: Optional[Dict[str, str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
"""Return docs selected using the maximal marginal relevance.
|
||||
@ -398,6 +416,7 @@ class Chroma(VectorStore):
|
||||
query_embeddings=embedding,
|
||||
n_results=fetch_k,
|
||||
where=filter,
|
||||
where_document=where_document,
|
||||
include=["metadatas", "documents", "distances", "embeddings"],
|
||||
)
|
||||
mmr_selected = maximal_marginal_relevance(
|
||||
@ -419,6 +438,7 @@ class Chroma(VectorStore):
|
||||
fetch_k: int = 20,
|
||||
lambda_mult: float = 0.5,
|
||||
filter: Optional[Dict[str, str]] = None,
|
||||
where_document: Optional[Dict[str, str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
"""Return docs selected using the maximal marginal relevance.
|
||||
@ -445,7 +465,12 @@ class Chroma(VectorStore):
|
||||
|
||||
embedding = self._embedding_function.embed_query(query)
|
||||
docs = self.max_marginal_relevance_search_by_vector(
|
||||
embedding, k, fetch_k, lambda_mult=lambda_mult, filter=filter
|
||||
embedding,
|
||||
k,
|
||||
fetch_k,
|
||||
lambda_mult=lambda_mult,
|
||||
filter=filter,
|
||||
where_document=where_document,
|
||||
)
|
||||
return docs
|
||||
|
||||
@ -472,7 +497,7 @@ class Chroma(VectorStore):
|
||||
offset: The offset to start returning results from.
|
||||
Useful for paging results with limit. Optional.
|
||||
where_document: A WhereDocument type dict used to filter by the documents.
|
||||
E.g. `{$contains: {"text": "hello"}}`. Optional.
|
||||
E.g. `{$contains: "hello"}`. Optional.
|
||||
include: A list of what to include in the results.
|
||||
Can contain `"embeddings"`, `"metadatas"`, `"documents"`.
|
||||
Ids are always included.
|
||||
|
Loading…
Reference in New Issue
Block a user