add where_document filter for chroma (#10214)
- Description: add a `where_document` filter parameter in Chroma
- Issue: [10082](https://github.com/langchain-ai/langchain/issues/10082)
- Dependencies: none
- Tag maintainer: @hwchase17
- Twitter handle: none

Co-authored-by: Jeremy Lai <jeremy_lai@wiwynn.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
parent 7203c97e8f
commit e93240f023
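As a rough usage sketch (not part of this commit; the texts and the FakeEmbeddings stand-in below are illustrative), the new parameter restricts results to documents whose text matches a Chroma WhereDocument clause:

```python
from langchain.embeddings import FakeEmbeddings
from langchain.vectorstores import Chroma

db = Chroma.from_texts(
    ["hello world", "hello chroma", "goodbye world"],
    FakeEmbeddings(size=16),
)

# Only documents whose text contains "hello" are considered.
docs_and_scores = db.similarity_search_with_score(
    "greeting",
    k=2,
    where_document={"$contains": "hello"},
)
```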
@@ -142,6 +142,7 @@ class Chroma(VectorStore):
         query_embeddings: Optional[List[List[float]]] = None,
         n_results: int = 4,
         where: Optional[Dict[str, str]] = None,
+        where_document: Optional[Dict[str, str]] = None,
         **kwargs: Any,
     ) -> List[Document]:
         """Query the chroma collection."""
@@ -157,6 +158,7 @@ class Chroma(VectorStore):
             query_embeddings=query_embeddings,
             n_results=n_results,
             where=where,
+            where_document=where_document,
             **kwargs,
         )
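For context, `__query_collection` forwards these arguments to chromadb's `Collection.query`; a minimal standalone sketch of that underlying call (the collection name, ids, texts, and embeddings are made up):

```python
import chromadb

client = chromadb.Client()
collection = client.create_collection("demo")
collection.add(
    ids=["1", "2"],
    documents=["hello world", "goodbye world"],
    embeddings=[[0.1, 0.2], [0.2, 0.1]],
)

# where_document filters on document content before nearest-neighbour results are returned.
results = collection.query(
    query_embeddings=[[0.1, 0.2]],
    n_results=2,
    where_document={"$contains": "hello"},
)
```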
@@ -264,6 +266,7 @@ class Chroma(VectorStore):
         embedding: List[float],
         k: int = DEFAULT_K,
         filter: Optional[Dict[str, str]] = None,
+        where_document: Optional[Dict[str, str]] = None,
         **kwargs: Any,
     ) -> List[Document]:
         """Return docs most similar to embedding vector.
@@ -275,7 +278,10 @@ class Chroma(VectorStore):
             List of Documents most similar to the query vector.
         """
         results = self.__query_collection(
-            query_embeddings=embedding, n_results=k, where=filter
+            query_embeddings=embedding,
+            n_results=k,
+            where=filter,
+            where_document=where_document,
         )
         return _results_to_docs(results)
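A hedged sketch of the by-vector path with the new filter; the embedding model and texts below are illustrative:

```python
from langchain.embeddings import FakeEmbeddings
from langchain.vectorstores import Chroma

embeddings = FakeEmbeddings(size=16)
db = Chroma.from_texts(["hello world", "goodbye world"], embeddings)

query_vector = embeddings.embed_query("hello")
# Nearest neighbours are searched only among documents containing "hello".
docs = db.similarity_search_by_vector(
    query_vector,
    k=1,
    where_document={"$contains": "hello"},
)
```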
@@ -284,6 +290,7 @@ class Chroma(VectorStore):
         embedding: List[float],
         k: int = DEFAULT_K,
         filter: Optional[Dict[str, str]] = None,
+        where_document: Optional[Dict[str, str]] = None,
         **kwargs: Any,
     ) -> List[Tuple[Document, float]]:
         """
@@ -300,7 +307,10 @@ class Chroma(VectorStore):
             Lower score represents more similarity.
         """
         results = self.__query_collection(
-            query_embeddings=embedding, n_results=k, where=filter
+            query_embeddings=embedding,
+            n_results=k,
+            where=filter,
+            where_document=where_document,
         )
         return _results_to_docs_and_scores(results)
@@ -309,6 +319,7 @@ class Chroma(VectorStore):
         query: str,
         k: int = DEFAULT_K,
         filter: Optional[Dict[str, str]] = None,
+        where_document: Optional[Dict[str, str]] = None,
         **kwargs: Any,
     ) -> List[Tuple[Document, float]]:
         """Run similarity search with Chroma with distance.
@@ -325,12 +336,18 @@ class Chroma(VectorStore):
         """
         if self._embedding_function is None:
             results = self.__query_collection(
-                query_texts=[query], n_results=k, where=filter
+                query_texts=[query],
+                n_results=k,
+                where=filter,
+                where_document=where_document,
             )
         else:
             query_embedding = self._embedding_function.embed_query(query)
             results = self.__query_collection(
-                query_embeddings=[query_embedding], n_results=k, where=filter
+                query_embeddings=[query_embedding],
+                n_results=k,
+                where=filter,
+                where_document=where_document,
             )

         return _results_to_docs_and_scores(results)
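The existing metadata filter (`filter`, passed through as Chroma's `where`) and the new `where_document` filter can be combined; an illustrative sketch, again with made-up texts and a stand-in embedding model:

```python
from langchain.embeddings import FakeEmbeddings
from langchain.vectorstores import Chroma

db = Chroma.from_texts(
    ["hello world", "hello chroma", "goodbye world"],
    FakeEmbeddings(size=16),
    metadatas=[{"source": "a"}, {"source": "b"}, {"source": "a"}],
)

# Restrict by metadata (source == "a") and by document content ("hello") at once.
docs_and_scores = db.similarity_search_with_score(
    "greeting",
    k=2,
    filter={"source": "a"},
    where_document={"$contains": "hello"},
)
```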
@@ -374,6 +391,7 @@ class Chroma(VectorStore):
         fetch_k: int = 20,
         lambda_mult: float = 0.5,
         filter: Optional[Dict[str, str]] = None,
+        where_document: Optional[Dict[str, str]] = None,
         **kwargs: Any,
     ) -> List[Document]:
         """Return docs selected using the maximal marginal relevance.
@@ -398,6 +416,7 @@ class Chroma(VectorStore):
             query_embeddings=embedding,
             n_results=fetch_k,
             where=filter,
+            where_document=where_document,
             include=["metadatas", "documents", "distances", "embeddings"],
         )
         mmr_selected = maximal_marginal_relevance(
@@ -419,6 +438,7 @@ class Chroma(VectorStore):
         fetch_k: int = 20,
         lambda_mult: float = 0.5,
         filter: Optional[Dict[str, str]] = None,
+        where_document: Optional[Dict[str, str]] = None,
         **kwargs: Any,
     ) -> List[Document]:
         """Return docs selected using the maximal marginal relevance.
@@ -445,7 +465,12 @@ class Chroma(VectorStore):

         embedding = self._embedding_function.embed_query(query)
         docs = self.max_marginal_relevance_search_by_vector(
-            embedding, k, fetch_k, lambda_mult=lambda_mult, filter=filter
+            embedding,
+            k,
+            fetch_k,
+            lambda_mult=lambda_mult,
+            filter=filter,
+            where_document=where_document,
         )
         return docs
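The MMR entry points take the same parameter: the `fetch_k` candidates are pulled with `filter` and `where_document` applied, then re-ranked. An illustrative sketch with stand-in texts and embeddings:

```python
from langchain.embeddings import FakeEmbeddings
from langchain.vectorstores import Chroma

db = Chroma.from_texts(
    ["hello world", "hello chroma", "goodbye world"],
    FakeEmbeddings(size=16),
)

# Pull fetch_k candidates that pass the document filter, then re-rank with MMR.
docs = db.max_marginal_relevance_search(
    "greeting",
    k=2,
    fetch_k=3,
    where_document={"$contains": "hello"},
)
```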
@@ -472,7 +497,7 @@ class Chroma(VectorStore):
             offset: The offset to start returning results from.
                     Useful for paging results with limit. Optional.
             where_document: A WhereDocument type dict used to filter by the documents.
-                            E.g. `{$contains: {"text": "hello"}}`. Optional.
+                            E.g. `{$contains: "hello"}`. Optional.
             include: A list of what to include in the results.
                      Can contain `"embeddings"`, `"metadatas"`, `"documents"`.
                      Ids are always included.
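For reference, two `where_document` clause shapes described in Chroma's filter documentation (verify the composed `$and` form against your chromadb version):

```python
# Match documents whose text contains a substring.
where_document = {"$contains": "hello"}

# Clauses can be combined with logical operators such as $and / $or.
where_document = {"$and": [{"$contains": "hello"}, {"$contains": "world"}]}
```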