mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-21 20:13:39 +00:00
docs: Chroma docstrings update (#22001)
Thank you for contributing to LangChain! - [X] **PR title**: "docs: Chroma docstrings update" - Where "package" is whichever of langchain, community, core, experimental, etc. is being modified. Use "docs: ..." for purely docs changes, "templates: ..." for template changes, "infra: ..." for CI changes. - Example: "community: add foobar LLM" - [X] **PR message**: - **Description:** Added and updated Chroma docstrings - **Issue:** https://github.com/langchain-ai/langchain/issues/21983 - [X] **Add tests and docs**: If you're adding a new integration, please include 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. - only docs - [X] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/ Additional guidelines: - Make sure optional dependencies are imported within a function. - Please do not add dependencies to pyproject.toml files (even optional ones) unless they are required for unit tests. - Most PRs should not touch more than one package. - Changes should be backwards compatible. - If you are adding something to community, do not re-import it in langchain. If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17.
This commit is contained in:
parent
28456c2c33
commit
45351d1bc6
@ -52,7 +52,11 @@ Matrix = Union[List[List[float]], List[np.ndarray], np.ndarray]
|
|||||||
|
|
||||||
|
|
||||||
def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
|
def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
|
||||||
"""Row-wise cosine similarity between two equal-width matrices."""
|
"""Row-wise cosine similarity between two equal-width matrices.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If the number of columns in X and Y are not the same.
|
||||||
|
"""
|
||||||
if len(X) == 0 or len(Y) == 0:
|
if len(X) == 0 or len(Y) == 0:
|
||||||
return np.array([])
|
return np.array([])
|
||||||
|
|
||||||
@ -80,7 +84,21 @@ def maximal_marginal_relevance(
|
|||||||
lambda_mult: float = 0.5,
|
lambda_mult: float = 0.5,
|
||||||
k: int = 4,
|
k: int = 4,
|
||||||
) -> List[int]:
|
) -> List[int]:
|
||||||
"""Calculate maximal marginal relevance."""
|
"""Calculate maximal marginal relevance.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
query_embedding: Query embedding.
|
||||||
|
embedding_list: List of embeddings to select from.
|
||||||
|
lambda_mult: Number between 0 and 1 that determines the degree
|
||||||
|
of diversity among the results with 0 corresponding
|
||||||
|
to maximum diversity and 1 to minimum diversity.
|
||||||
|
Defaults to 0.5.
|
||||||
|
k: Number of Documents to return. Defaults to 4.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of indices of embeddings selected by maximal marginal relevance.
|
||||||
|
"""
|
||||||
|
|
||||||
if min(k, len(embedding_list)) <= 0:
|
if min(k, len(embedding_list)) <= 0:
|
||||||
return []
|
return []
|
||||||
if query_embedding.ndim == 1:
|
if query_embedding.ndim == 1:
|
||||||
@ -136,8 +154,21 @@ class Chroma(VectorStore):
|
|||||||
relevance_score_fn: Optional[Callable[[float], float]] = None,
|
relevance_score_fn: Optional[Callable[[float], float]] = None,
|
||||||
create_collection_if_not_exists: Optional[bool] = True,
|
create_collection_if_not_exists: Optional[bool] = True,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize with a Chroma client."""
|
"""Initialize with a Chroma client.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
collection_name: Name of the collection to create.
|
||||||
|
embedding_function: Embedding class object. Used to embed texts.
|
||||||
|
persist_directory: Directory to persist the collection.
|
||||||
|
client_settings: Chroma client settings
|
||||||
|
collection_metadata: Collection configurations.
|
||||||
|
client: Chroma client. Documentation:
|
||||||
|
https://docs.trychroma.com/reference/py-client
|
||||||
|
relevance_score_fn: Function to calculate relevance score from distance.
|
||||||
|
Used only in `similarity_search_with_relevance_scores`
|
||||||
|
create_collection_if_not_exists: Whether to create collection
|
||||||
|
if it doesn't exist. Defaults to True.
|
||||||
|
"""
|
||||||
if client is not None:
|
if client is not None:
|
||||||
self._client_settings = client_settings
|
self._client_settings = client_settings
|
||||||
self._client = client
|
self._client = client
|
||||||
@ -204,7 +235,23 @@ class Chroma(VectorStore):
|
|||||||
where_document: Optional[Dict[str, str]] = None,
|
where_document: Optional[Dict[str, str]] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> Union[List[Document], chromadb.QueryResult]:
|
) -> Union[List[Document], chromadb.QueryResult]:
|
||||||
"""Query the chroma collection."""
|
"""Query the chroma collection.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
query_texts: List of query texts.
|
||||||
|
query_embeddings: List of query embeddings.
|
||||||
|
n_results: Number of results to return. Defaults to 4.
|
||||||
|
where: dict used to filter results by metadata,
|
||||||
|
e.g. {"color" : "red", "price": 4.20}.
|
||||||
|
where_document: dict used to filter by the documents.
|
||||||
|
E.g. {"$contains": "hello"}.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of `n_results` nearest neighbor embeddings for provided
|
||||||
|
query_embeddings or query_texts.
|
||||||
|
|
||||||
|
See more: https://docs.trychroma.com/reference/py-collection#query
|
||||||
|
"""
|
||||||
return self._collection.query(
|
return self._collection.query(
|
||||||
query_texts=query_texts,
|
query_texts=query_texts,
|
||||||
query_embeddings=query_embeddings, # type: ignore
|
query_embeddings=query_embeddings, # type: ignore
|
||||||
@ -229,12 +276,16 @@ class Chroma(VectorStore):
|
|||||||
"""Run more images through the embeddings and add to the vectorstore.
|
"""Run more images through the embeddings and add to the vectorstore.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
uris List[str]: File path to the image.
|
uris: File paths to the images.
|
||||||
metadatas (Optional[List[dict]], optional): Optional list of metadatas.
|
metadatas: Optional list of metadatas.
|
||||||
ids (Optional[List[str]], optional): Optional list of IDs.
|
When querying, you can filter on this metadata.
|
||||||
|
ids: Optional list of IDs.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List[str]: List of IDs of the added images.
|
List of IDs of the added images.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: When metadata is incorrect.
|
||||||
"""
|
"""
|
||||||
# Map from uris to b64 encoded strings
|
# Map from uris to b64 encoded strings
|
||||||
b64_texts = [self.encode_image(uri=uri) for uri in uris]
|
b64_texts = [self.encode_image(uri=uri) for uri in uris]
|
||||||
@ -312,14 +363,18 @@ class Chroma(VectorStore):
|
|||||||
"""Run more texts through the embeddings and add to the vectorstore.
|
"""Run more texts through the embeddings and add to the vectorstore.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
texts (Iterable[str]): Texts to add to the vectorstore.
|
texts: Texts to add to the vectorstore.
|
||||||
metadatas (Optional[List[dict]], optional): Optional list of metadatas.
|
metadatas: Optional list of metadatas.
|
||||||
ids (Optional[List[str]], optional): Optional list of IDs.
|
When querying, you can filter on this metadata.
|
||||||
|
ids: Optional list of IDs.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List[str]: List of IDs of the added texts.
|
List of IDs of the added texts.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: When metadata is incorrect.
|
||||||
"""
|
"""
|
||||||
# TODO: Handle the case where the user doesn't provide ids on the Collection
|
|
||||||
if ids is None:
|
if ids is None:
|
||||||
ids = [str(uuid.uuid4()) for _ in texts]
|
ids = [str(uuid.uuid4()) for _ in texts]
|
||||||
embeddings = None
|
embeddings = None
|
||||||
@ -391,12 +446,12 @@ class Chroma(VectorStore):
|
|||||||
"""Run similarity search with Chroma.
|
"""Run similarity search with Chroma.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
query (str): Query text to search for.
|
query: Query text to search for.
|
||||||
k (int): Number of results to return. Defaults to 4.
|
k: Number of results to return. Defaults to 4.
|
||||||
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
filter: Filter by metadata. Defaults to None.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List[Document]: List of documents most similar to the query text.
|
List of documents most similar to the query text.
|
||||||
"""
|
"""
|
||||||
docs_and_scores = self.similarity_search_with_score(
|
docs_and_scores = self.similarity_search_with_score(
|
||||||
query, k, filter=filter, **kwargs
|
query, k, filter=filter, **kwargs
|
||||||
@ -412,10 +467,14 @@ class Chroma(VectorStore):
|
|||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
"""Return docs most similar to embedding vector.
|
"""Return docs most similar to embedding vector.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
embedding (List[float]): Embedding to look up documents similar to.
|
embedding: Embedding to look up documents similar to.
|
||||||
k (int): Number of Documents to return. Defaults to 4.
|
k: Number of Documents to return. Defaults to 4.
|
||||||
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
filter: Filter by metadata. Defaults to None.
|
||||||
|
where_document: dict used to filter by the documents.
|
||||||
|
E.g. {"$contains": "hello"}.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of Documents most similar to the query vector.
|
List of Documents most similar to the query vector.
|
||||||
"""
|
"""
|
||||||
@ -441,13 +500,14 @@ class Chroma(VectorStore):
|
|||||||
|
|
||||||
Args:
|
Args:
|
||||||
embedding (List[float]): Embedding to look up documents similar to.
|
embedding: Embedding to look up documents similar to.
|
||||||
k (int): Number of Documents to return. Defaults to 4.
|
k: Number of Documents to return. Defaults to 4.
|
||||||
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
filter: Filter by metadata. Defaults to None.
|
||||||
|
where_document: dict used to filter by the documents.
|
||||||
|
E.g. {"$contains": "hello"}.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List[Tuple[Document, float]]: List of documents most similar to
|
List of documents most similar to the query text and relevance score
|
||||||
the query text and cosine distance in float for each.
|
in float for each. Lower score represents more similarity.
|
||||||
Lower score represents more similarity.
|
|
||||||
"""
|
"""
|
||||||
results = self.__query_collection(
|
results = self.__query_collection(
|
||||||
query_embeddings=embedding,
|
query_embeddings=embedding,
|
||||||
@ -469,14 +529,15 @@ class Chroma(VectorStore):
|
|||||||
"""Run similarity search with Chroma with distance.
|
"""Run similarity search with Chroma with distance.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
query (str): Query text to search for.
|
query: Query text to search for.
|
||||||
k (int): Number of results to return. Defaults to 4.
|
k: Number of results to return. Defaults to 4.
|
||||||
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
filter: Filter by metadata. Defaults to None.
|
||||||
|
where_document: dict used to filter by the documents.
|
||||||
|
E.g. {"$contains": "hello"}.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List[Tuple[Document, float]]: List of documents most similar to
|
List of documents most similar to the query text and
|
||||||
the query text and cosine distance in float for each.
|
distance in float for each. Lower score represents more similarity.
|
||||||
Lower score represents more similarity.
|
|
||||||
"""
|
"""
|
||||||
if self._embedding_function is None:
|
if self._embedding_function is None:
|
||||||
results = self.__query_collection(
|
results = self.__query_collection(
|
||||||
@ -499,14 +560,21 @@ class Chroma(VectorStore):
|
|||||||
return _results_to_docs_and_scores(results)
|
return _results_to_docs_and_scores(results)
|
||||||
|
|
||||||
def _select_relevance_score_fn(self) -> Callable[[float], float]:
|
def _select_relevance_score_fn(self) -> Callable[[float], float]:
|
||||||
|
"""Select the relevance score function based on collections distance metric.
|
||||||
|
|
||||||
|
The most similar documents will have the lowest relevance score. Default
|
||||||
|
relevance score function is euclidean distance. Distance metric must be
|
||||||
|
provided in `collection_metadata` during initialization of Chroma object.
|
||||||
|
Example: collection_metadata={"hnsw:space": "cosine"}. Available distance
|
||||||
|
metrics are: 'cosine', 'l2' and 'ip'.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The relevance score function.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If the distance metric is not supported.
|
||||||
"""
|
"""
|
||||||
The 'correct' relevance function
|
|
||||||
may differ depending on a few things, including:
|
|
||||||
- the distance / similarity metric used by the VectorStore
|
|
||||||
- the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
|
|
||||||
- embedding dimensionality
|
|
||||||
- etc.
|
|
||||||
"""
|
|
||||||
if self.override_relevance_score_fn:
|
if self.override_relevance_score_fn:
|
||||||
return self.override_relevance_score_fn
|
return self.override_relevance_score_fn
|
||||||
|
|
||||||
@ -541,18 +609,20 @@ class Chroma(VectorStore):
|
|||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
"""Return docs selected using the maximal marginal relevance.
|
"""Return docs selected using the maximal marginal relevance.
|
||||||
|
|
||||||
Maximal marginal relevance optimizes for similarity to query AND diversity
|
Maximal marginal relevance optimizes for similarity to query AND diversity
|
||||||
among selected documents.
|
among selected documents.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
embedding: Embedding to look up documents similar to.
|
embedding: Embedding to look up documents similar to.
|
||||||
k: Number of Documents to return. Defaults to 4.
|
k: Number of Documents to return. Defaults to 4.
|
||||||
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
|
fetch_k: Number of Documents to fetch to pass to MMR algorithm. Defaults to
|
||||||
|
20.
|
||||||
lambda_mult: Number between 0 and 1 that determines the degree
|
lambda_mult: Number between 0 and 1 that determines the degree
|
||||||
of diversity among the results with 0 corresponding
|
of diversity among the results with 0 corresponding
|
||||||
to maximum diversity and 1 to minimum diversity.
|
to maximum diversity and 1 to minimum diversity.
|
||||||
Defaults to 0.5.
|
Defaults to 0.5.
|
||||||
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
filter: Filter by metadata. Defaults to None.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of Documents selected by maximal marginal relevance.
|
List of Documents selected by maximal marginal relevance.
|
||||||
@ -600,10 +670,15 @@ class Chroma(VectorStore):
|
|||||||
of diversity among the results with 0 corresponding
|
of diversity among the results with 0 corresponding
|
||||||
to maximum diversity and 1 to minimum diversity.
|
to maximum diversity and 1 to minimum diversity.
|
||||||
Defaults to 0.5.
|
Defaults to 0.5.
|
||||||
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
filter: Filter by metadata. Defaults to None.
|
||||||
|
where_document: dict used to filter by the documents.
|
||||||
|
E.g. {"$contains": "hello"}.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of Documents selected by maximal marginal relevance.
|
List of Documents selected by maximal marginal relevance.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If the embedding function is not provided.
|
||||||
"""
|
"""
|
||||||
if self._embedding_function is None:
|
if self._embedding_function is None:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
@ -611,7 +686,7 @@ class Chroma(VectorStore):
|
|||||||
)
|
)
|
||||||
|
|
||||||
embedding = self._embedding_function.embed_query(query)
|
embedding = self._embedding_function.embed_query(query)
|
||||||
docs = self.max_marginal_relevance_search_by_vector(
|
return self.max_marginal_relevance_search_by_vector(
|
||||||
embedding,
|
embedding,
|
||||||
k,
|
k,
|
||||||
fetch_k,
|
fetch_k,
|
||||||
@ -619,7 +694,6 @@ class Chroma(VectorStore):
|
|||||||
filter=filter,
|
filter=filter,
|
||||||
where_document=where_document,
|
where_document=where_document,
|
||||||
)
|
)
|
||||||
return docs
|
|
||||||
|
|
||||||
def delete_collection(self) -> None:
|
def delete_collection(self) -> None:
|
||||||
"""Delete the collection."""
|
"""Delete the collection."""
|
||||||
@ -656,6 +730,9 @@ class Chroma(VectorStore):
|
|||||||
Can contain `"embeddings"`, `"metadatas"`, `"documents"`.
|
Can contain `"embeddings"`, `"metadatas"`, `"documents"`.
|
||||||
Ids are always included.
|
Ids are always included.
|
||||||
Defaults to `["metadatas", "documents"]`. Optional.
|
Defaults to `["metadatas", "documents"]`. Optional.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A dict with the keys `"ids"`, `"embeddings"`, `"metadatas"`, `"documents"`.
|
||||||
"""
|
"""
|
||||||
kwargs = {
|
kwargs = {
|
||||||
"ids": ids,
|
"ids": ids,
|
||||||
@ -674,8 +751,8 @@ class Chroma(VectorStore):
|
|||||||
"""Update a document in the collection.
|
"""Update a document in the collection.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
document_id (str): ID of the document to update.
|
document_id: ID of the document to update.
|
||||||
document (Document): Document to update.
|
document: Document to update.
|
||||||
"""
|
"""
|
||||||
return self.update_documents([document_id], [document])
|
return self.update_documents([document_id], [document])
|
||||||
|
|
||||||
@ -684,8 +761,11 @@ class Chroma(VectorStore):
|
|||||||
"""Update a document in the collection.
|
"""Update a document in the collection.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
ids (List[str]): List of ids of the document to update.
|
ids: List of ids of the document to update.
|
||||||
documents (List[Document]): List of documents to update.
|
documents: List of documents to update.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If the embedding function is not provided.
|
||||||
"""
|
"""
|
||||||
text = [document.page_content for document in documents]
|
text = [document.page_content for document in documents]
|
||||||
metadata = [document.metadata for document in documents]
|
metadata = [document.metadata for document in documents]
|
||||||
@ -741,14 +821,14 @@ class Chroma(VectorStore):
|
|||||||
Otherwise, the data will be ephemeral in-memory.
|
Otherwise, the data will be ephemeral in-memory.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
texts (List[str]): List of texts to add to the collection.
|
texts: List of texts to add to the collection.
|
||||||
collection_name (str): Name of the collection to create.
|
collection_name: Name of the collection to create.
|
||||||
persist_directory (Optional[str]): Directory to persist the collection.
|
persist_directory: Directory to persist the collection.
|
||||||
embedding (Optional[Embeddings]): Embedding function. Defaults to None.
|
embedding: Embedding function. Defaults to None.
|
||||||
metadatas (Optional[List[dict]]): List of metadatas. Defaults to None.
|
metadatas: List of metadatas. Defaults to None.
|
||||||
ids (Optional[List[str]]): List of document IDs. Defaults to None.
|
ids: List of document IDs. Defaults to None.
|
||||||
client_settings (Optional[chromadb.config.Settings]): Chroma client settings
|
client_settings: Chroma client settings
|
||||||
collection_metadata (Optional[Dict]): Collection configurations.
|
collection_metadata: Collection configurations.
|
||||||
Defaults to None.
|
Defaults to None.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
@ -804,13 +884,13 @@ class Chroma(VectorStore):
|
|||||||
Otherwise, the data will be ephemeral in-memory.
|
Otherwise, the data will be ephemeral in-memory.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
collection_name (str): Name of the collection to create.
|
collection_name: Name of the collection to create.
|
||||||
persist_directory (Optional[str]): Directory to persist the collection.
|
persist_directory: Directory to persist the collection.
|
||||||
ids (Optional[List[str]]): List of document IDs. Defaults to None.
|
ids: List of document IDs. Defaults to None.
|
||||||
documents (List[Document]): List of documents to add to the vectorstore.
|
documents: List of documents to add to the vectorstore.
|
||||||
embedding (Optional[Embeddings]): Embedding function. Defaults to None.
|
embedding: Embedding function. Defaults to None.
|
||||||
client_settings (Optional[chromadb.config.Settings]): Chroma client settings
|
client_settings: Chroma client settings
|
||||||
collection_metadata (Optional[Dict]): Collection configurations.
|
collection_metadata: Collection configurations.
|
||||||
Defaults to None.
|
Defaults to None.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
|
Loading…
Reference in New Issue
Block a user