diff --git a/libs/partners/chroma/langchain_chroma/vectorstores.py b/libs/partners/chroma/langchain_chroma/vectorstores.py index 227da2d230f..2b966f662f2 100644 --- a/libs/partners/chroma/langchain_chroma/vectorstores.py +++ b/libs/partners/chroma/langchain_chroma/vectorstores.py @@ -79,11 +79,14 @@ def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray: X = np.array(X) Y = np.array(Y) if X.shape[1] != Y.shape[1]: - raise ValueError( + msg = ( "Number of columns in X and Y must be the same. X has shape" f"{X.shape} " f"and Y has shape {Y.shape}." ) + raise ValueError( + msg, + ) X_norm = np.linalg.norm(X, axis=1) Y_norm = np.linalg.norm(Y, axis=1) @@ -285,7 +288,7 @@ class Chroma(VectorStore): collection_metadata: Optional[dict] = None, client: Optional[chromadb.ClientAPI] = None, relevance_score_fn: Optional[Callable[[float], float]] = None, - create_collection_if_not_exists: Optional[bool] = True, + create_collection_if_not_exists: Optional[bool] = True, # noqa: FBT002 ) -> None: """Initialize with a Chroma client. @@ -351,10 +354,13 @@ class Chroma(VectorStore): def _collection(self) -> chromadb.Collection: """Returns the underlying Chroma collection or throws an exception.""" if self._chroma_collection is None: - raise ValueError( + msg = ( "Chroma collection not initialized. " "Use `reset_collection` to re-create and initialize the collection. 
" ) + raise ValueError( + msg, + ) return self._chroma_collection @property @@ -392,10 +398,10 @@ class Chroma(VectorStore): """ return self._collection.query( query_texts=query_texts, - query_embeddings=query_embeddings, # type: ignore + query_embeddings=query_embeddings, # type: ignore[arg-type] n_results=n_results, - where=where, # type: ignore - where_document=where_document, # type: ignore + where=where, # type: ignore[arg-type] + where_document=where_document, # type: ignore[arg-type] **kwargs, ) @@ -432,11 +438,12 @@ class Chroma(VectorStore): if ids is None: ids = [str(uuid.uuid4()) for _ in uris] else: - ids = [id if id is not None else str(uuid.uuid4()) for id in ids] + ids = [id_ if id_ is not None else str(uuid.uuid4()) for id_ in ids] embeddings = None # Set embeddings if self._embedding_function is not None and hasattr( - self._embedding_function, "embed_image" + self._embedding_function, + "embed_image", ): embeddings = self._embedding_function.embed_image(uris=uris) if metadatas: @@ -461,8 +468,8 @@ class Chroma(VectorStore): ids_with_metadata = [ids[idx] for idx in non_empty_ids] try: self._collection.upsert( - metadatas=metadatas, # type: ignore - embeddings=embeddings_with_metadatas, # type: ignore + metadatas=metadatas, # type: ignore[arg-type] + embeddings=embeddings_with_metadatas, # type: ignore[arg-type] documents=images_with_metadatas, ids=ids_with_metadata, ) @@ -473,8 +480,7 @@ class Chroma(VectorStore): "langchain_community.vectorstores.utils.filter_complex_metadata." 
) raise ValueError(e.args[0] + "\n\n" + msg) - else: - raise e + raise e if empty_ids: images_without_metadatas = [b64_texts[j] for j in empty_ids] embeddings_without_metadatas = ( @@ -519,7 +525,7 @@ class Chroma(VectorStore): if ids is None: ids = [str(uuid.uuid4()) for _ in texts] else: - ids = [id if id is not None else str(uuid.uuid4()) for id in ids] + ids = [id_ if id_ is not None else str(uuid.uuid4()) for id_ in ids] embeddings = None texts = list(texts) @@ -549,8 +555,8 @@ class Chroma(VectorStore): ids_with_metadata = [ids[idx] for idx in non_empty_ids] try: self._collection.upsert( - metadatas=metadatas, # type: ignore - embeddings=embeddings_with_metadatas, # type: ignore + metadatas=metadatas, # type: ignore[arg-type] + embeddings=embeddings_with_metadatas, # type: ignore[arg-type] documents=texts_with_metadatas, ids=ids_with_metadata, ) @@ -561,8 +567,7 @@ class Chroma(VectorStore): "langchain_community.vectorstores.utils.filter_complex_metadata." ) raise ValueError(e.args[0] + "\n\n" + msg) - else: - raise e + raise e if empty_ids: texts_without_metadatas = [texts[j] for j in empty_ids] embeddings_without_metadatas = ( @@ -570,13 +575,13 @@ class Chroma(VectorStore): ) ids_without_metadatas = [ids[j] for j in empty_ids] self._collection.upsert( - embeddings=embeddings_without_metadatas, # type: ignore + embeddings=embeddings_without_metadatas, # type: ignore[arg-type] documents=texts_without_metadatas, ids=ids_without_metadatas, ) else: self._collection.upsert( - embeddings=embeddings, # type: ignore + embeddings=embeddings, # type: ignore[arg-type] documents=texts, ids=ids, ) @@ -586,7 +591,7 @@ class Chroma(VectorStore): self, query: str, k: int = DEFAULT_K, - filter: Optional[dict[str, str]] = None, + filter: Optional[dict[str, str]] = None, # noqa: A002 **kwargs: Any, ) -> list[Document]: """Run similarity search with Chroma. @@ -601,7 +606,10 @@ class Chroma(VectorStore): List of documents most similar to the query text. 
""" docs_and_scores = self.similarity_search_with_score( - query, k, filter=filter, **kwargs + query, + k, + filter=filter, + **kwargs, ) return [doc for doc, _ in docs_and_scores] @@ -609,7 +617,7 @@ class Chroma(VectorStore): self, embedding: list[float], k: int = DEFAULT_K, - filter: Optional[dict[str, str]] = None, + filter: Optional[dict[str, str]] = None, # noqa: A002 where_document: Optional[dict[str, str]] = None, **kwargs: Any, ) -> list[Document]: @@ -639,7 +647,7 @@ class Chroma(VectorStore): self, embedding: list[float], k: int = DEFAULT_K, - filter: Optional[dict[str, str]] = None, + filter: Optional[dict[str, str]] = None, # noqa: A002 where_document: Optional[dict[str, str]] = None, **kwargs: Any, ) -> list[tuple[Document, float]]: @@ -670,7 +678,7 @@ class Chroma(VectorStore): self, query: str, k: int = DEFAULT_K, - filter: Optional[dict[str, str]] = None, + filter: Optional[dict[str, str]] = None, # noqa: A002 where_document: Optional[dict[str, str]] = None, **kwargs: Any, ) -> list[tuple[Document, float]]: @@ -712,7 +720,7 @@ class Chroma(VectorStore): self, query: str, k: int = DEFAULT_K, - filter: Optional[dict[str, str]] = None, + filter: Optional[dict[str, str]] = None, # noqa: A002 where_document: Optional[dict[str, str]] = None, **kwargs: Any, ) -> list[tuple[Document, np.ndarray]]: @@ -780,22 +788,24 @@ class Chroma(VectorStore): if distance == "cosine": return self._cosine_relevance_score_fn - elif distance == "l2": + if distance == "l2": return self._euclidean_relevance_score_fn - elif distance == "ip": + if distance == "ip": return self._max_inner_product_relevance_score_fn - else: - raise ValueError( - "No supported normalization function" - f" for distance metric of type: {distance}." - "Consider providing relevance_score_fn to Chroma constructor." - ) + msg = ( + "No supported normalization function" + f" for distance metric of type: {distance}." + "Consider providing relevance_score_fn to Chroma constructor." 
+ ) + raise ValueError( + msg, + ) def similarity_search_by_image( self, uri: str, k: int = DEFAULT_K, - filter: Optional[dict[str, str]] = None, + filter: Optional[dict[str, str]] = None, # noqa: A002 **kwargs: Any, ) -> list[Document]: """Search for similar images based on the given image URI. @@ -817,29 +827,29 @@ class Chroma(VectorStore): ValueError: If the embedding function does not support image embeddings. """ if self._embedding_function is None or not hasattr( - self._embedding_function, "embed_image" + self._embedding_function, + "embed_image", ): - raise ValueError("The embedding function must support image embedding.") + msg = "The embedding function must support image embedding." + raise ValueError(msg) # Obtain image embedding # Assuming embed_image returns a single embedding image_embedding = self._embedding_function.embed_image(uris=[uri]) # Perform similarity search based on the obtained embedding - results = self.similarity_search_by_vector( + return self.similarity_search_by_vector( embedding=image_embedding, k=k, filter=filter, **kwargs, ) - return results - def similarity_search_by_image_with_relevance_score( self, uri: str, k: int = DEFAULT_K, - filter: Optional[dict[str, str]] = None, + filter: Optional[dict[str, str]] = None, # noqa: A002 **kwargs: Any, ) -> list[tuple[Document, float]]: """Search for similar images based on the given image URI. @@ -861,31 +871,31 @@ class Chroma(VectorStore): ValueError: If the embedding function does not support image embeddings. """ if self._embedding_function is None or not hasattr( - self._embedding_function, "embed_image" + self._embedding_function, + "embed_image", ): - raise ValueError("The embedding function must support image embedding.") + msg = "The embedding function must support image embedding." 
+ raise ValueError(msg) # Obtain image embedding # Assuming embed_image returns a single embedding image_embedding = self._embedding_function.embed_image(uris=[uri]) # Perform similarity search based on the obtained embedding - results = self.similarity_search_by_vector_with_relevance_scores( + return self.similarity_search_by_vector_with_relevance_scores( embedding=image_embedding, k=k, filter=filter, **kwargs, ) - return results - def max_marginal_relevance_search_by_vector( self, embedding: list[float], k: int = DEFAULT_K, fetch_k: int = 20, lambda_mult: float = 0.5, - filter: Optional[dict[str, str]] = None, + filter: Optional[dict[str, str]] = None, # noqa: A002 where_document: Optional[dict[str, str]] = None, **kwargs: Any, ) -> list[Document]: @@ -928,8 +938,7 @@ class Chroma(VectorStore): candidates = _results_to_docs(results) - selected_results = [r for i, r in enumerate(candidates) if i in mmr_selected] - return selected_results + return [r for i, r in enumerate(candidates) if i in mmr_selected] def max_marginal_relevance_search( self, @@ -937,7 +946,7 @@ class Chroma(VectorStore): k: int = DEFAULT_K, fetch_k: int = 20, lambda_mult: float = 0.5, - filter: Optional[dict[str, str]] = None, + filter: Optional[dict[str, str]] = None, # noqa: A002 where_document: Optional[dict[str, str]] = None, **kwargs: Any, ) -> list[Document]: @@ -966,8 +975,9 @@ class Chroma(VectorStore): ValueError: If the embedding function is not provided. """ if self._embedding_function is None: + msg = "For MMR search, you must specify an embedding function on creation." raise ValueError( - "For MMR search, you must specify an embedding function on creation." 
+ msg, ) embedding = self._embedding_function.embed_query(query) @@ -1032,7 +1042,7 @@ class Chroma(VectorStore): if include is not None: kwargs["include"] = include - return self._collection.get(**kwargs) # type: ignore + return self._collection.get(**kwargs) # type: ignore[arg-type, return-value] def get_by_ids(self, ids: Sequence[str], /) -> list[Document]: """Get documents by their IDs. @@ -1062,7 +1072,9 @@ class Chroma(VectorStore): return [ Document(page_content=doc, metadata=meta, id=doc_id) for doc, meta, doc_id in zip( - results["documents"], results["metadatas"], results["ids"] + results["documents"], + results["metadatas"], + results["ids"], ) ] @@ -1075,7 +1087,6 @@ class Chroma(VectorStore): """ return self.update_documents([document_id], [document]) - # type: ignore def update_documents(self, ids: list[str], documents: list[Document]) -> None: """Update a document in the collection. @@ -1089,24 +1100,27 @@ class Chroma(VectorStore): text = [document.page_content for document in documents] metadata = [document.metadata for document in documents] if self._embedding_function is None: + msg = "For update, you must specify an embedding function on creation." raise ValueError( - "For update, you must specify an embedding function on creation." 
+ msg, ) embeddings = self._embedding_function.embed_documents(text) if hasattr( - self._collection._client, "get_max_batch_size" + self._collection._client, # noqa: SLF001 + "get_max_batch_size", ) or hasattr( # for Chroma 0.5.1 and above - self._collection._client, "max_batch_size" + self._collection._client, # noqa: SLF001 + "max_batch_size", ): # for Chroma 0.4.10 and above from chromadb.utils.batch_utils import create_batches for batch in create_batches( - api=self._collection._client, + api=self._collection._client, # noqa: SLF001 ids=ids, - metadatas=metadata, # type: ignore + metadatas=metadata, # type: ignore[arg-type] documents=text, - embeddings=embeddings, # type: ignore + embeddings=embeddings, # type: ignore[arg-type] ): self._collection.update( ids=batch[0], @@ -1117,9 +1131,9 @@ class Chroma(VectorStore): else: self._collection.update( ids=ids, - embeddings=embeddings, # type: ignore + embeddings=embeddings, # type: ignore[arg-type] documents=text, - metadatas=metadata, # type: ignore + metadatas=metadata, # type: ignore[arg-type] ) @classmethod @@ -1170,23 +1184,25 @@ class Chroma(VectorStore): if ids is None: ids = [str(uuid.uuid4()) for _ in texts] else: - ids = [id if id is not None else str(uuid.uuid4()) for id in ids] + ids = [id_ if id_ is not None else str(uuid.uuid4()) for id_ in ids] if hasattr( - chroma_collection._client, "get_max_batch_size" + chroma_collection._client, # noqa: SLF001 + "get_max_batch_size", ) or hasattr( # for Chroma 0.5.1 and above - chroma_collection._client, "max_batch_size" + chroma_collection._client, # noqa: SLF001 + "max_batch_size", ): # for Chroma 0.4.10 and above from chromadb.utils.batch_utils import create_batches for batch in create_batches( - api=chroma_collection._client, + api=chroma_collection._client, # noqa: SLF001 ids=ids, - metadatas=metadatas, # type: ignore + metadatas=metadatas, # type: ignore[arg-type] documents=texts, ): chroma_collection.add_texts( texts=batch[3] if batch[3] else [], - 
metadatas=batch[2] if batch[2] else None, # type: ignore + metadatas=batch[2] if batch[2] else None, # type: ignore[arg-type] ids=batch[0], ) else: diff --git a/libs/partners/chroma/pyproject.toml b/libs/partners/chroma/pyproject.toml index 73825651b47..dc1d6a6d5e1 100644 --- a/libs/partners/chroma/pyproject.toml +++ b/libs/partners/chroma/pyproject.toml @@ -62,8 +62,52 @@ disallow_untyped_defs = true target-version = "py39" [tool.ruff.lint] -select = ["E", "F", "I", "T201", "D", "UP", "S"] -ignore = [ "UP007", ] +select = [ + "A", # flake8-builtins + "ASYNC", # flake8-async + "C4", # flake8-comprehensions + "COM", # flake8-commas + "D", # pydocstyle + "DOC", # pydoclint + "E", # pycodestyle error + "EM", # flake8-errmsg + "F", # pyflakes + "FA", # flake8-future-annotations + "FBT", # flake8-boolean-trap + "FLY", # flynt + "I", # isort + "ICN", # flake8-import-conventions + "INT", # flake8-gettext + "ISC", # flake8-implicit-str-concat + "PGH", # pygrep-hooks + "PIE", # flake8-pie + "PERF", # Perflint + "PYI", # flake8-pyi + "Q", # flake8-quotes + "RET", # flake8-return + "RSE", # flake8-raise + "RUF", # ruff + "S", # flake8-bandit + "SLF", # flake8-self + "SLOT", # flake8-slots + "SIM", # flake8-simplify + "T10", # flake8-debugger + "T20", # flake8-print + "TID", # flake8-tidy-imports + "UP", # pyupgrade + "W", # pycodestyle warning + "YTT", # flake8-2020 +] +ignore = [ + "D100", # Missing docstring in public module + "D101", # Missing docstring in public class + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D104", # Missing docstring in public package + "D105", # Missing docstring in magic method + "D107", # Missing docstring in __init__ + "UP007", # pyupgrade: non-pep604-annotation-union +] [tool.coverage.run] omit = ["tests/*"] @@ -84,6 +128,7 @@ convention = "google" [tool.ruff.lint.extend-per-file-ignores] "tests/**/*.py" = [ - "S101", # Tests need assertions - "S311", # Standard pseudo-random 
generators are not suitable for cryptographic purposes + "S101", # Tests need assertions + "S311", # Standard pseudo-random generators are not suitable for cryptographic purposes + "SLF001", # Private member access in tests ] \ No newline at end of file diff --git a/libs/partners/chroma/scripts/check_imports.py b/libs/partners/chroma/scripts/check_imports.py index a7fcbc5ca63..b850913e53e 100644 --- a/libs/partners/chroma/scripts/check_imports.py +++ b/libs/partners/chroma/scripts/check_imports.py @@ -10,7 +10,7 @@ if __name__ == "__main__": for file in files: try: SourceFileLoader("x", file).load_module() - except Exception: + except Exception: # noqa: PERF203 has_failure = True print(file) # noqa: T201 traceback.print_exc() diff --git a/libs/partners/chroma/tests/integration_tests/fake_embeddings.py b/libs/partners/chroma/tests/integration_tests/fake_embeddings.py index e04f295782b..9f318b60cfe 100644 --- a/libs/partners/chroma/tests/integration_tests/fake_embeddings.py +++ b/libs/partners/chroma/tests/integration_tests/fake_embeddings.py @@ -44,7 +44,7 @@ class ConsistentFakeEmbeddings(FakeEmbeddings): if text not in self.known_texts: self.known_texts.append(text) vector = [1.0] * (self.dimensionality - 1) + [ - float(self.known_texts.index(text)) + float(self.known_texts.index(text)), ] out_vectors.append(vector) return out_vectors diff --git a/libs/partners/chroma/tests/integration_tests/test_compile.py b/libs/partners/chroma/tests/integration_tests/test_compile.py index 5196f4c0f8c..5c812af3144 100644 --- a/libs/partners/chroma/tests/integration_tests/test_compile.py +++ b/libs/partners/chroma/tests/integration_tests/test_compile.py @@ -4,4 +4,3 @@ import pytest # type: ignore[import-not-found] @pytest.mark.compile def test_placeholder() -> None: """Used for compiling integration tests without running any real tests.""" - pass diff --git a/libs/partners/chroma/tests/integration_tests/test_vectorstores.py 
b/libs/partners/chroma/tests/integration_tests/test_vectorstores.py index a42ba95254c..f5794c3875c 100644 --- a/libs/partners/chroma/tests/integration_tests/test_vectorstores.py +++ b/libs/partners/chroma/tests/integration_tests/test_vectorstores.py @@ -28,8 +28,8 @@ class MyEmbeddingFunction: def __init__(self, fak: Fak): self.fak = fak - def __call__(self, input: Embeddable) -> list[list[float]]: - texts = cast(list[str], input) + def __call__(self, input_: Embeddable) -> list[list[float]]: + texts = cast(list[str], input_) return self.fak.embed_documents(texts=texts) @@ -44,7 +44,9 @@ def test_chroma() -> None: """Test end to end construction and search.""" texts = ["foo", "bar", "baz"] docsearch = Chroma.from_texts( - collection_name="test_collection", texts=texts, embedding=FakeEmbeddings() + collection_name="test_collection", + texts=texts, + embedding=FakeEmbeddings(), ) output = docsearch.similarity_search("foo", k=1) @@ -92,7 +94,9 @@ async def test_chroma_async() -> None: """Test end to end construction and search.""" texts = ["foo", "bar", "baz"] docsearch = Chroma.from_texts( - collection_name="test_collection", texts=texts, embedding=FakeEmbeddings() + collection_name="test_collection", + texts=texts, + embedding=FakeEmbeddings(), ) output = await docsearch.asimilarity_search("foo", k=1) @@ -173,7 +177,7 @@ def test_chroma_with_metadatas_with_scores_and_ids() -> None: output = docsearch.similarity_search_with_score("foo", k=1) docsearch.delete_collection() assert output == [ - (Document(page_content="foo", metadata={"page": "0"}, id="id_0"), 0.0) + (Document(page_content="foo", metadata={"page": "0"}, id="id_0"), 0.0), ] @@ -211,11 +215,12 @@ def test_chroma_with_metadatas_with_scores_using_vector() -> None: ) embedded_query = embeddings.embed_query("foo") output = docsearch.similarity_search_by_vector_with_relevance_scores( - embedding=embedded_query, k=1 + embedding=embedded_query, + k=1, ) docsearch.delete_collection() assert output == [ - 
(Document(page_content="foo", metadata={"page": "0"}, id="id_0"), 0.0) + (Document(page_content="foo", metadata={"page": "0"}, id="id_0"), 0.0), ] @@ -235,10 +240,10 @@ def test_chroma_search_filter() -> None: output2 = docsearch.similarity_search("far", k=1, filter={"first_letter": "b"}) docsearch.delete_collection() assert output1 == [ - Document(page_content="far", metadata={"first_letter": "f"}, id="id_0") + Document(page_content="far", metadata={"first_letter": "f"}, id="id_0"), ] assert output2 == [ - Document(page_content="bar", metadata={"first_letter": "b"}, id="id_1") + Document(page_content="bar", metadata={"first_letter": "b"}, id="id_1"), ] @@ -255,17 +260,21 @@ def test_chroma_search_filter_with_scores() -> None: ids=ids, ) output1 = docsearch.similarity_search_with_score( - "far", k=1, filter={"first_letter": "f"} + "far", + k=1, + filter={"first_letter": "f"}, ) output2 = docsearch.similarity_search_with_score( - "far", k=1, filter={"first_letter": "b"} + "far", + k=1, + filter={"first_letter": "b"}, ) docsearch.delete_collection() assert output1 == [ - (Document(page_content="far", metadata={"first_letter": "f"}, id="id_0"), 0.0) + (Document(page_content="far", metadata={"first_letter": "f"}, id="id_0"), 0.0), ] assert output2 == [ - (Document(page_content="bar", metadata={"first_letter": "b"}, id="id_1"), 1.0) + (Document(page_content="bar", metadata={"first_letter": "b"}, id="id_1"), 1.0), ] @@ -365,7 +374,9 @@ def test_chroma_mmr() -> None: """Test end to end construction and search.""" texts = ["foo", "bar", "baz"] docsearch = Chroma.from_texts( - collection_name="test_collection", texts=texts, embedding=FakeEmbeddings() + collection_name="test_collection", + texts=texts, + embedding=FakeEmbeddings(), ) output = docsearch.max_marginal_relevance_search("foo", k=1) docsearch.delete_collection() @@ -379,7 +390,9 @@ def test_chroma_mmr_by_vector() -> None: texts = ["foo", "bar", "baz"] embeddings = FakeEmbeddings() docsearch = Chroma.from_texts( - 
collection_name="test_collection", texts=texts, embedding=embeddings + collection_name="test_collection", + texts=texts, + embedding=embeddings, ) embedded_query = embeddings.embed_query("foo") output = docsearch.max_marginal_relevance_search_by_vector(embedded_query, k=1) @@ -393,7 +406,9 @@ def test_chroma_with_include_parameter() -> None: """Test end to end construction and include parameter.""" texts = ["foo", "bar", "baz"] docsearch = Chroma.from_texts( - collection_name="test_collection", texts=texts, embedding=FakeEmbeddings() + collection_name="test_collection", + texts=texts, + embedding=FakeEmbeddings(), ) output1 = docsearch.get(include=["embeddings"]) output2 = docsearch.get() @@ -424,7 +439,7 @@ def test_chroma_update_document() -> None: embedding=embedding, ids=[document_id], ) - old_embedding = docsearch._collection.peek()["embeddings"][ # type: ignore + old_embedding = docsearch._collection.peek()["embeddings"][ # type: ignore[index] docsearch._collection.peek()["ids"].index(document_id) ] @@ -441,7 +456,7 @@ def test_chroma_update_document() -> None: output = docsearch.similarity_search(updated_content, k=1) # Assert that the new embedding is correct - new_embedding = docsearch._collection.peek()["embeddings"][ # type: ignore + new_embedding = docsearch._collection.peek()["embeddings"][ # type: ignore[index] docsearch._collection.peek()["ids"].index(document_id) ] @@ -449,7 +464,7 @@ def test_chroma_update_document() -> None: # Assert that the updated document is returned by the search assert output == [ - Document(page_content=updated_content, metadata={"page": "0"}, id=document_id) + Document(page_content=updated_content, metadata={"page": "0"}, id=document_id), ] assert list(new_embedding) == list(embedding.embed_documents([updated_content])[0]) @@ -470,7 +485,9 @@ def test_chroma_update_document_with_id() -> None: # Create an instance of Document with initial content and metadata original_doc = Document( - page_content=initial_content, 
metadata={"page": "0"}, id=document_id + page_content=initial_content, + metadata={"page": "0"}, + id=document_id, ) # Initialize a Chroma instance with the original document @@ -479,7 +496,7 @@ def test_chroma_update_document_with_id() -> None: documents=[original_doc], embedding=embedding, ) - old_embedding = docsearch._collection.peek()["embeddings"][ # type: ignore + old_embedding = docsearch._collection.peek()["embeddings"][ # type: ignore[index] docsearch._collection.peek()["ids"].index(document_id) ] @@ -488,7 +505,9 @@ def test_chroma_update_document_with_id() -> None: # Create a new Document instance with the updated content and the same id updated_doc = Document( - page_content=updated_content, metadata={"page": "0"}, id=document_id + page_content=updated_content, + metadata={"page": "0"}, + id=document_id, ) # Update the document in the Chroma instance @@ -498,7 +517,7 @@ def test_chroma_update_document_with_id() -> None: output = docsearch.similarity_search(updated_content, k=1) # Assert that the new embedding is correct - new_embedding = docsearch._collection.peek()["embeddings"][ # type: ignore + new_embedding = docsearch._collection.peek()["embeddings"][ # type: ignore[index] docsearch._collection.peek()["ids"].index(document_id) ] @@ -506,7 +525,7 @@ def test_chroma_update_document_with_id() -> None: # Assert that the updated document is returned by the search assert output == [ - Document(page_content=updated_content, metadata={"page": "0"}, id=document_id) + Document(page_content=updated_content, metadata={"page": "0"}, id=document_id), ] assert list(new_embedding) == list(embedding.embed_documents([updated_content])[0]) @@ -568,7 +587,8 @@ def test_chroma_add_documents_mixed_metadata() -> None: assert actual_ids == ids assert sorted(search, key=lambda d: d.page_content) == sorted( - docs, key=lambda d: d.page_content + docs, + key=lambda d: d.page_content, ) @@ -582,9 +602,7 @@ def is_api_accessible(url: str) -> bool: def 
batch_support_chroma_version() -> bool: major, minor, patch = chromadb.__version__.split(".") - if int(major) == 0 and int(minor) >= 4 and int(patch) >= 10: - return True - return False + return bool(int(major) == 0 and int(minor) >= 4 and int(patch) >= 10) @pytest.mark.requires("chromadb") @@ -601,9 +619,9 @@ def test_chroma_large_batch() -> None: embedding_function = MyEmbeddingFunction(fak=Fak(size=255)) col = client.get_or_create_collection( "my_collection", - embedding_function=embedding_function, # type: ignore + embedding_function=embedding_function, # type: ignore[arg-type] ) - docs = ["This is a test document"] * (client.get_max_batch_size() + 100) # type: ignore + docs = ["This is a test document"] * (client.get_max_batch_size() + 100) db = Chroma.from_texts( client=client, collection_name=col.name, @@ -629,9 +647,9 @@ def test_chroma_large_batch_update() -> None: embedding_function = MyEmbeddingFunction(fak=Fak(size=255)) col = client.get_or_create_collection( "my_collection", - embedding_function=embedding_function, # type: ignore + embedding_function=embedding_function, # type: ignore[arg-type] ) - docs = ["This is a test document"] * (client.get_max_batch_size() + 100) # type: ignore + docs = ["This is a test document"] * (client.get_max_batch_size() + 100) ids = [str(uuid.uuid4()) for _ in range(len(docs))] db = Chroma.from_texts( client=client, @@ -642,11 +660,12 @@ def test_chroma_large_batch_update() -> None: ) new_docs = [ Document( - page_content="This is a new test document", metadata={"doc_id": f"{i}"} + page_content="This is a new test document", + metadata={"doc_id": f"{i}"}, ) for i in range(len(docs) - 10) ] - new_ids = [_id for _id in ids[: len(new_docs)]] + new_ids = list(ids[: len(new_docs)]) db.update_documents(ids=new_ids, documents=new_docs) db.delete_collection() @@ -658,14 +677,15 @@ def test_chroma_large_batch_update() -> None: reason="API not accessible", ) @pytest.mark.skipif( - batch_support_chroma_version(), reason="ChromaDB 
version does not support batching" + batch_support_chroma_version(), + reason="ChromaDB version does not support batching", ) def test_chroma_legacy_batching() -> None: client = chromadb.HttpClient() embedding_function = Fak(size=255) col = client.get_or_create_collection( "my_collection", - embedding_function=MyEmbeddingFunction, # type: ignore + embedding_function=MyEmbeddingFunction, # type: ignore[arg-type] ) docs = ["This is a test document"] * 100 db = Chroma.from_texts( @@ -683,7 +703,9 @@ def test_create_collection_if_not_exist_default() -> None: """Tests existing behaviour without the new create_collection_if_not_exists flag.""" texts = ["foo", "bar", "baz"] docsearch = Chroma.from_texts( - collection_name="test_collection", texts=texts, embedding=FakeEmbeddings() + collection_name="test_collection", + texts=texts, + embedding=FakeEmbeddings(), ) assert docsearch._client.get_collection("test_collection") is not None docsearch.delete_collection() @@ -798,7 +820,7 @@ def test_delete_where_clause(client: chromadb.ClientAPI) -> None: [ Document(page_content="foo", metadata={"test": "bar"}), Document(page_content="bar", metadata={"test": "foo"}), - ] + ], ) assert vectorstore._collection.count() == 2 vectorstore.delete(where={"test": "bar"}) diff --git a/libs/partners/chroma/tests/unit_tests/test_standard.py b/libs/partners/chroma/tests/unit_tests/test_standard.py index e933339a453..b2aa15d9a92 100644 --- a/libs/partners/chroma/tests/unit_tests/test_standard.py +++ b/libs/partners/chroma/tests/unit_tests/test_standard.py @@ -9,11 +9,10 @@ from langchain_chroma import Chroma class TestChromaStandard(VectorStoreIntegrationTests): @pytest.fixture() - def vectorstore(self) -> Generator[VectorStore, None, None]: # type: ignore + def vectorstore(self) -> Generator[VectorStore, None, None]: # type: ignore[override] """Get an empty vectorstore for unit tests.""" store = Chroma(embedding_function=self.get_embeddings()) try: yield store finally: store.delete_collection() 
- pass