fix typo with colossalai/trainer utils zero (#3908)

2025-09-13 21:22:49 +00:00 · 2023-06-07 16:08:37 +08:00
parent b306cecf28
commit a9d1cadc49
15 changed files with 28 additions and 28 deletions
--- a/colossalai/zero/gemini/chunk/chunk.py
+++ b/colossalai/zero/gemini/chunk/chunk.py
@@ -416,7 +416,7 @@ class Chunk:
        Copy data slice to the memory space indexed by the input tensor in the chunk.

        Args:
-            tensor (torch.Tensor): the tensor used to retrive meta information
+            tensor (torch.Tensor): the tensor used to retrieve meta information
            data_slice (torch.Tensor): the tensor to be copied to the chunk
        """
        # sanity check
--- a/colossalai/zero/gemini/chunk/manager.py
+++ b/colossalai/zero/gemini/chunk/manager.py
@@ -157,7 +157,7 @@ class ChunkManager:
        Copy data to the chunk.

        Args:
-            tensor (torch.Tensor): the tensor used to retrive meta information
+            tensor (torch.Tensor): the tensor used to retrieve meta information
            data (torch.Tensor): the tensor to be copied to the chunk
        """
        chunk = self.tensor_chunk_map[tensor]
--- a/colossalai/zero/gemini/memory_tracer/chunk_memstats_collector.py
+++ b/colossalai/zero/gemini/memory_tracer/chunk_memstats_collector.py
@@ -25,7 +25,7 @@ class ChunkMemStatsCollector(MemStatsCollector):
    # override
    def record_model_data_volume(self) -> None:
        """
-        record model data volumn on cuda and cpu.
+        record model data volume on cuda and cpu.
        """
        if self._start_flag and not self.use_outside_memstats:
            cuda_mem = self._chunk_manager.total_mem['cuda']
--- a/colossalai/zero/gemini/memory_tracer/memory_monitor.py
+++ b/colossalai/zero/gemini/memory_tracer/memory_monitor.py
@@ -45,7 +45,7 @@ class MemoryMonitor:

 class AsyncMemoryMonitor(MemoryMonitor):
    """
-    An Async Memory Monitor runing during computing. Sampling memory usage of the current GPU
+    An Async Memory Monitor running during computing. Sampling memory usage of the current GPU
    at interval of `1/(10**power)` sec.

    The idea comes from Runtime Memory Tracer of PatrickStar
@@ -67,7 +67,7 @@ class AsyncMemoryMonitor(MemoryMonitor):
        async_mem_monitor.save('log.pkl')

    Args:
-        power (int, optional): the power of time interva. Defaults to 10.
+        power (int, optional): the power of time interval. Defaults to 10.

    .. _PatrickStar: Parallel Training of Pre-trained Models via Chunk-based Memory Management:
        https://arxiv.org/abs/2108.05818
--- a/colossalai/zero/gemini/utils.py
+++ b/colossalai/zero/gemini/utils.py
@@ -73,7 +73,7 @@ def get_static_torch_model(zero_ddp_model,
        zero_ddp_model (ZeroDDP): a zero ddp model
        device (torch.device): the device of the final torch model
        dtype (torch.dtype): the dtype of the final torch model
-        only_rank_0 (bool): if True, only rank0 has the coverted torch model
+        only_rank_0 (bool): if True, only rank0 has the converted torch model

    Returns:
        torch.nn.Module: a static torch model used for saving checkpoints or numeric checks
--- a/colossalai/zero/legacy/gemini/ophooks/utils.py
+++ b/colossalai/zero/legacy/gemini/ophooks/utils.py
@@ -88,7 +88,7 @@ def register_ophooks_recursively(module: torch.nn.Module,
                                 ophook_list: List[BaseOpHook],
                                 name: str = "",
                                 filter_fn: Optional[Callable] = None):
-    r"""Recursilvely register pre/post hooks for all submodules in the module in FWD and BWD."""
+    r"""Recursively register pre/post hooks for all submodules in the module in FWD and BWD."""
    assert isinstance(module, torch.nn.Module)
    assert isinstance(ophook_list, (list, tuple))
    assert len(ophook_list) > 0, 'expected at least 1 hook in the argument ophook_list but found 0'
@@ -103,7 +103,7 @@ def register_ophooks_recursively(module: torch.nn.Module,
    if len(list(module.parameters(recurse=False))) == 0:
        return

-    # return from flitered module
+    # return from filtered module
    if filter_fn is not None and filter_fn(module):
        return

--- a/colossalai/zero/legacy/gemini/tensor_utils.py
+++ b/colossalai/zero/legacy/gemini/tensor_utils.py
@@ -77,7 +77,7 @@ def colo_model_data_tensor_move_inline(t: Union[StatefulTensor, torch.Tensor], t
    move a tensor to the target_device
    Args:
        t (Union[StatefulTensor, torch.Tensor]): the tensor be moved
-        target_device: a traget device, if type is int, it the index of cuda card.
+        target_device: a target device, if type is int, it is the index of cuda card.
    """
    if not isinstance(target_device, torch.device):
        target_device = torch.device(f'cuda:{target_device}')