Mirror of https://github.com/hpcaitech/ColossalAI.git, synced 2025-09-12 20:54:35 +00:00
[nfc] fix typo colossalai/zero (#3923)
@@ -261,7 +261,7 @@ def sync_param(flat_tensor, tensor_list):
     share the same memory space. This function will update the tensor list so that
     they point to the same value.
 
-    :param flat_tensor: A flat tensor obtained by calling `torch._utils._unflatten_dense_tensors` on a tensor lsit
+    :param flat_tensor: A flat tensor obtained by calling `torch._utils._unflatten_dense_tensors` on a tensor list
     :param tensor_list: A list of tensors corresponding to the flattened tensor
     :type flat_tensor: torch.Tensor
     :type tensor_list: List[torch.Tensor]
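Note: the docstring above describes keeping a flat buffer and the original tensors in sync. Purely for illustration, here is a minimal sketch of such a helper, assuming PyTorch's `torch._utils._flatten_dense_tensors` / `_unflatten_dense_tensors`; it is not copied from the ColossalAI source.

import torch
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors

def sync_param_sketch(flat_tensor, tensor_list):
    # views into flat_tensor, shaped like the original tensors
    updated = _unflatten_dense_tensors(flat_tensor, tensor_list)
    for old, new in zip(tensor_list, updated):
        # re-point each original tensor at its slice of the flat buffer
        old.data = new.data

tensors = [torch.randn(4, 4), torch.randn(8)]
flat = _flatten_dense_tensors(tensors)
sync_param_sketch(flat, tensors)
# the first tensor now starts at the beginning of the flat buffer
assert tensors[0].data_ptr() == flat.data_ptr()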
@@ -207,8 +207,8 @@ class LowLevelZeroOptimizer(ColossalaiOptimizer):
         for param in self._working_param_groups[group_id]:
             self._param_store.set_param_reduction_state(param, False)
 
-        # intialize communication stream for
-        # communication-compuation overlapping
+        # initialize communication stream for
+        # communication-computation overlapping
         if self._overlap_communication:
             self._comm_stream = torch.cuda.Stream()
 
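Note: the comment being fixed here refers to overlapping gradient communication with computation on a side CUDA stream. A minimal sketch of that general pattern (hypothetical helper names, assuming an initialized torch.distributed process group and CUDA), not the optimizer's actual reduction path:

import torch
import torch.distributed as dist

comm_stream = torch.cuda.Stream()

def reduce_bucket_async(bucket: torch.Tensor):
    # wait until the gradients in `bucket` are fully produced on the default stream
    comm_stream.wait_stream(torch.cuda.current_stream())
    with torch.cuda.stream(comm_stream):
        # runs on comm_stream, overlapping with ongoing computation
        dist.all_reduce(bucket)

def wait_for_reductions():
    # the default stream must not consume reduced grads before communication finishes
    torch.cuda.current_stream().wait_stream(comm_stream)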
@@ -269,7 +269,7 @@ class LowLevelZeroOptimizer(ColossalaiOptimizer):
         params_per_rank = [[] for _ in range(self._world_size)]
         numel_per_rank = [0 for _ in range(self._world_size)]
 
-        # partititon the parameters in a greedy fashion
+        # partition the parameters in a greedy fashion
         sorted_params = sorted(param_list, key=lambda x: x.numel(), reverse=True)
         for param in sorted_params:
             # allocate this parameter to the rank with
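Note: the hunk above touches the greedy partitioning comment: parameters are sorted by element count, largest first, and each one goes to the rank that currently holds the fewest elements. A standalone, illustrative sketch of that scheme, mirroring the variable names visible in the diff:

from typing import List
import torch

def partition_greedily(param_list: List[torch.Tensor], world_size: int):
    params_per_rank = [[] for _ in range(world_size)]
    numel_per_rank = [0 for _ in range(world_size)]

    # largest parameters first, each assigned to the least-loaded rank
    for param in sorted(param_list, key=lambda x: x.numel(), reverse=True):
        rank = numel_per_rank.index(min(numel_per_rank))
        params_per_rank[rank].append(param)
        numel_per_rank[rank] += param.numel()
    return params_per_rank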
@@ -297,7 +297,7 @@ class LowLevelZeroOptimizer(ColossalaiOptimizer):
             param_group = self._working_param_groups[group_id]
             for param in param_group:
                 if param.requires_grad:
-                    # determines the reduction destionation rank
+                    # determines the reduction destination rank
                     # this is only valid for stage 2
                     # dst_rank = None means using all-reduce
                     # else using reduce
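Note: the final comment block distinguishes the stage-2 case, where each gradient is reduced to its destination (owner) rank, from `dst_rank = None`, where an all-reduce is used. A hedged sketch of that branch, with hypothetical names, assuming an initialized process group:

import torch
import torch.distributed as dist

def reduce_grad(grad: torch.Tensor, dst_rank=None):
    if dst_rank is None:
        # every rank keeps a full copy of the reduced gradient
        dist.all_reduce(grad)
    else:
        # stage-2 style: only the owner rank needs the reduced gradient
        dist.reduce(grad, dst=dst_rank)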