mirror of https://github.com/hpcaitech/ColossalAI.git
synced 2025-07-18 01:12:42 +00:00
fix typo colossalai/auto_parallel autochunk fx/passes etc. (#3808)
This commit is contained in:
parent 725365f297
commit 7f8203af69
.github/workflows/README.md (vendored): 4 changed lines
@@ -14,7 +14,7 @@
 - [Compatibility Test on Dispatch](#compatibility-test-on-dispatch)
 - [Release](#release)
 - [User Friendliness](#user-friendliness)
-- [Commmunity](#commmunity)
+- [Community](#community)
 - [Configuration](#configuration)
 - [Progress Log](#progress-log)

@@ -97,7 +97,7 @@ This workflow is triggered by manually dispatching the workflow. It has the foll
 | `Synchronize submodule` | `submodule.yml` | This workflow will check if any git submodule is updated. If so, it will create a PR to update the submodule pointers. |
 | `Close inactive issues` | `close_inactive.yml` | This workflow will close issues which are stale for 14 days. |

-### Commmunity
+### Community

 | Workflow Name | File name | Description |
 | -------------------------------------------- | -------------------------------- | -------------------------------------------------------------------------------- |

@@ -148,7 +148,7 @@ class MetaInfoProp:
         graph_info.fwd_tmp = buffer_tensors
         graph_info.fwd_out = output_tensors

-        # fetch other memory informations
+        # fetch other memory information
         memory_cost = meta_info.memory_cost
         graph_info.fwd_mem_tmp = memory_cost.fwd.temp
         graph_info.fwd_mem_out = memory_cost.fwd.activation

@@ -44,7 +44,7 @@ class BatchNormStrategyGenerator(StrategyGenerator):
         '''
         Compute the computation cost per device with this specific strategy.

-        Note: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
+        Note: compute_cost need to be divided by TFLOPS, now it just shows the computation size.
         '''
         # TODO: a constant coefficient need to be added.
         # 1D: (L) * N * Cin

@@ -38,9 +38,9 @@ class ConvStrategyGenerator(StrategyGenerator):
         '''
         Compute the computation cost per device with this specific strategy.

-        Note: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
+        Note: compute_cost need to be divided by TFLOPS, now it just shows the computation size.
         '''
-        # TODO: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
+        # TODO: compute_cost need to be divided by TFLOPS, now it just shows the computation size.
         # 1D: (L) * N * Cout * Cin * kernel
         # 2D: (H * W) * N * Cout * Cin * kernel
         # 3D: (H * W * D) * N * Cout * Cin * kernel

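For reference, the "computation size" these comments describe can be computed directly from the sharded output shape. A minimal sketch under that reading (hypothetical helper, not part of this commit); dividing the result by a device's TFLOPS, plus a constant coefficient, would turn it into an estimated time:

def conv2d_compute_size(n, c_in, c_out, h_out, w_out, kernel_h, kernel_w):
    # 2D case: (H * W) * N * Cout * Cin * kernel, matching the comment above.
    return h_out * w_out * n * c_out * c_in * kernel_h * kernel_w

# e.g. a 3x3 conv producing 128 channels on a 56x56 map with batch size 8
ops = conv2d_compute_size(n=8, c_in=64, c_out=128, h_out=56, w_out=56, kernel_h=3, kernel_w=3)
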
@@ -34,9 +34,9 @@ class LayerNormGenerator(StrategyGenerator):
         '''
         Compute the computation cost per device with this specific strategy.

-        Note: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
+        Note: compute_cost need to be divided by TFLOPS, now it just shows the computation size.
         '''
-        # TODO: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
+        # TODO: compute_cost need to be divided by TFLOPS, now it just shows the computation size.
         # TODO: a constant coefficient need to be added.

         sharded_input_shape = strategy.sharding_specs[self.op_data['input']].get_sharded_shape_per_device()

@@ -17,7 +17,7 @@ class NormalPoolStrategyGenerator(StrategyGenerator):
     """
     NormalPoolStrategyGenerator is a generic class to generate strategies for pool operation like MaxPoolxd.
     The reason we call this normal pool is AvgPoolxd and MaxPoolxd are taking the kernel size element from image,
-    and reduce them depening on the operation type.
+    and reduce them depending on the operation type.
     """

     def validate(self) -> bool:

@@ -35,9 +35,9 @@ class NormalPoolStrategyGenerator(StrategyGenerator):
         '''
         Compute the computation cost per device with this specific strategy.

-        Note: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
+        Note: compute_cost need to be divided by TFLOPS, now it just shows the computation size.
         '''
-        # TODO: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
+        # TODO: compute_cost need to be divided by TFLOPS, now it just shows the computation size.
         # 1D: (Lout) * N * C * kernel
         # 2D: (H * W) * N * Cout * Cin * kernel
         # 3D: (H * W * D) * N * Cout * Cin * kernel

@@ -366,8 +366,8 @@ class TraceFlow(object):
         # find non chunk inputs
         chunk_info = self._get_non_chunk_inputs(chunk_info, start_idx, end_idx)

-        # reassgin reshape size, some size may have changed due to chunk
-        chunk_info = self._reassgin_reshape_size(chunk_info)
+        # reassign reshape size, some size may have changed due to chunk
+        chunk_info = self._reassign_reshape_size(chunk_info)

         return chunk_info

@@ -428,10 +428,10 @@ class TraceFlow(object):
             chunk_info["outputs_dim"].append(output_dim)
         return True

-    def _reassgin_reshape_size(self, chunk_info):
+    def _reassign_reshape_size(self, chunk_info):
         """
         Some shape args in reshape may have changed due to chunk
-        reassgin those changed shape
+        reassign those changed shape
         """
         chunk_region = chunk_info["region"]
         reshape_size = {}

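For context on why reshape sizes need reassigning: when a region is executed chunk by chunk, any literal shape argument along the chunked dimension must shrink to the chunk length. A rough sketch of the idea (hypothetical, not the actual TraceFlow code):

def rescale_reshape_args(shape_args, chunk_dim, chunk_len):
    # Replace the size recorded along the chunked dimension with the chunk length,
    # leaving every other dimension untouched.
    new_args = list(shape_args)
    new_args[chunk_dim] = chunk_len
    return tuple(new_args)

# A reshape originally emitted as x.reshape(2, 1024, 64) would, once the region is
# chunked along dim 1 with chunk size 128, need to become x.reshape(2, 128, 64).
assert rescale_reshape_args((2, 1024, 64), chunk_dim=1, chunk_len=128) == (2, 128, 64)
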
@@ -397,7 +397,7 @@ class TraceIndice(object):
         input_node = node.args[0]
         assert len(get_node_shape(input_node)) == 4

-        # assgin index
+        # assign index
         self._assign_indice_as_input(node, node_idx, input_node)
         self._del_dim(node_idx, 1)
         self._add_dim(node_idx, 1)

@@ -415,7 +415,7 @@ class TraceIndice(object):
         assert node.kwargs['size'] is None
         assert len(get_node_shape(node)) == 4

-        # assgin index
+        # assign index
         self._assign_indice_as_input(node, node_idx)
         self._mark_computation(node, node_idx, [-1, -2])

@@ -179,7 +179,7 @@ class GeminiPlugin(DPPluginBase):
             Users can provide this argument to speed up searching.
             If users do not know this argument before training, it is ok. We will use a default value 1024.
         min_chunk_size_mb (float, optional): the minimum chunk size in MegaByte.
-            If the aggregate size of parameters is still samller than the minimum chunk size,
+            If the aggregate size of parameters is still smaller than the minimum chunk size,
             all parameters will be compacted into one small chunk.
         memstats (MemStats, optional) the memory statistics collector by a runtime memory tracer.
         gpu_margin_mem_ratio (float, optional): The ratio of GPU remaining memory (after the first forward-backward)

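A hedged usage sketch of the two arguments named in this docstring excerpt (the values are illustrative, everything else is left at its default; this is not part of the commit):

from colossalai.booster import Booster
from colossalai.booster.plugin import GeminiPlugin

# Pack all parameters into a single chunk if they total less than 32 MB, and
# hand none of the leftover GPU memory to the optimizer after the first step.
plugin = GeminiPlugin(min_chunk_size_mb=32, gpu_margin_mem_ratio=0.0)
booster = Booster(plugin=plugin)
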
@@ -181,7 +181,7 @@ class DistCoordinator(metaclass=SingletonMeta):
         """
         is_master = self.is_master(process_group)

-        # define an inner functiuon
+        # define an inner function
         def decorator(func):

             @functools.wraps(func)

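The inner function being defined here follows the usual master-only decorator pattern. A self-contained sketch of that pattern (not the DistCoordinator implementation itself):

import functools

def on_master_only(is_master: bool):
    """Return a decorator that runs the wrapped function only when is_master is True."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            if is_master:
                return func(*args, **kwargs)
            return None  # other ranks silently skip the call
        return wrapper
    return decorator

@on_master_only(is_master=True)
def log(msg):
    print(msg)

log("only the master rank prints this")
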
@@ -381,7 +381,7 @@ class AlphaBetaProfiler:
         first_latency, first_bandwidth = _extract_alpha_beta(first_axis, first_axis_process_group)
         second_latency, second_bandwidth = _extract_alpha_beta(second_axis, second_axis_process_group)
         mesh_alpha = [first_latency, second_latency]
-        # The beta values have been enlarged by 1e10 times temporarilly because the computation cost
+        # The beta values have been enlarged by 1e10 times temporarily because the computation cost
         # is still estimated in the unit of TFLOPs instead of time. We will remove this factor in future.
         mesh_beta = [1e10 / first_bandwidth, 1e10 / second_bandwidth]

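For context, alpha here is the per-message latency and beta the inverse-bandwidth term of the alpha-beta communication model; the 1e10 factor keeps beta on a scale comparable with the TFLOP-based compute estimate, as the comment says. A small illustrative sketch with hypothetical measurements:

# Measured per-axis latency (s) and bandwidth (bytes/s) of the device mesh.
first_latency, first_bandwidth = 2.5e-5, 150e9     # e.g. an intra-node NVLink axis
second_latency, second_bandwidth = 1.0e-4, 12e9    # e.g. an inter-node network axis

mesh_alpha = [first_latency, second_latency]
# Temporarily scaled by 1e10 so it is comparable with costs expressed in TFLOPs.
mesh_beta = [1e10 / first_bandwidth, 1e10 / second_bandwidth]

# alpha-beta cost of sending `size_bytes` along an axis: alpha + beta * size
def comm_cost(axis, size_bytes):
    return mesh_alpha[axis] + mesh_beta[axis] * size_bytes
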
@@ -152,9 +152,9 @@ class PipelineSchedule(BaseSchedule):
             raise TypeError(f"Expected data to be of type torch.Tensor, list, tuple, or dict, but got {type(data)}")

     def load_micro_batch(self):
-        mciro_batch_data = self._get_data_slice(self.batch_data, self.microbatch_offset)
+        micro_batch_data = self._get_data_slice(self.batch_data, self.microbatch_offset)
         self.microbatch_offset += self.microbatch_size
-        return self._move_to_device(mciro_batch_data)
+        return self._move_to_device(micro_batch_data)

     def pre_processing(self, engine):
         from colossalai.zero.legacy import ShardedModelV2

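For context, load_micro_batch walks a fixed-size window over the already-loaded batch. A minimal sketch of that slicing logic on a plain tensor (hypothetical, not the schedule's _get_data_slice):

import torch

batch = torch.arange(8).reshape(8, 1)   # global batch of 8 samples
microbatch_size = 2
offset = 0

microbatches = []
while offset < batch.size(0):
    microbatches.append(batch[offset:offset + microbatch_size])
    offset += microbatch_size

assert len(microbatches) == 4           # 8 samples / micro-batch size 2
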
@@ -84,7 +84,7 @@ class PipelineScheduleV2(PipelineSchedule):
             'The argument \'return_loss\' has to be True when \'forward_only\' is False, but got False.'
         self.load_batch(data_iter)

-        # num_warmup_microbatches is the step when not all the processers are working
+        # num_warmup_microbatches is the step when not all the processes are working
         num_warmup_microbatches = \
             (gpc.get_world_size(ParallelMode.PIPELINE)
              - gpc.get_local_rank(ParallelMode.PIPELINE) - 1)

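The formula in this hunk gives each stage world_size - rank - 1 warmup micro-batches: the last stage starts interleaving forward and backward passes immediately, while earlier stages first fill the pipeline. A worked sketch with hypothetical sizes:

pipeline_world_size = 4        # number of pipeline stages
num_microbatches = 8

for rank in range(pipeline_world_size):
    # forward steps this stage runs before a backward pass is available to interleave
    num_warmup = pipeline_world_size - rank - 1
    num_1f1b = num_microbatches - num_warmup
    print(f"stage {rank}: {num_warmup} warmup forwards, {num_1f1b} 1F1B steps")
# stage 0 warms up with 3 forwards, the last stage with 0.
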
@@ -523,7 +523,7 @@ def emit_code_with_activation_checkpoint(body, ckpt_func, nodes, emit_node_func,
     # append code text to body
     for idx, node in enumerate(node_list):
         # if this is the first node of the ckpt region
-        # append the ckpt function defition
+        # append the ckpt function definition
         if idx in start_idx:
             label = start_idx.index(idx)
             ckpt_fn_def = _gen_ckpt_fn_def(label, input_vars[label])

@@ -206,7 +206,7 @@ def avgcompute_split_pass(gm: torch.fx.GraphModule, pp_size: int):

 def avgnode_split_pass(gm: torch.fx.GraphModule, pp_size: int):
     """
-    In avgnode_split_pass, simpliy split graph by node number.
+    In avgnode_split_pass, simply split graph by node number.
     """
     mod_graph = gm.graph
     avg_num_node = len(mod_graph.nodes) // pp_size

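Splitting the graph "by node number" simply means cutting after every len(nodes) // pp_size nodes. A standalone sketch of that partitioning (hypothetical, operating on a plain list rather than an fx graph):

def split_by_node_count(nodes, pp_size):
    # Assign nodes to pp_size stages in contiguous blocks of roughly equal length.
    avg = max(1, len(nodes) // pp_size)
    stages = [nodes[i * avg:(i + 1) * avg] for i in range(pp_size - 1)]
    stages.append(nodes[(pp_size - 1) * avg:])   # last stage takes the remainder
    return stages

assert split_by_node_count(list(range(10)), 3) == [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]
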
@@ -16,7 +16,7 @@ def apply(*args, **kwargs):
     return shape_consistency_manager.apply(*args, **kwargs)


-def solution_annotatation_pass(gm: torch.fx.GraphModule, solution: List[int], device_mesh):
+def solution_annotation_pass(gm: torch.fx.GraphModule, solution: List[int], device_mesh):
     mod_graph = gm.graph
     nodes = tuple(mod_graph.nodes)

@@ -31,7 +31,7 @@ class TensorMetadata(NamedTuple):
     numel: int
     is_tensor: bool
     # TODO: we can add a list of sharding spec here, and record the sharding
-    # behaviour by appending sharding spec into list.
+    # behavior by appending sharding spec into list.


 def _extract_tensor_metadata(result: torch.Tensor) -> TensorMetadata:

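For context, _extract_tensor_metadata records per-tensor facts that later passes (and, per the TODO, possibly sharding specs) can consume without holding the tensor itself. A stripped-down sketch using only the fields visible in this hunk plus the obvious shape and dtype (a hypothetical field set, not the full TensorMetadata):

from typing import NamedTuple
import torch

class TinyTensorMetadata(NamedTuple):
    shape: torch.Size
    dtype: torch.dtype
    numel: int
    is_tensor: bool

def extract(result) -> TinyTensorMetadata:
    if isinstance(result, torch.Tensor):
        return TinyTensorMetadata(result.shape, result.dtype, result.numel(), True)
    return TinyTensorMetadata(torch.Size([]), torch.float32, 0, False)

meta = extract(torch.zeros(2, 3))
assert meta.numel == 6 and meta.is_tensor
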
@@ -230,7 +230,7 @@ def split_module_for_gpt2_test(
             use_partition.partitions_dependent_on.setdefault(def_partition_name)

     node_process_list = list(m.graph.nodes)
-    # split nodes into parititons
+    # split nodes into partitions
     while node_process_list:
         node = node_process_list.pop(0)
         orig_nodes[node.name] = node

@@ -277,7 +277,7 @@ def split_module_for_gpt2_test(
     if len(sorted_partitions) != len(partitions):
         raise RuntimeError("cycle exists between partitions!")

-    # add placeholders to parititons
+    # add placeholders to partitions
     for partition_name in sorted_partitions:
         partition = partitions[partition_name]
         for input in partition.inputs:

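The "cycle exists between partitions" check falls out of a topological sort over partition dependencies: if the sort cannot place every partition, the dependency graph has a cycle. A minimal sketch of that check (hypothetical, using a plain dependency dict rather than the Partition objects below):

def topo_sort_partitions(deps):
    """deps maps a partition name to the set of partition names it depends on."""
    sorted_parts, placed = [], set()
    while len(sorted_parts) < len(deps):
        ready = [p for p, d in deps.items() if p not in placed and d <= placed]
        if not ready:                       # nothing can be placed -> cycle
            raise RuntimeError("cycle exists between partitions!")
        for p in ready:
            sorted_parts.append(p)
            placed.add(p)
    return sorted_parts

assert topo_sort_partitions({"a": set(), "b": {"a"}, "c": {"b"}}) == ["a", "b", "c"]
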
@@ -29,8 +29,8 @@ class Partition:
                f" nodes: {self.node_names},\n" \
                f" inputs: {self.inputs},\n" \
                f" outputs: {self.outputs},\n" \
-               f" partitions depenent on: {self.partitions_dependent_on},\n" \
-               f" parition dependents: {self.partition_dependents}"
+               f" partitions dependent on: {self.partitions_dependent_on},\n" \
+               f" partition dependents: {self.partition_dependents}"


 # Creates subgraphs out of main graph