Mirror of https://github.com/hpcaitech/ColossalAI.git, synced 2025-09-15 22:19:38 +00:00
[doc] Fix typo under colossalai and doc (#3618)
* Fixed several spelling errors under colossalai
* Fixed spelling errors in the colossalai and docs directories
* Cautiously changed spelling errors under the example folder
* Update runtime_preparation_pass.py: revert autograft to autograd
* Update search_chunk.py: utile to until
* Update check_installation.py: change misteach to mismatch in line 91
* Update 1D_tensor_parallel.md: revert to perceptron
* Update 2D_tensor_parallel.md: revert to perceptron in line 73
* Update 2p5D_tensor_parallel.md: revert to perceptron in line 71
* Update 3D_tensor_parallel.md: revert to perceptron in line 80
* Update README.md: revert to resnet in line 42
* Update reorder_graph.py: revert to indice in line 7
* Update p2p.py: revert to megatron in line 94
* Update initialize.py: revert to torchrun in line 198
* Update routers.py: change to detailed in line 63
* Update routers.py: change to detailed in line 146
* Update README.md: revert random number in line 402
@@ -184,7 +184,7 @@ class ColoTensor(torch.Tensor):
         # we have to capture the `backward` function
         # and make sure that it does not in `torch._C.DisableTorchFunction()` context
         if func is torch.Tensor.backward:
-            assert len(args) == 1  # only has 1 paramter
+            assert len(args) == 1  # only has 1 parameter
             backward_tensor = torch.Tensor(args[0])
             tensor_kwargs = {k: torch.Tensor(v) if torch.is_tensor(v) else v for k, v in kwargs.items()}
             return backward_tensor.backward(**tensor_kwargs)
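The hunk above sits inside ColoTensor's `__torch_function__` override, which special-cases `torch.Tensor.backward`. The sketch below is a minimal, standalone illustration of that interception pattern; `MyTensor` is a hypothetical subclass, not the real ColoTensor code.

import torch

class MyTensor(torch.Tensor):
    @classmethod
    def __torch_function__(cls, func, types, args=(), kwargs=None):
        kwargs = kwargs or {}
        # Special-case backward before disabling __torch_function__ dispatch,
        # mirroring the lines changed in the hunk above.
        if func is torch.Tensor.backward:
            assert len(args) == 1  # only has 1 parameter
            backward_tensor = torch.Tensor(args[0])
            tensor_kwargs = {k: torch.Tensor(v) if torch.is_tensor(v) else v for k, v in kwargs.items()}
            return backward_tensor.backward(**tensor_kwargs)
        # Every other op falls through to the plain torch implementation.
        with torch._C.DisableTorchFunction():
            return func(*args, **kwargs)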
@@ -228,7 +228,7 @@ class ColoTensor(torch.Tensor):
         2. If the pg is not not None and not equal to the current process group.
         First, convert the tensor as replicated among the TP process group.
         Second, reset the process group to the new pg.
-        Third, conver the tensor (new replicated both among the tp process group) to the new dist_spec.
+        Third, convert the tensor (new replicated both among the tp process group) to the new dist_spec.

         Args:
             dist_spec (_DistSpec): the new dist spec.
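For intuition, the sketch below simulates the three steps from the docstring on a single process; as an assumption, `torch.cat`/`torch.chunk` stand in for the real collectives and process-group bookkeeping.

import torch

world_size = 4
full = torch.arange(16.0).reshape(4, 4)
# Current dist spec: sharded along dim 0 across the (simulated) TP group.
shards = list(torch.chunk(full, world_size, dim=0))

# Step 1: convert the tensor to a replicated one among the TP group (all-gather).
replicated = torch.cat(shards, dim=0)

# Step 2: in the real code, the tensor's process group is reset to the new pg here.

# Step 3: convert the replicated tensor to the new dist_spec, e.g. sharded along dim 1.
new_shards = list(torch.chunk(replicated, world_size, dim=1))
assert [s.shape for s in new_shards] == [torch.Size([4, 1])] * world_size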
@@ -297,7 +297,7 @@ class ColoTensor(torch.Tensor):
     def size_global(self, *args) -> torch.Size:
         """size_global

-        override the torch buildin size()
+        override the torch building size()
         the shape passed in must be in a replicate placement.

         Returns:
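`size_global` returns the shape of the whole logical tensor rather than the local shard. The following plain-Python sketch conveys that idea; the dim-to-shard-count mapping is an assumption, not the real ColoTensor bookkeeping.

import torch

def size_global_sketch(local_tensor: torch.Tensor, shards_per_dim: dict) -> torch.Size:
    # Scale each sharded dimension of the local shard back up to the global size.
    sizes = list(local_tensor.size())
    for dim, num_shards in shards_per_dim.items():
        sizes[dim] *= num_shards
    return torch.Size(sizes)

local_shard = torch.zeros(2, 8)                  # one rank's piece of an 8 x 8 tensor
print(size_global_sketch(local_shard, {0: 4}))   # torch.Size([8, 8])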
@@ -391,7 +391,7 @@ class CommSpec:
    to determine the buffer shape, and logical_process_axis

    Argument:
-        comm_pattern(CollectiveCommPattern): decribe the communication method used in this spec.
+        comm_pattern(CollectiveCommPattern): describe the communication method used in this spec.
        sharding_spec(ShardingSpec): This is sharding spec of the tensor which will join the communication action.
        gather_dim(int, Optional): The gather_dim of the tensor will be gathered.
        shard_dim(int, Optional): The shard_dim of the tensor will be sharded.
@@ -10,7 +10,7 @@ class ComputePattern(Enum):

 class ComputeSpec(object):
     """ComputeSpec
-    The Specification for compuattion pattern
+    The Specification for computation pattern

     Args:
         compute_pattern (ComputePattern): an Enum instance for compute pattern.
@@ -14,7 +14,7 @@ class Layout:
     """Layout of a tensor.

     Attributes:
-        device_mesh: the device mesh to store the tensor distributedly.
+        device_mesh: the device mesh to store the tensor distributed.
         device_type: the type of the device mesh, e.g. 'cpu' or 'cuda'.
         sharding_spec: the sharding specification to describe how the tensor is sharded.
         entire_shape: the entire shape of the global tensor.
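The attributes listed above amount to a small record type. A hedged dataclass sketch follows; the field names come from the docstring, while the types are assumptions and this is not the real Layout class.

from dataclasses import dataclass
from typing import Any, Tuple

@dataclass
class LayoutSketch:
    device_mesh: Any                # the device mesh the tensor is distributed over
    device_type: str                # e.g. 'cpu' or 'cuda'
    sharding_spec: Any              # how the tensor is sharded across the mesh
    entire_shape: Tuple[int, ...]   # the entire shape of the global tensor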
@@ -14,7 +14,7 @@ NAN = 'nan'

 class DimSpec:
     '''
-    Sharding spec for single dimension of the sharded tensor decribe the sharding dimension of
+    Sharding spec for single dimension of the sharded tensor describe the sharding dimension of
     logical device mesh and give a method to compute the difference between them.
     This class is used internally in ShardingSpec.

@@ -143,7 +143,7 @@ class ShardingSpec:

    Argument:
        dim_partition_dict(Dict[int, List[int]], optional): The key is the dimension of tensor to be sharded,
-            and the value of the key decribe which logical axis will be sharded in that dimension.
+            and the value of the key describe which logical axis will be sharded in that dimension.
        sharding_sequence(List[DimSpec], optional): A straight view of ShardingSpec looks like [R, R, S0, S1].
    '''

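The two arguments describe the same sharding in different forms. The sketch below (a simplified helper, not the real ShardingSpec constructor) shows how a `dim_partition_dict` expands into the straight `[R, R, S0, S1]`-style view.

def to_sharding_sequence(num_dims: int, dim_partition_dict: dict) -> list:
    # 'R' for replicated dims; 'S<axes>' for dims sharded over those mesh axes.
    sequence = []
    for dim in range(num_dims):
        axes = dim_partition_dict.get(dim, [])
        sequence.append('S' + ''.join(str(a) for a in axes) if axes else 'R')
    return sequence

# A 4-D tensor sharded on dim 2 by mesh axis 0 and on dim 3 by mesh axis 1.
print(to_sharding_sequence(4, {2: [0], 3: [1]}))   # ['R', 'R', 'S0', 'S1']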
@@ -61,7 +61,7 @@ class DistSpecManager:
        Args:
            tensor (torch.Tensor): a global (replicated) tensor before shard
            dist_spec (_DistSpec): the distributed spec. to be sharded as.
-            pg (ProcessGrouo): the process group of the corresponding colotensor
+            pg (ProcessGroup): the process group of the corresponding colotensor
        Returns:
            torch.Tensor: a torch tensor after sharded.
        """
@@ -15,7 +15,7 @@ class _DistSpec:
    A class indicates Distributed Specification.
    The DistSpec is only works for the tensor parallel process groups.
    Because the dist spec of data parallel process group can be automatically deduced.
-    This is an internal data structrue.
+    This is an internal data structure.
    The API for users should be `ShardSpec` and `ReplicaSpec`.

    Args:
@@ -73,7 +73,7 @@ class ShapeConsistencyManager(metaclass=SingletonMeta):
                               orig_cost_dict: Dict[str, float]) -> Dict[ShardingSpec, float]:
        '''
        Get all valid sharding specs from source_spec with single all-gather operation, and
-        accumulate commucation cost on origin cost which will finally be used in auto sharding solver.
+        accumulate communication cost on origin cost which will finally be used in auto sharding solver.
        For the all-gather operation, we just care about the S dimension.

        Argument:
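Concretely, each single all-gather removes the last mesh axis from one sharded dimension, which is what `all_gather_simulator` further below does with `shard_list[:-1]`. Here is a simplified, dict-based sketch of enumerating the reachable specs; as an assumption it ignores the ShardingSpec objects and cost tracking used by the real manager.

def all_gather_candidates(dim_partition_dict: dict) -> list:
    # One candidate per sharded dim: gather away the last mesh axis on that dim.
    candidates = []
    for dim in dim_partition_dict:
        new_dict = {d: list(axes) for d, axes in dim_partition_dict.items()}
        new_dict[dim] = new_dict[dim][:-1]
        if not new_dict[dim]:
            del new_dict[dim]
        candidates.append(new_dict)
    return candidates

# Source spec [S01, S2]: dim 0 sharded over mesh axes [0, 1], dim 1 over [2].
print(all_gather_candidates({0: [0, 1], 1: [2]}))   # [{0: [0], 1: [2]}, {0: [0, 1]}]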
@@ -145,7 +145,7 @@ class ShapeConsistencyManager(metaclass=SingletonMeta):
                               orig_cost_dict: Dict[str, float]) -> Dict[ShardingSpec, float]:
        '''
        Get all valid sharding specs from source_spec with single all-to-all operation, and
-        accumulate commucation cost on origin cost which will finally be used in auto sharding solver.
+        accumulate communication cost on origin cost which will finally be used in auto sharding solver.
        For the all-to-all operation, we just care about the pairs containing S dimension.

        Argument:
@@ -18,7 +18,7 @@ NAN = 'nan'

 class _DimSpec:
     '''
-    Sharding spec for single dimension of the sharded tensor decribe the sharding dimension of
+    Sharding spec for single dimension of the sharded tensor describe the sharding dimension of
     logical device mesh and give a method to compute the difference between them.
     This class is used internally in ShardingSpec.

@@ -18,7 +18,7 @@ def all_gather_simulator(target_pair):

    Argument:
        target_pair(Tuple[int, List[int]]): The first element is the dimension of tensor to be sharded,
-        and the second element decribes which logical axis will be sharded in that dimension.
+        and the second element describes which logical axis will be sharded in that dimension.
    '''
    _, shard_list = target_pair
    new_shard_list = shard_list[:-1]
@@ -36,7 +36,7 @@ def all_to_all_simulator(f_target_pair, b_target_pair):
    Therefore, if the behind shard_list is not None, we just extend it to the front shard_list.
    Argument:
        target_pair(Tuple[int, List[int]]): The first element is the dimension of tensor to be sharded,
-        and the second element decribes which logical axis will be sharded in that dimension.
+        and the second element describes which logical axis will be sharded in that dimension.
    e.g.:
        all-to-all(S0, S1) -> [S01, R]
        all-to-all(S0, R) -> [R, S0]
@@ -46,7 +46,7 @@ def all_to_all_simulator(f_target_pair, b_target_pair):

    Argument:
        target_pair(Tuple[int, List[int]]): The first element is the dimension of tensor to be sharded,
-        and the second element decribes which logical axis will be sharded in that dimension.
+        and the second element describes which logical axis will be sharded in that dimension.
    '''
    _, f_shard_list = f_target_pair
    _, b_shard_list = b_target_pair
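Based only on the documented examples above (all-to-all(S0, S1) -> [S01, R] and all-to-all(S0, R) -> [R, S0]), here is a hedged reconstruction of what the simulator computes for the two participating dimensions; it is an assumption, not the actual function body.

def all_to_all_sketch(f_target_pair, b_target_pair):
    _, f_shard_list = f_target_pair
    _, b_shard_list = b_target_pair
    if b_shard_list:
        # e.g. all-to-all(S0, S1) -> [S01, R]: the behind axes are extended onto the front dim.
        return f_shard_list + b_shard_list, []
    # e.g. all-to-all(S0, R) -> [R, S0]: the front dim's axes move to the behind dim.
    return [], f_shard_list

print(all_to_all_sketch((0, [0]), (1, [1])))   # ([0, 1], []) i.e. [S01, R]
print(all_to_all_sketch((0, [0]), (1, [])))    # ([], [0])    i.e. [R, S0]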