[NFC]fix typo colossalai/auto_parallel nn utils etc. (#3779)

* fix typo colossalai/autochunk auto_parallel amp
* fix typo colossalai/auto_parallel nn utils etc.
2025-09-16 14:41:53 +00:00 · 2023-05-23 15:28:20 +08:00
parent e871e342b3
commit 9265f2d4d7
16 changed files with 46 additions and 46 deletions
--- a/colossalai/utils/tensor_detector/readme.md
+++ b/colossalai/utils/tensor_detector/readme.md
@@ -46,7 +46,7 @@ detector.detect()

 I have made some comments on the right of the output for your understanding.

-Note that the total `Mem` of all the tensors and parameters is not equal to `Total GPU Memery Allocated`.  PyTorch's memory management is really complicated, and for models of a large scale, it's impossible to figure out clearly.
+Note that the total `Mem` of all the tensors and parameters is not equal to `Total GPU Memory Allocated`.  PyTorch's memory management is really complicated, and for models of a large scale, it's impossible to figure out clearly.

 **The order of print is not equal to the order the tensor creates, but they are really close.**

@@ -61,7 +61,7 @@ Note that the total `Mem` of all the tensors and parameters is not equal to `Tot
 +  mlp.2.bias                        cuda:0               (32,)      True       torch.float32          128 B
 ------------------------------------------------------------------------------------------------------------
 Detect Location: "test_tensor_detector.py" line 27
-Totle GPU Memery Allocated on cuda:0 is 4.5 KB
+Total GPU Memory Allocated on cuda:0 is 4.5 KB
 ------------------------------------------------------------------------------------------------------------


@@ -72,7 +72,7 @@ Totle GPU Memery Allocated on cuda:0 is 4.5 KB
 +  Tensor                            cuda:0               (32,)      True       torch.float32          128 B    # output
 ------------------------------------------------------------------------------------------------------------
 Detect Location: "test_tensor_detector.py" line 30
-Totle GPU Memery Allocated on cuda:0 is 5.5 KB
+Total GPU Memory Allocated on cuda:0 is 5.5 KB
 ------------------------------------------------------------------------------------------------------------


@@ -82,7 +82,7 @@ Totle GPU Memery Allocated on cuda:0 is 5.5 KB
 +  Tensor                            cuda:0                  ()      True       torch.float32            4 B    # loss
 ------------------------------------------------------------------------------------------------------------
 Detect Location: "test_tensor_detector.py" line 32
-Totle GPU Memery Allocated on cuda:0 is 6.0 KB
+Total GPU Memory Allocated on cuda:0 is 6.0 KB
 ------------------------------------------------------------------------------------------------------------


@@ -103,7 +103,7 @@ Totle GPU Memery Allocated on cuda:0 is 6.0 KB
 -  Tensor                            cuda:0                (8,)      True       torch.float32           32 B    # deleted activation
 ------------------------------------------------------------------------------------------------------------
 Detect Location: "test_tensor_detector.py" line 34
-Totle GPU Memery Allocated on cuda:0 is 10.0 KB
+Total GPU Memory Allocated on cuda:0 is 10.0 KB
 ------------------------------------------------------------------------------------------------------------


@@ -117,7 +117,7 @@ Totle GPU Memery Allocated on cuda:0 is 10.0 KB
 +  Tensor                            cuda:0               (32,)     False       torch.float32          128 B
 ------------------------------------------------------------------------------------------------------------
 Detect Location: "test_tensor_detector.py" line 36
-Totle GPU Memery Allocated on cuda:0 is 14.0 KB
+Total GPU Memory Allocated on cuda:0 is 14.0 KB
 ------------------------------------------------------------------------------------------------------------
 ```

--- a/colossalai/utils/tensor_detector/tensor_detector.py
+++ b/colossalai/utils/tensor_detector/tensor_detector.py
@@ -55,7 +55,7 @@ class TensorDetector():
        return self.mem_format(memory_size)

    def mem_format(self, real_memory_size):
-        # format the tensor memory into a reasonal magnitude
+        # format the tensor memory into a reasonable magnitude
        if real_memory_size >= 2**30:
            return str(real_memory_size / (2**30)) + ' GB'
        if real_memory_size >= 2**20:
@@ -71,7 +71,7 @@ class TensorDetector():
                if (not self.include_cpu) and obj.device == torch.device('cpu'):
                    continue
                self.detected.append(id(obj))
-                # skip paramters we had added in __init__ when module is an instance of nn.Module for the first epoch
+                # skip parameters we had added in __init__ when module is an instance of nn.Module for the first epoch
                if id(obj) not in self.tensor_info:

                    name = type(obj).__name__
@@ -84,7 +84,7 @@ class TensorDetector():
                                    name = par_name + ' (with grad)'
                        else:
                            # with no grad attached
-                            # there will be no new paramters created during running
+                            # there will be no new parameters created during running
                            # so it must be in saved_tensor_info
                            continue
                    # we can also marked common tensors as tensor(with grad)
@@ -155,7 +155,7 @@ class TensorDetector():
            if device == torch.device('cpu'):
                continue
            gpu_mem_alloc = self.mem_format(torch.cuda.memory_allocated(device))
-            self.info += f"Totle GPU Memery Allocated on {device} is {gpu_mem_alloc}\n"
+            self.info += f"Total GPU Memory Allocated on {device} is {gpu_mem_alloc}\n"
        self.info += LINE
        self.info += '\n\n'
        if self.show_info: