[misc] update pre-commit and run all files (#4752)

* [misc] update pre-commit

* [misc] run pre-commit

* [misc] remove useless configuration files

* [misc] ignore cuda for clang-format
Hongxin Liu authored on 2023-09-19 14:20:26 +08:00, committed by GitHub
parent 3c6b831c26
commit 079bf3cb26
1268 changed files with 50037 additions and 38444 deletions

View File

@@ -13,20 +13,20 @@ from .tensor_detector import TensorDetector
from .timer import MultiTimer, Timer
__all__ = [
'conditional_context',
'get_current_device',
'synchronize',
'empty_cache',
'set_to_cuda',
'Timer',
'MultiTimer',
'multi_tensor_applier',
'TensorDetector',
'ensure_path_exists',
'disposable',
'_cast_float',
'free_storage',
'set_seed',
'is_ddp_ignored',
'set_device',
"conditional_context",
"get_current_device",
"synchronize",
"empty_cache",
"set_to_cuda",
"Timer",
"MultiTimer",
"multi_tensor_applier",
"TensorDetector",
"ensure_path_exists",
"disposable",
"_cast_float",
"free_storage",
"set_seed",
"is_ddp_ignored",
"set_device",
]

View File

@@ -28,7 +28,7 @@ def conditional_context(context_manager, enable=True):
def is_ddp_ignored(p):
return getattr(p, '_ddp_to_ignore', False)
return getattr(p, "_ddp_to_ignore", False)
def disposable(func: Callable) -> Callable:

View File

@@ -29,9 +29,9 @@ def get_current_device() -> torch.device:
If CUDA is available, return the current GPU device; otherwise, return the CPU device.
"""
if torch.cuda.is_available():
return torch.device(f'cuda:{torch.cuda.current_device()}')
return torch.device(f"cuda:{torch.cuda.current_device()}")
else:
return torch.device('cpu')
return torch.device("cpu")
def synchronize():

View File

@@ -27,19 +27,18 @@ def call_to_str(base, *args, **kwargs):
Returns:
str: A string representation of base(*args, **kwargs)
"""
name = f'{base}('
name = f"{base}("
if args:
name += ', '.join(repr(arg) for arg in args)
name += ", ".join(repr(arg) for arg in args)
if kwargs:
name += ', '
name += ", "
if kwargs:
name += ', '.join(f'{key}={repr(arg)}' for key, arg in kwargs.items())
name += ')'
name += ", ".join(f"{key}={repr(arg)}" for key, arg in kwargs.items())
name += ")"
return name
class InsertPostInitMethodToModuleSubClasses(object):
def __init__(self, default_dtype: Optional[torch.dtype] = None):
self._old_default_dtype = None
self._default_dtype = default_dtype
@@ -53,7 +52,6 @@ class InsertPostInitMethodToModuleSubClasses(object):
torch.set_default_dtype(self._default_dtype)
def preprocess_after(f):
@functools.wraps(f)
def wrapper(module: torch.nn.Module, *args, **kwargs):
f(module, *args, **kwargs)
@@ -74,7 +72,7 @@ class InsertPostInitMethodToModuleSubClasses(object):
substitute_init_recursively(torch.nn.modules.module.Module, _enable_class, set())
# holding on to the current __init__subclass__ for exit
torch.nn.modules.module.Module._old_init_subclass = (torch.nn.modules.module.Module.__init_subclass__)
torch.nn.modules.module.Module._old_init_subclass = torch.nn.modules.module.Module.__init_subclass__
# Replace .__init__() for future subclasses of torch.nn.Module
torch.nn.modules.module.Module.__init_subclass__ = classmethod(_init_subclass)
@@ -82,12 +80,11 @@ class InsertPostInitMethodToModuleSubClasses(object):
return self
def __exit__(self, exc_type, exc_value, traceback):
if self._default_dtype is not None:
torch.set_default_dtype(self._old_default_dtype)
def _disable_class(cls):
if not hasattr(cls, '_old_init'):
if not hasattr(cls, "_old_init"):
raise AttributeError(
f"_old_init is not found in the {cls.__name__}, please make sure that you have imported {cls.__name__} before entering the context."
)
@@ -97,7 +94,7 @@ class InsertPostInitMethodToModuleSubClasses(object):
substitute_init_recursively(torch.nn.modules.module.Module, _disable_class, set())
# Replace .__init__() for future subclasses of torch.nn.Module
torch.nn.modules.module.Module.__init_subclass__ = (torch.nn.modules.module.Module._old_init_subclass)
torch.nn.modules.module.Module.__init_subclass__ = torch.nn.modules.module.Module._old_init_subclass
self._post_context_exec()
# Now that we cleaned up the metaclass injection, raise the exception.

View File

@@ -19,8 +19,8 @@ def get_moe_epsize_param_dict(model: nn.Module) -> Dict[int, List[nn.Parameter]]
"""
epsize_param_dict = dict()
for param in model.parameters():
if not hasattr(param, 'moe_info'):
ep_size = 1 # set ep_size to 1 for dp parameters
if not hasattr(param, "moe_info"):
ep_size = 1 # set ep_size to 1 for dp parameters
else:
ep_size = param.moe_info.ep_size
if ep_size not in epsize_param_dict:
@@ -37,7 +37,6 @@ def sync_moe_model_param(model: nn.Module):
model (:class:`torch.nn.Module`): A PyTorch model whose parameters' consistency you want to check.
"""
if is_using_ddp():
param_dict = get_moe_epsize_param_dict(model)
# synchronize the parameters whose dp_group is the whole world

View File

@@ -25,7 +25,9 @@ class MultiTensorApply(object):
raise RuntimeError(
"Attempted to call MultiTensorApply method, but MultiTensorApply "
"is not available, possibly because Apex was installed without "
"--cpp_ext --cuda_ext. Original import error message:", MultiTensorApply.import_err)
"--cpp_ext --cuda_ext. Original import error message:",
MultiTensorApply.import_err,
)
def __call__(self, op, noop_flag_buffer, tensor_lists, *args):
self.check_avail()

View File

@@ -1,7 +1,7 @@
# Rank Recorder
This is a useful tool to get records of certain functions in each rank. The records of each rank are dumped into a JSON file after the multi-process program ends. You can parse and visualize the JSON file easily.
Before using the tool, you should ensure that dist.is_initialized() returns true before the program exits.
## Usage
@@ -58,10 +58,10 @@ def worker(rank):
with recorder("calc_1(x100)", rank) as r:
calc(100, 100)
with recorder("calc_2(x400)", rank) as r:
calc(400, 400)
with recorder("calc_2(x200)", rank) as r:
calc(200, 200)
@@ -69,4 +69,4 @@ if __name__ == "__main__":
mp.spawn(worker, nprocs=WORLD_SIZE)
```
Run the script directly and you will get `kernel_select.json` and `kernel_select.png` in your current folder.
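A minimal end-to-end version of the worker pattern shown above; the `gloo` backend and the localhost rendezvous settings are assumptions added only to make the sketch self-contained, while `recorder(name, rank)` and `mp.spawn(worker, nprocs=WORLD_SIZE)` come from the README itself:

```python
import os

import torch.distributed as dist
import torch.multiprocessing as mp

from colossalai.utils.rank_recorder import recorder

WORLD_SIZE = 2  # number of processes to spawn (illustrative)


def worker(rank: int):
    # the recorder dumps its records at process exit, so the process group
    # must be initialized (dist.is_initialized() == True) before the program ends
    os.environ.setdefault("MASTER_ADDR", "localhost")  # assumed rendezvous settings
    os.environ.setdefault("MASTER_PORT", "29500")
    dist.init_process_group("gloo", rank=rank, world_size=WORLD_SIZE)

    # record an arbitrary region of work under a named label
    with recorder("dummy_work", rank):
        sum(i * i for i in range(100_000))


if __name__ == "__main__":
    mp.spawn(worker, nprocs=WORLD_SIZE)
```

After the workers exit, the per-rank records are dumped, merged, and visualized (see the `dump_record`, `merge_recode`, and `visualize_record` methods in the rank_recorder.py hunk below).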

View File

@@ -1,3 +1,3 @@
from colossalai.utils.rank_recorder.rank_recorder import recorder
__all__ = ["recorder"]

View File

@@ -1,18 +1,15 @@
import time
from typing import List, Dict
import atexit
import json
import os
import time
import shutil
import atexit
import time
from typing import Dict, List
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import torch
import torch.distributed as dist
import json
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
cmap = list(mcolors.TABLEAU_COLORS.values())
LOG_FOLDER = "record.log"
@@ -20,7 +17,6 @@ MAX_WAIT_TIME = 20
class Event:
def __init__(self, start: int, end: int, name: str, rank: int) -> None:
self.start = start
self.end = end
@@ -29,16 +25,15 @@ class Event:
class Recorder:
def __init__(self) -> None:
self.rank_to_history: Dict[int, List[Event]] = {}
self.base_time = time.time()
self.temp_event = None
self.export_format = 'png'
self.export_name = 'test'
self.export_format = "png"
self.export_name = "test"
self.dpi = 500
self.theme = 'dark_background'
self.theme = "dark_background"
self.figure_width = 30
self.figure_height = 10
self.legend_fontsize = 16
@@ -84,18 +79,18 @@ class Recorder:
def dump_record(self):
rank = dist.get_rank()
rank_to_history = self.rank_to_history
records = {'base_time': self.base_time, 'content': {}}
records = {"base_time": self.base_time, "content": {}}
for record_rank in rank_to_history:
history = rank_to_history[record_rank]
recs = []
for event in history:
rec = {'start': event.start, 'end': event.end, 'name': event.name}
rec = {"start": event.start, "end": event.end, "name": event.name}
recs.append(rec)
records['content'][record_rank] = recs
records["content"][record_rank] = recs
dump_name = f'{rank}.json'
dump_name = f"{rank}.json"
dump_path = os.path.join(LOG_FOLDER, dump_name)
with open(dump_path, 'w', encoding='utf-8') as f:
with open(dump_path, "w", encoding="utf-8") as f:
json.dump(records, f, ensure_ascii=False)
def merge_recode(self):
@@ -117,24 +112,22 @@ class Recorder:
logs_path = [os.path.join(LOG_FOLDER, file) for file in os.listdir(LOG_FOLDER)]
recoders = {}
for path in logs_path:
with open(path, 'r', encoding='utf-8') as f:
with open(path, "r", encoding="utf-8") as f:
recs = json.load(f)
for record_rank in recs['content']:
history = recs['content'][record_rank]
for record_rank in recs["content"]:
history = recs["content"][record_rank]
recoders[record_rank] = []
for rec in history:
recoders[record_rank].append({
'start': rec['start'] - base_time,
'end': rec['end'] - base_time,
'name': rec['name']
})
recoders[record_rank].append(
{"start": rec["start"] - base_time, "end": rec["end"] - base_time, "name": rec["name"]}
)
shutil.rmtree(LOG_FOLDER)
with open(self.export_name + '.json', 'w', encoding='utf-8') as f:
with open(self.export_name + ".json", "w", encoding="utf-8") as f:
json.dump(recoders, f, ensure_ascii=False)
def visualize_record(self):
with open(self.export_name + '.json', 'r', encoding='utf-8') as f:
with open(self.export_name + ".json", "r", encoding="utf-8") as f:
records = json.load(f)
records = dict(records)
ranks = list(sorted(records.keys()))
@@ -147,9 +140,9 @@ class Recorder:
for rank in ranks:
rank_records = records[rank]
for rec in rank_records:
s = rec['start']
e = rec['end']
name = rec['name']
s = rec["start"]
e = rec["end"]
name = rec["name"]
if name not in name_list:
name_list[name] = len(name_list)
bar = plt.barh(rank, width=e - s, height=self.bar_height, left=s, color=cmap[name_list[name]])
@@ -157,8 +150,8 @@ class Recorder:
plots[name] = bar
plt.legend(list(plots.values()), list(plots.keys()), loc="upper left", fontsize=self.legend_fontsize)
plt.yticks(ticks=ranks, labels=[f'Device:{rank}' for rank in ranks], fontsize=self.device_fontsize)
plt.grid(axis='x')
plt.yticks(ticks=ranks, labels=[f"Device:{rank}" for rank in ranks], fontsize=self.device_fontsize)
plt.grid(axis="x")
plt.savefig("{}.{}".format(self.export_name, self.export_format))
def exit_worker(self):

View File

@@ -1 +1 @@
from .tensor_detector import TensorDetector

View File

@@ -14,7 +14,7 @@ class MLP(nn.Module):
super().__init__()
self.mlp = nn.Sequential(nn.Linear(64, 8),
nn.ReLU(),
nn.Linear(8, 32))
def forward(self, x):
return self.mlp(x)
```
@@ -125,4 +125,3 @@ Total GPU Memory Allocated on cuda:0 is 14.0 KB
This tool was inspired by https://github.com/Stonesjtu/pytorch_memlab/blob/master/pytorch_memlab/mem_reporter.py
and https://github.com/Oldpan/Pytorch-Memory-Utils
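A minimal sketch of wiring the detector around the `MLP` above. The import path assumes `TensorDetector` is exported from `colossalai.utils` (it appears in the `__all__` at the top of this commit), and the constructor arguments mirror the signature shown in the tensor_detector.py hunk that follows:

```python
import torch
import torch.nn as nn

from colossalai.utils import TensorDetector  # import path assumed from the __all__ shown earlier


class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.mlp = nn.Sequential(nn.Linear(64, 8), nn.ReLU(), nn.Linear(8, 32))

    def forward(self, x):
        return self.mlp(x)


model = MLP()
# passing module= lets the detector label parameters with their names (see __init__ below)
detector = TensorDetector(show_info=True, log=None, include_cpu=False, module=model)

detector.detect()                   # snapshot of live tensors before the forward pass
out = model(torch.randn(8, 64))
out.sum().backward()
detector.detect()                   # updated snapshot, now including gradients
```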

View File

@@ -1,21 +1,19 @@
import gc
import inspect
from collections import defaultdict
from typing import Optional
import torch
import torch.nn as nn
from typing import Optional
from collections import defaultdict
LINE_WIDTH = 108
LINE = '-' * LINE_WIDTH + '\n'
LINE = "-" * LINE_WIDTH + "\n"
class TensorDetector():
def __init__(self,
show_info: bool = True,
log: str = None,
include_cpu: bool = False,
module: Optional[nn.Module] = None):
class TensorDetector:
def __init__(
self, show_info: bool = True, log: str = None, include_cpu: bool = False, module: Optional[nn.Module] = None
):
"""This class is a detector to detect tensor on different devices.
Args:
@@ -57,40 +55,39 @@ class TensorDetector():
def mem_format(self, real_memory_size):
# format the tensor memory into a reasonable magnitude
if real_memory_size >= 2**30:
return str(real_memory_size / (2**30)) + ' GB'
return str(real_memory_size / (2**30)) + " GB"
if real_memory_size >= 2**20:
return str(real_memory_size / (2**20)) + ' MB'
return str(real_memory_size / (2**20)) + " MB"
if real_memory_size >= 2**10:
return str(real_memory_size / (2**10)) + ' KB'
return str(real_memory_size) + ' B'
return str(real_memory_size / (2**10)) + " KB"
return str(real_memory_size) + " B"
def collect_tensors_state(self):
for obj in gc.get_objects():
if torch.is_tensor(obj):
# skip cpu tensor when include_cpu is false and the tensor we have collected before
if (not self.include_cpu) and obj.device == torch.device('cpu'):
if (not self.include_cpu) and obj.device == torch.device("cpu"):
continue
self.detected.append(id(obj))
# skip parameters we had added in __init__ when module is an instance of nn.Module for the first epoch
if id(obj) not in self.tensor_info:
name = type(obj).__name__
# after backward, we want to update the records, to show you the change
if isinstance(self.module, nn.Module) and name == 'Parameter':
if isinstance(self.module, nn.Module) and name == "Parameter":
if obj.grad is not None:
# with grad attached
for par_name, param in self.module.named_parameters():
if param.requires_grad and param.grad.equal(obj.grad):
name = par_name + ' (with grad)'
name = par_name + " (with grad)"
else:
# with no grad attached
# there will be no new parameters created during running
# so it must be in saved_tensor_info
continue
# we can also mark common tensors as tensor (with grad)
if name == 'Tensor' and (obj.is_leaf or obj.retains_grad):
if name == "Tensor" and (obj.is_leaf or obj.retains_grad):
if obj.grad is not None:
name = name + ' (with grad)'
name = name + " (with grad)"
# in fact, common tensors have no grad
# unless you set retain_grad()
if id(obj) in self.saved_tensor_info.keys() and name == self.saved_tensor_info[id(obj)][0]:
@@ -111,10 +108,10 @@ class TensorDetector():
self.devices.append(obj.device)
def print_tensors_state(self):
template_format = '{:3s}{:<30s}{:>10s}{:>20s}{:>10s}{:>20s}{:>15s}'
template_format = "{:3s}{:<30s}{:>10s}{:>20s}{:>10s}{:>20s}{:>15s}"
self.info += LINE
self.info += template_format.format(' ', 'Tensor', 'device', 'shape', 'grad', 'dtype', 'Mem')
self.info += '\n'
self.info += template_format.format(" ", "Tensor", "device", "shape", "grad", "dtype", "Mem")
self.info += "\n"
self.info += LINE
# if a tensor updates this turn, and was recorded before
@@ -124,24 +121,30 @@ class TensorDetector():
minus = outdated + minus
if len(self.order) > 0:
for tensor_id in self.order:
self.info += template_format.format('+', str(self.tensor_info[tensor_id][0]),
str(self.tensor_info[tensor_id][1]),
str(tuple(self.tensor_info[tensor_id][2])),
str(self.tensor_info[tensor_id][3]),
str(self.tensor_info[tensor_id][4]),
str(self.tensor_info[tensor_id][5]))
self.info += '\n'
self.info += template_format.format(
"+",
str(self.tensor_info[tensor_id][0]),
str(self.tensor_info[tensor_id][1]),
str(tuple(self.tensor_info[tensor_id][2])),
str(self.tensor_info[tensor_id][3]),
str(self.tensor_info[tensor_id][4]),
str(self.tensor_info[tensor_id][5]),
)
self.info += "\n"
if len(self.order) > 0 and len(minus) > 0:
self.info += '\n'
self.info += "\n"
if len(minus) > 0:
for tensor_id in minus:
self.info += template_format.format('-', str(self.saved_tensor_info[tensor_id][0]),
str(self.saved_tensor_info[tensor_id][1]),
str(tuple(self.saved_tensor_info[tensor_id][2])),
str(self.saved_tensor_info[tensor_id][3]),
str(self.saved_tensor_info[tensor_id][4]),
str(self.saved_tensor_info[tensor_id][5]))
self.info += '\n'
self.info += template_format.format(
"-",
str(self.saved_tensor_info[tensor_id][0]),
str(self.saved_tensor_info[tensor_id][1]),
str(tuple(self.saved_tensor_info[tensor_id][2])),
str(self.saved_tensor_info[tensor_id][3]),
str(self.saved_tensor_info[tensor_id][4]),
str(self.saved_tensor_info[tensor_id][5]),
)
self.info += "\n"
# delete the updated tensor
self.saved_tensor_info.pop(tensor_id)
@@ -152,16 +155,16 @@ class TensorDetector():
self.info += LINE
self.info += f"Detect Location: {locate_msg}\n"
for device in self.devices:
if device == torch.device('cpu'):
if device == torch.device("cpu"):
continue
gpu_mem_alloc = self.mem_format(torch.cuda.memory_allocated(device))
self.info += f"Total GPU Memory Allocated on {device} is {gpu_mem_alloc}\n"
self.info += LINE
self.info += '\n\n'
self.info += "\n\n"
if self.show_info:
print(self.info)
if self.log is not None:
with open(self.log + '.log', 'a') as f:
with open(self.log + ".log", "a") as f:
f.write(self.info)
def detect(self, include_cpu=False):

View File

@@ -2,12 +2,12 @@
# -*- encoding: utf-8 -*-
import time
from typing import Tuple
from .cuda import synchronize
class Timer:
"""A timer object which helps to log the execution times, and provides different tools to assess the times.
"""
"""A timer object which helps to log the execution times, and provides different tools to assess the times."""
def __init__(self):
self._started = False
@@ -25,16 +25,14 @@ class Timer:
return time.time()
def start(self):
"""Firstly synchronize cuda, reset the clock and then start the timer.
"""
"""Firstly synchronize cuda, reset the clock and then start the timer."""
self._elapsed = 0
synchronize()
self._start_time = time.time()
self._started = True
def lap(self):
"""lap time and return elapsed time
"""
"""lap time and return elapsed time"""
return self.current_time - self._start_time
def stop(self, keep_in_history: bool = False):
@@ -80,12 +78,11 @@ class Timer:
Note:
Use it only when timer is not in progress
"""
assert not self._started, 'Timer is still in progress'
assert not self._started, "Timer is still in progress"
return self._elapsed
def reset(self):
"""Clear up the timer and its history
"""
"""Clear up the timer and its history"""
self._history = []
self._started = False
self._elapsed = 0
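A short usage sketch based only on the methods visible in this hunk (`start`, `lap`, `stop`, `reset`). The import path follows the `__all__` at the top of the commit; the return value of `stop()` is not shown here, so the sketch does not rely on it:

```python
from colossalai.utils import Timer  # Timer is listed in the __all__ at the top of this commit

timer = Timer()
timer.start()                      # synchronize CUDA first, reset the clock, then start timing
_ = sum(i * i for i in range(1_000_000))
print("lap:", timer.lap())         # seconds elapsed since start(); the timer keeps running
timer.stop(keep_in_history=True)   # stop timing and keep this measurement in the history
timer.reset()                      # clear the timer and its history
```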