[misc] update pre-commit and run all files (#4752)

* [misc] update pre-commit

* [misc] run pre-commit

* [misc] remove useless configuration files

* [misc] ignore cuda for clang-format
Hongxin Liu authored on 2023-09-19 14:20:26 +08:00, committed by GitHub
parent 3c6b831c26
commit 079bf3cb26
1268 changed files with 50037 additions and 38444 deletions

View File

@@ -13,20 +13,20 @@ from .tensor_detector import TensorDetector
from .timer import MultiTimer, Timer
__all__ = [
'conditional_context',
'get_current_device',
'synchronize',
'empty_cache',
'set_to_cuda',
'Timer',
'MultiTimer',
'multi_tensor_applier',
'TensorDetector',
'ensure_path_exists',
'disposable',
'_cast_float',
'free_storage',
'set_seed',
'is_ddp_ignored',
'set_device',
"conditional_context",
"get_current_device",
"synchronize",
"empty_cache",
"set_to_cuda",
"Timer",
"MultiTimer",
"multi_tensor_applier",
"TensorDetector",
"ensure_path_exists",
"disposable",
"_cast_float",
"free_storage",
"set_seed",
"is_ddp_ignored",
"set_device",
]

View File

@@ -28,7 +28,7 @@ def conditional_context(context_manager, enable=True):
def is_ddp_ignored(p):
return getattr(p, '_ddp_to_ignore', False)
return getattr(p, "_ddp_to_ignore", False)
def disposable(func: Callable) -> Callable:

View File

@@ -29,9 +29,9 @@ def get_current_device() -> torch.device:
If CUDA is available, return the current GPU device; otherwise, return the CPU device.
"""
if torch.cuda.is_available():
return torch.device(f'cuda:{torch.cuda.current_device()}')
return torch.device(f"cuda:{torch.cuda.current_device()}")
else:
return torch.device('cpu')
return torch.device("cpu")
def synchronize():

View File

@@ -27,19 +27,18 @@ def call_to_str(base, *args, **kwargs):
Returns:
str: A string representation of base(*args, **kwargs)
"""
name = f'{base}('
name = f"{base}("
if args:
name += ', '.join(repr(arg) for arg in args)
name += ", ".join(repr(arg) for arg in args)
if kwargs:
name += ', '
name += ", "
if kwargs:
name += ', '.join(f'{key}={repr(arg)}' for key, arg in kwargs.items())
name += ')'
name += ", ".join(f"{key}={repr(arg)}" for key, arg in kwargs.items())
name += ")"
return name
class InsertPostInitMethodToModuleSubClasses(object):
def __init__(self, default_dtype: Optional[torch.dtype] = None):
self._old_default_dtype = None
self._default_dtype = default_dtype
@@ -53,7 +52,6 @@ class InsertPostInitMethodToModuleSubClasses(object):
torch.set_default_dtype(self._default_dtype)
def preprocess_after(f):
@functools.wraps(f)
def wrapper(module: torch.nn.Module, *args, **kwargs):
f(module, *args, **kwargs)
@@ -74,7 +72,7 @@ class InsertPostInitMethodToModuleSubClasses(object):
substitute_init_recursively(torch.nn.modules.module.Module, _enable_class, set())
# holding on to the current __init__subclass__ for exit
torch.nn.modules.module.Module._old_init_subclass = (torch.nn.modules.module.Module.__init_subclass__)
torch.nn.modules.module.Module._old_init_subclass = torch.nn.modules.module.Module.__init_subclass__
# Replace .__init__() for future subclasses of torch.nn.Module
torch.nn.modules.module.Module.__init_subclass__ = classmethod(_init_subclass)
@@ -82,12 +80,11 @@ class InsertPostInitMethodToModuleSubClasses(object):
return self
def __exit__(self, exc_type, exc_value, traceback):
if self._default_dtype is not None:
torch.set_default_dtype(self._old_default_dtype)
def _disable_class(cls):
if not hasattr(cls, '_old_init'):
if not hasattr(cls, "_old_init"):
raise AttributeError(
f"_old_init is not found in the {cls.__name__}, please make sure that you have imported {cls.__name__} before entering the context."
)
@@ -97,7 +94,7 @@ class InsertPostInitMethodToModuleSubClasses(object):
substitute_init_recursively(torch.nn.modules.module.Module, _disable_class, set())
# Replace .__init__() for future subclasses of torch.nn.Module
torch.nn.modules.module.Module.__init_subclass__ = (torch.nn.modules.module.Module._old_init_subclass)
torch.nn.modules.module.Module.__init_subclass__ = torch.nn.modules.module.Module._old_init_subclass
self._post_context_exec()
# Now that we cleaned up the metaclass injection, raise the exception.

View File

@@ -19,8 +19,8 @@ def get_moe_epsize_param_dict(model: nn.Module) -> Dict[int, List[nn.Parameter]]
"""
epsize_param_dict = dict()
for param in model.parameters():
if not hasattr(param, 'moe_info'):
ep_size = 1 # set ep_size to 1 for dp parameters
if not hasattr(param, "moe_info"):
ep_size = 1 # set ep_size to 1 for dp parameters
else:
ep_size = param.moe_info.ep_size
if ep_size not in epsize_param_dict:
@@ -37,7 +37,6 @@ def sync_moe_model_param(model: nn.Module):
model (:class:`torch.nn.Module`): A PyTorch model whose parameters' consistency you want to check.
"""
if is_using_ddp():
param_dict = get_moe_epsize_param_dict(model)
# synchronize the parameters whose dp_group is the whole world

View File

@@ -25,7 +25,9 @@ class MultiTensorApply(object):
raise RuntimeError(
"Attempted to call MultiTensorApply method, but MultiTensorApply "
"is not available, possibly because Apex was installed without "
"--cpp_ext --cuda_ext. Original import error message:", MultiTensorApply.import_err)
"--cpp_ext --cuda_ext. Original import error message:",
MultiTensorApply.import_err,
)
def __call__(self, op, noop_flag_buffer, tensor_lists, *args):
self.check_avail()

View File

@@ -1,7 +1,7 @@
# Rank Recorder
This is a useful tool to get records of certain functions in each rank. The records of each rank are dumped into a JSON file after the multi-process program ends. You can parse and visualize the JSON file easily.
Before using the tool, you should ensure that dist.is_initialized() returns true before the program exits.
## Usage
@@ -58,10 +58,10 @@ def worker(rank):
with recorder("calc_1(x100)", rank) as r:
calc(100, 100)
with recorder("calc_2(x400)", rank) as r:
calc(400, 400)
with recorder("calc_2(x200)", rank) as r:
calc(200, 200)
@@ -69,4 +69,4 @@ if __name__ == "__main__":
mp.spawn(worker, nprocs=WORLD_SIZE)
```
Run the script directly and you will get `kernel_select.json` and `kernel_select.png` in your current folder.
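A minimal end-to-end version of the worker pattern shown above; the `gloo` backend and the localhost rendezvous settings are assumptions added only to make the sketch self-contained, while `recorder(name, rank)` and `mp.spawn(worker, nprocs=WORLD_SIZE)` come from the README itself:

```python
import os

import torch.distributed as dist
import torch.multiprocessing as mp

from colossalai.utils.rank_recorder import recorder

WORLD_SIZE = 2  # number of processes to spawn (illustrative)


def worker(rank: int):
    # the recorder dumps its records at process exit, so the process group
    # must be initialized (dist.is_initialized() == True) before the program ends
    os.environ.setdefault("MASTER_ADDR", "localhost")  # assumed rendezvous settings
    os.environ.setdefault("MASTER_PORT", "29500")
    dist.init_process_group("gloo", rank=rank, world_size=WORLD_SIZE)

    # record an arbitrary region of work under a named label
    with recorder("dummy_work", rank):
        sum(i * i for i in range(100_000))


if __name__ == "__main__":
    mp.spawn(worker, nprocs=WORLD_SIZE)
```

After the workers exit, the per-rank records are dumped, merged, and visualized (see the `dump_record`, `merge_recode`, and `visualize_record` methods in the rank_recorder.py hunk below).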

View File

@@ -1,3 +1,3 @@
from colossalai.utils.rank_recorder.rank_recorder import recorder
__all__ = ["recorder"]

View File

@@ -1,18 +1,15 @@
import time
from typing import List, Dict
import atexit
import json
import os
import time
import shutil
import atexit
import time
from typing import Dict, List
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import torch
import torch.distributed as dist
import json
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
cmap = list(mcolors.TABLEAU_COLORS.values())
LOG_FOLDER = "record.log"
@@ -20,7 +17,6 @@ MAX_WAIT_TIME = 20
class Event:
def __init__(self, start: int, end: int, name: str, rank: int) -> None:
self.start = start
self.end = end
@@ -29,16 +25,15 @@ class Event:
class Recorder:
def __init__(self) -> None:
self.rank_to_history: Dict[int, List[Event]] = {}
self.base_time = time.time()
self.temp_event = None
self.export_format = 'png'
self.export_name = 'test'
self.export_format = "png"
self.export_name = "test"
self.dpi = 500
self.theme = 'dark_background'
self.theme = "dark_background"
self.figure_width = 30
self.figure_height = 10
self.legend_fontsize = 16
@@ -84,18 +79,18 @@ class Recorder:
def dump_record(self):
rank = dist.get_rank()
rank_to_history = self.rank_to_history
records = {'base_time': self.base_time, 'content': {}}
records = {"base_time": self.base_time, "content": {}}
for record_rank in rank_to_history:
history = rank_to_history[record_rank]
recs = []
for event in history:
rec = {'start': event.start, 'end': event.end, 'name': event.name}
rec = {"start": event.start, "end": event.end, "name": event.name}
recs.append(rec)
records['content'][record_rank] = recs
records["content"][record_rank] = recs
dump_name = f'{rank}.json'
dump_name = f"{rank}.json"
dump_path = os.path.join(LOG_FOLDER, dump_name)
with open(dump_path, 'w', encoding='utf-8') as f:
with open(dump_path, "w", encoding="utf-8") as f:
json.dump(records, f, ensure_ascii=False)
def merge_recode(self):
@@ -117,24 +112,22 @@ class Recorder:
logs_path = [os.path.join(LOG_FOLDER, file) for file in os.listdir(LOG_FOLDER)]
recoders = {}
for path in logs_path:
with open(path, 'r', encoding='utf-8') as f:
with open(path, "r", encoding="utf-8") as f:
recs = json.load(f)
for record_rank in recs['content']:
history = recs['content'][record_rank]
for record_rank in recs["content"]:
history = recs["content"][record_rank]
recoders[record_rank] = []
for rec in history:
recoders[record_rank].append({
'start': rec['start'] - base_time,
'end': rec['end'] - base_time,
'name': rec['name']
})
recoders[record_rank].append(
{"start": rec["start"] - base_time, "end": rec["end"] - base_time, "name": rec["name"]}
)
shutil.rmtree(LOG_FOLDER)
with open(self.export_name + '.json', 'w', encoding='utf-8') as f:
with open(self.export_name + ".json", "w", encoding="utf-8") as f:
json.dump(recoders, f, ensure_ascii=False)
def visualize_record(self):
with open(self.export_name + '.json', 'r', encoding='utf-8') as f:
with open(self.export_name + ".json", "r", encoding="utf-8") as f:
records = json.load(f)
records = dict(records)
ranks = list(sorted(records.keys()))
@@ -147,9 +140,9 @@ class Recorder:
for rank in ranks:
rank_records = records[rank]
for rec in rank_records:
s = rec['start']
e = rec['end']
name = rec['name']
s = rec["start"]
e = rec["end"]
name = rec["name"]
if name not in name_list:
name_list[name] = len(name_list)
bar = plt.barh(rank, width=e - s, height=self.bar_height, left=s, color=cmap[name_list[name]])
@@ -157,8 +150,8 @@ class Recorder:
plots[name] = bar
plt.legend(list(plots.values()), list(plots.keys()), loc="upper left", fontsize=self.legend_fontsize)
plt.yticks(ticks=ranks, labels=[f'Device:{rank}' for rank in ranks], fontsize=self.device_fontsize)
plt.grid(axis='x')
plt.yticks(ticks=ranks, labels=[f"Device:{rank}" for rank in ranks], fontsize=self.device_fontsize)
plt.grid(axis="x")
plt.savefig("{}.{}".format(self.export_name, self.export_format))
def exit_worker(self):

View File

@@ -1 +1 @@
from .tensor_detector import TensorDetector

View File

@@ -14,7 +14,7 @@ class MLP(nn.Module):
super().__init__()
self.mlp = nn.Sequential(nn.Linear(64, 8),
nn.ReLU(),
nn.Linear(8, 32))
def forward(self, x):
return self.mlp(x)
```
@@ -125,4 +125,3 @@ Total GPU Memory Allocated on cuda:0 is 14.0 KB
This tool was inspired by https://github.com/Stonesjtu/pytorch_memlab/blob/master/pytorch_memlab/mem_reporter.py
and https://github.com/Oldpan/Pytorch-Memory-Utils
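A minimal sketch of wiring the detector around the `MLP` above. The import path assumes `TensorDetector` is exported from `colossalai.utils` (it appears in the `__all__` at the top of this commit), and the constructor arguments mirror the signature shown in the tensor_detector.py hunk that follows:

```python
import torch
import torch.nn as nn

from colossalai.utils import TensorDetector  # import path assumed from the __all__ shown earlier


class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.mlp = nn.Sequential(nn.Linear(64, 8), nn.ReLU(), nn.Linear(8, 32))

    def forward(self, x):
        return self.mlp(x)


model = MLP()
# passing module= lets the detector label parameters with their names (see __init__ below)
detector = TensorDetector(show_info=True, log=None, include_cpu=False, module=model)

detector.detect()                   # snapshot of live tensors before the forward pass
out = model(torch.randn(8, 64))
out.sum().backward()
detector.detect()                   # updated snapshot, now including gradients
```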

View File

@@ -1,21 +1,19 @@
import gc
import inspect
from collections import defaultdict
from typing import Optional
import torch
import torch.nn as nn
from typing import Optional
from collections import defaultdict
LINE_WIDTH = 108
LINE = '-' * LINE_WIDTH + '\n'
LINE = "-" * LINE_WIDTH + "\n"
class TensorDetector():
def __init__(self,
show_info: bool = True,
log: str = None,
include_cpu: bool = False,
module: Optional[nn.Module] = None):
class TensorDetector:
def __init__(
self, show_info: bool = True, log: str = None, include_cpu: bool = False, module: Optional[nn.Module] = None
):
"""This class is a detector to detect tensor on different devices.
Args:
@@ -57,40 +55,39 @@ class TensorDetector():
def mem_format(self, real_memory_size):
# format the tensor memory into a reasonable magnitude
if real_memory_size >= 2**30:
return str(real_memory_size / (2**30)) + ' GB'
return str(real_memory_size / (2**30)) + " GB"
if real_memory_size >= 2**20:
return str(real_memory_size / (2**20)) + ' MB'
return str(real_memory_size / (2**20)) + " MB"
if real_memory_size >= 2**10:
return str(real_memory_size / (2**10)) + ' KB'
return str(real_memory_size) + ' B'
return str(real_memory_size / (2**10)) + " KB"
return str(real_memory_size) + " B"
def collect_tensors_state(self):
for obj in gc.get_objects():
if torch.is_tensor(obj):
# skip cpu tensor when include_cpu is false and the tensor we have collected before
if (not self.include_cpu) and obj.device == torch.device('cpu'):
if (not self.include_cpu) and obj.device == torch.device("cpu"):
continue
self.detected.append(id(obj))
# skip parameters we had added in __init__ when module is an instance of nn.Module for the first epoch
if id(obj) not in self.tensor_info:
name = type(obj).__name__
# after backward, we want to update the records, to show you the change
if isinstance(self.module, nn.Module) and name == 'Parameter':
if isinstance(self.module, nn.Module) and name == "Parameter":
if obj.grad is not None:
# with grad attached
for par_name, param in self.module.named_parameters():
if param.requires_grad and param.grad.equal(obj.grad):
name = par_name + ' (with grad)'
name = par_name + " (with grad)"
else:
# with no grad attached
# there will be no new parameters created during running
# so it must be in saved_tensor_info
continue
# we can also mark common tensors as tensor (with grad)
if name == 'Tensor' and (obj.is_leaf or obj.retains_grad):
if name == "Tensor" and (obj.is_leaf or obj.retains_grad):
if obj.grad is not None:
name = name + ' (with grad)'
name = name + " (with grad)"
# in fact, common tensors have no grad
# unless you set retain_grad()
if id(obj) in self.saved_tensor_info.keys() and name == self.saved_tensor_info[id(obj)][0]:
@@ -111,10 +108,10 @@ class TensorDetector():
self.devices.append(obj.device)
def print_tensors_state(self):
template_format = '{:3s}{:<30s}{:>10s}{:>20s}{:>10s}{:>20s}{:>15s}'
template_format = "{:3s}{:<30s}{:>10s}{:>20s}{:>10s}{:>20s}{:>15s}"
self.info += LINE
self.info += template_format.format(' ', 'Tensor', 'device', 'shape', 'grad', 'dtype', 'Mem')
self.info += '\n'
self.info += template_format.format(" ", "Tensor", "device", "shape", "grad", "dtype", "Mem")
self.info += "\n"
self.info += LINE
# if a tensor updates this turn, and was recorded before
@@ -124,24 +121,30 @@ class TensorDetector():
minus = outdated + minus
if len(self.order) > 0:
for tensor_id in self.order:
self.info += template_format.format('+', str(self.tensor_info[tensor_id][0]),
str(self.tensor_info[tensor_id][1]),
str(tuple(self.tensor_info[tensor_id][2])),
str(self.tensor_info[tensor_id][3]),
str(self.tensor_info[tensor_id][4]),
str(self.tensor_info[tensor_id][5]))
self.info += '\n'
self.info += template_format.format(
"+",
str(self.tensor_info[tensor_id][0]),
str(self.tensor_info[tensor_id][1]),
str(tuple(self.tensor_info[tensor_id][2])),
str(self.tensor_info[tensor_id][3]),
str(self.tensor_info[tensor_id][4]),
str(self.tensor_info[tensor_id][5]),
)
self.info += "\n"
if len(self.order) > 0 and len(minus) > 0:
self.info += '\n'
self.info += "\n"
if len(minus) > 0:
for tensor_id in minus:
self.info += template_format.format('-', str(self.saved_tensor_info[tensor_id][0]),
str(self.saved_tensor_info[tensor_id][1]),
str(tuple(self.saved_tensor_info[tensor_id][2])),
str(self.saved_tensor_info[tensor_id][3]),
str(self.saved_tensor_info[tensor_id][4]),
str(self.saved_tensor_info[tensor_id][5]))
self.info += '\n'
self.info += template_format.format(
"-",
str(self.saved_tensor_info[tensor_id][0]),
str(self.saved_tensor_info[tensor_id][1]),
str(tuple(self.saved_tensor_info[tensor_id][2])),
str(self.saved_tensor_info[tensor_id][3]),
str(self.saved_tensor_info[tensor_id][4]),
str(self.saved_tensor_info[tensor_id][5]),
)
self.info += "\n"
# delete the updated tensor
self.saved_tensor_info.pop(tensor_id)
@@ -152,16 +155,16 @@ class TensorDetector():
self.info += LINE
self.info += f"Detect Location: {locate_msg}\n"
for device in self.devices:
if device == torch.device('cpu'):
if device == torch.device("cpu"):
continue
gpu_mem_alloc = self.mem_format(torch.cuda.memory_allocated(device))
self.info += f"Total GPU Memory Allocated on {device} is {gpu_mem_alloc}\n"
self.info += LINE
self.info += '\n\n'
self.info += "\n\n"
if self.show_info:
print(self.info)
if self.log is not None:
with open(self.log + '.log', 'a') as f:
with open(self.log + ".log", "a") as f:
f.write(self.info)
def detect(self, include_cpu=False):

View File

@@ -2,12 +2,12 @@
# -*- encoding: utf-8 -*-
import time
from typing import Tuple
from .cuda import synchronize
class Timer:
"""A timer object which helps to log the execution times, and provides different tools to assess the times.
"""
"""A timer object which helps to log the execution times, and provides different tools to assess the times."""
def __init__(self):
self._started = False
@@ -25,16 +25,14 @@ class Timer:
return time.time()
def start(self):
"""Firstly synchronize cuda, reset the clock and then start the timer.
"""
"""Firstly synchronize cuda, reset the clock and then start the timer."""
self._elapsed = 0
synchronize()
self._start_time = time.time()
self._started = True
def lap(self):
"""lap time and return elapsed time
"""
"""lap time and return elapsed time"""
return self.current_time - self._start_time
def stop(self, keep_in_history: bool = False):
@@ -80,12 +78,11 @@ class Timer:
Note:
Use it only when timer is not in progress
"""
assert not self._started, 'Timer is still in progress'
assert not self._started, "Timer is still in progress"
return self._elapsed
def reset(self):
"""Clear up the timer and its history
"""
"""Clear up the timer and its history"""
self._history = []
self._started = False
self._elapsed = 0
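A short usage sketch based only on the methods visible in this hunk (`start`, `lap`, `stop`, `reset`). The import path follows the `__all__` at the top of the commit; the return value of `stop()` is not shown here, so the sketch does not rely on it:

```python
from colossalai.utils import Timer  # Timer is listed in the __all__ at the top of this commit

timer = Timer()
timer.start()                      # synchronize CUDA first, reset the clock, then start timing
_ = sum(i * i for i in range(1_000_000))
print("lap:", timer.lap())         # seconds elapsed since start(); the timer keeps running
timer.stop(keep_in_history=True)   # stop timing and keep this measurement in the history
timer.reset()                      # clear the timer and its history
```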