[misc] update pre-commit and run all files (#4752)

* [misc] update pre-commit

* [misc] run pre-commit

* [misc] remove useless configuration files

* [misc] ignore cuda for clang-format
Author: Hongxin Liu
Date:   2023-09-19 14:20:26 +08:00
Committed by: GitHub
Parent: 3c6b831c26
Commit: 079bf3cb26

1268 changed files with 50037 additions and 38444 deletions

View File

@@ -1,10 +1,14 @@
-from .param_runtime_order import OrderedParamGenerator    # isort:skip
-from .memory_stats import MemStats    # isort:skip
-from .memory_monitor import AsyncMemoryMonitor, SyncCudaMemoryMonitor    # isort:skip
-from .memstats_collector import MemStatsCollector    # isort:skip
-from .chunk_memstats_collector import ChunkMemStatsCollector    # isort:skip
+from .param_runtime_order import OrderedParamGenerator  # isort:skip
+from .memory_stats import MemStats  # isort:skip
+from .memory_monitor import AsyncMemoryMonitor, SyncCudaMemoryMonitor  # isort:skip
+from .memstats_collector import MemStatsCollector  # isort:skip
+from .chunk_memstats_collector import ChunkMemStatsCollector  # isort:skip

 __all__ = [
-    'AsyncMemoryMonitor', 'SyncCudaMemoryMonitor', 'MemStatsCollector', 'ChunkMemStatsCollector', 'MemStats',
-    'OrderedParamGenerator'
+    "AsyncMemoryMonitor",
+    "SyncCudaMemoryMonitor",
+    "MemStatsCollector",
+    "ChunkMemStatsCollector",
+    "MemStats",
+    "OrderedParamGenerator",
 ]
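Black's "magic trailing comma" is what explodes `__all__` here: once a trailing comma is present, the list stays one element per line, and quotes are normalized to double quotes. A minimal sketch showing that only the layout changes, not the value:

# The two spellings build the same list; only layout and quote style differ.
OLD_ALL = ['AsyncMemoryMonitor', 'SyncCudaMemoryMonitor', 'MemStatsCollector', 'ChunkMemStatsCollector', 'MemStats', 'OrderedParamGenerator']
NEW_ALL = [
    "AsyncMemoryMonitor",
    "SyncCudaMemoryMonitor",
    "MemStatsCollector",
    "ChunkMemStatsCollector",
    "MemStats",
    "OrderedParamGenerator",
]
assert OLD_ALL == NEW_ALL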

View File

@@ -8,7 +8,6 @@ from .memstats_collector import MemStatsCollector


 class ChunkMemStatsCollector(MemStatsCollector):
-
     def __init__(self, chunk_manager: ChunkManager, memstats: Optional[MemStats] = None) -> None:
         """
@@ -27,10 +26,11 @@ class ChunkMemStatsCollector(MemStatsCollector):
         record model data volume on cuda and cpu.
         """
         if self._start_flag and not self.use_outside_memstats:
-            cuda_mem = self._chunk_manager.total_mem['cuda']
+            cuda_mem = self._chunk_manager.total_mem["cuda"]
             self._memstats.record_max_cuda_model_data(cuda_mem)

     @property
     def cuda_margin_mem(self) -> float:
         from colossalai.legacy.utils.memory import colo_device_memory_capacity
+
         return colo_device_memory_capacity(get_current_device()) - self._memstats.max_overall_cuda
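`cuda_margin_mem` is a plain difference: device capacity minus the peak overall CUDA usage recorded in `MemStats`. A worked sketch with made-up numbers (the 80 GiB capacity and 52 GiB peak are illustrative, not measured):

GiB = 1024**3
capacity = 80 * GiB          # stands in for colo_device_memory_capacity(get_current_device())
max_overall_cuda = 52 * GiB  # stands in for self._memstats.max_overall_cuda
cuda_margin_mem = capacity - max_overall_cuda
print(f"margin left for optimizer states: {cuda_margin_mem / GiB:.0f} GiB")  # 28 GiB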

View File

@@ -111,6 +111,7 @@ class AsyncMemoryMonitor(MemoryMonitor):
     def _measure_usage(self):
         from colossalai.legacy.utils import colo_device_memory_used
+
         max_usage = 0
         while self.keep_measuring:
             max_usage = max(
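`_measure_usage` is a polling loop: repeatedly sample device memory while a flag allows it and keep the running maximum. A self-contained sketch of the pattern, assuming a sampler callback and a poll interval rather than the class's real attributes:

import time

def measure_usage_sketch(get_used_bytes, keep_measuring, interval: float = 0.01) -> int:
    # Poll until told to stop, remembering the peak reading.
    max_usage = 0
    while keep_measuring():
        max_usage = max(max_usage, get_used_bytes())
        time.sleep(interval)
    return max_usage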

View File

@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Optional
+from typing import List, Optional

 import torch
@@ -6,7 +6,6 @@ from .param_runtime_order import OrderedParamGenerator


 class MemStats(object):
-
     def __init__(self) -> None:
         """
         Store the non model data statistics used for Gemini and GeminiOptimizer.
@@ -92,17 +91,17 @@ class MemStats(object):
         return self._param_runtime_order

     def non_model_data_list(self, device_type: str) -> List[int]:
-        if device_type == 'cuda':
+        if device_type == "cuda":
             return self._non_model_data_cuda_list
-        elif device_type == 'cpu':
+        elif device_type == "cpu":
             return self._non_model_data_cpu_list
         else:
             raise TypeError

     def max_non_model_data(self, device_type: str) -> float:
-        if device_type == 'cuda':
+        if device_type == "cuda":
             return max(self._non_model_data_cuda_list)
-        elif device_type == 'cpu':
+        elif device_type == "cpu":
             return max(self._non_model_data_cpu_list)
         else:
             raise TypeError
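The two accessors differ only in whether they return the raw per-step list or its maximum; any other device string falls through to `raise TypeError`. With made-up sample data:

cuda_samples = [512, 2048, 1024]  # illustrative per-step non-model CUDA bytes
# non_model_data_list("cuda") would return the list itself;
# max_non_model_data("cuda") would return its maximum:
assert max(cuda_samples) == 2048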

View File

@@ -40,11 +40,12 @@ class MemStatsCollector:
         Returns:
             int: max non model data memory usage of current sampling period
         """
-        assert not self._start_flag, 'Cannot get mem stats info during collection phase.'
-        assert self._step_total > 0, 'Cannot get mem stats info before collection phase.'
-        assert len(self._memstats.non_model_data_list(device_type)) > self._step_idx, \
-            f"{len(self._memstats.non_model_data_list(device_type))} should be > than step idx {self._step_idx}, "\
-            f"step total {self._step_total}"
+        assert not self._start_flag, "Cannot get mem stats info during collection phase."
+        assert self._step_total > 0, "Cannot get mem stats info before collection phase."
+        assert len(self._memstats.non_model_data_list(device_type)) > self._step_idx, (
+            f"{len(self._memstats.non_model_data_list(device_type))} should be > than step idx {self._step_idx}, "
+            f"step total {self._step_total}"
+        )
         next_non_model_data = self._memstats.non_model_data_list(device_type)[self._step_idx]
         self._step_idx = (self._step_idx + 1) % self._step_total
         return next_non_model_data
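The rewritten assert swaps backslash continuations for adjacent f-string literals inside parentheses, which Python concatenates into a single message. The mechanism in isolation:

step_idx, step_total = 3, 3
msg = (
    f"{4} should be > than step idx {step_idx}, "
    f"step total {step_total}"
)
assert msg == "4 should be > than step idx 3, step total 3"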
@@ -60,9 +61,9 @@ class MemStatsCollector:
     def finish_collection(self):
         self.sample_overall_data()
         # self._step_total = len(self._sampling_time)
-        self._step_total = len(self._memstats.non_model_data_list('cuda'))
+        self._step_total = len(self._memstats.non_model_data_list("cuda"))
         self._start_flag = False
-        print(f'finish_collection {self._step_total}')
+        print(f"finish_collection {self._step_total}")

     # deprecated
     def record_model_data_volume(self) -> None:
@@ -73,7 +74,7 @@ class MemStatsCollector:
         from colossalai.legacy.zero.gemini import StatefulTensor

         # The following code work for ZeroInitContext, which is deprecated in v0.1.12
-        cuda_mem = StatefulTensor.GST_MGR.total_mem['cuda']
+        cuda_mem = StatefulTensor.GST_MGR.total_mem["cuda"]
         self._memstats.record_max_cuda_model_data(cuda_mem)

     def sample_overall_data(self) -> None:
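`finish_collection` pins `_step_total` to the number of recorded steps, after which `next_period_non_model_data` replays the recorded list cyclically, one entry per call. The cursor arithmetic in isolation:

samples = [100, 300, 200]  # assumed per-step non-model data
step_total, step_idx = len(samples), 0
replayed = []
for _ in range(5):
    replayed.append(samples[step_idx])
    step_idx = (step_idx + 1) % step_total  # wraps after the last step
assert replayed == [100, 300, 200, 100, 300]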

View File

@@ -4,7 +4,6 @@ import torch


 class ParamGenerator(ABC):
-
     def append(self, param: torch.nn.Parameter):
         pass
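`ParamGenerator` is the abstract interface that `OrderedParamGenerator` fills in: `append` records parameters in the order the runtime touches them. A minimal sketch of one order-preserving, deduplicating implementation (the dict-based storage is an assumption, not the library's actual code):

import torch

class OrderedParamGeneratorSketch:
    def __init__(self) -> None:
        self._params = {}  # dicts preserve insertion order and dedupe by key

    def append(self, param: torch.nn.Parameter) -> None:
        self._params[id(param)] = param

    def generate(self):
        yield from self._params.values()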

View File

@@ -10,10 +10,10 @@ from colossalai.utils import _cast_float

 from .memory_stats import MemStats

-__all__ = ['RuntimeMemTracer']
+__all__ = ["RuntimeMemTracer"]


-class RuntimeMemTracer():
+class RuntimeMemTracer:
     """RuntimeMemTracer for the module training using ColoParameter.

     Trace non-model memory usage during fwd+bwd process.
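The `class RuntimeMemTracer():` to `class RuntimeMemTracer:` change is purely cosmetic: empty parentheses in a class statement are redundant in Python 3.

class WithParens():  # old spelling
    pass

class WithoutParens:  # preferred spelling
    pass

assert WithParens.__bases__ == WithoutParens.__bases__ == (object,)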

View File

@@ -15,9 +15,9 @@ from .chunk_memstats_collector import ChunkMemStatsCollector


 class ModuleInfos:
-
-    def __init__(self, module: torch.nn.Module, module_name: str, module_full_name: str,
-                 parent_module: torch.nn.Module):
+    def __init__(
+        self, module: torch.nn.Module, module_name: str, module_full_name: str, parent_module: torch.nn.Module
+    ):
         self.module = module
         self.module_name = module_name
         self.module_full_name = module_full_name
@@ -35,14 +35,13 @@ class StaticMemStatsCollector(ChunkMemStatsCollector):
         self.module_info_list = []

     def init_mem_stats(self, *inputs):
-
         self.register_opnodes_recursively(self.module)
         self.refactor_module()

         self.module = self.module.cpu()
         self.module.train()

-        data = [MetaTensor(torch.rand(inp.shape, device='meta'), fake_device='cpu') for inp in inputs]
+        data = [MetaTensor(torch.rand(inp.shape, device="meta"), fake_device="cpu") for inp in inputs]
         gm = symbolic_trace(self.module)
         interp = MetaInfoProp(gm)
         interp.propagate(*data)
@@ -87,12 +86,13 @@ class StaticMemStatsCollector(ChunkMemStatsCollector):
         for modInfo in self.module_info_list:
             modInfo.parent_module.__setattr__(modInfo.module_name, modInfo.module)

-    def register_opnodes_recursively(self,
-                                     module: torch.nn.Module,
-                                     name: str = "",
-                                     full_name: str = "",
-                                     parent_module: Optional[torch.nn.Module] = None):
-
+    def register_opnodes_recursively(
+        self,
+        module: torch.nn.Module,
+        name: str = "",
+        full_name: str = "",
+        parent_module: Optional[torch.nn.Module] = None,
+    ):
         assert isinstance(module, torch.nn.Module)

         for child_name, child in module.named_children():
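`register_opnodes_recursively` walks the module tree through `named_children()`, threading a dotted full name down the recursion. A standalone sketch of that traversal pattern (printing stands in for the real registration work):

import torch

def walk_modules(module: torch.nn.Module, full_name: str = "") -> None:
    assert isinstance(module, torch.nn.Module)
    for child_name, child in module.named_children():
        child_full_name = f"{full_name}.{child_name}" if full_name else child_name
        print(child_full_name, type(child).__name__)  # e.g. "0 Linear"
        walk_modules(child, child_full_name)

walk_modules(torch.nn.Sequential(torch.nn.Linear(4, 4), torch.nn.ReLU()))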

View File

@@ -14,7 +14,7 @@ def colo_model_optimizer_usage(optim) -> Tuple[int, int]:
     """
     if optim is None:
         return 0, 0
-    assert hasattr(optim, 'get_memory_usage'), f"{type(optim)} has no attr get_memory_usage()"
+    assert hasattr(optim, "get_memory_usage"), f"{type(optim)} has no attr get_memory_usage()"
     return optim.get_memory_usage()
@@ -35,16 +35,16 @@ def colo_model_mem_usage(model: torch.nn.Module) -> Tuple[int, int]:
             return 0, 0
         assert isinstance(t, torch.Tensor)
         _cpu_mem_usage, _cuda_mem_usage = 0, 0
-        if t.device.type == 'cpu':
+        if t.device.type == "cpu":
             _cpu_mem_usage += t.numel() * t.element_size()
-        elif t.device.type == 'cuda':
+        elif t.device.type == "cuda":
             _cuda_mem_usage += t.numel() * t.element_size()
         return _cuda_mem_usage, _cpu_mem_usage

     cuda_mem_usage = 0
     cpu_mem_usage = 0
     for param in model.parameters():
-        if hasattr(param, 'colo_attr'):
+        if hasattr(param, "colo_attr"):
             t_cuda, t_cpu = param.colo_attr.get_memory_usage()
             cuda_mem_usage += t_cuda
             cpu_mem_usage += t_cpu
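The inner helper buckets each tensor's footprint, `numel() * element_size()`, by device type before the outer loop sums over parameters. The per-tensor accounting in isolation:

import torch

def tensor_mem_usage(t: torch.Tensor):
    # returns (cuda_bytes, cpu_bytes) for a single tensor
    cuda_b = cpu_b = 0
    n_bytes = t.numel() * t.element_size()
    if t.device.type == "cpu":
        cpu_b += n_bytes
    elif t.device.type == "cuda":
        cuda_b += n_bytes
    return cuda_b, cpu_b

print(tensor_mem_usage(torch.zeros(1024, dtype=torch.float32)))  # (0, 4096): 1024 floats * 4 bytes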