Mirror of https://github.com/hpcaitech/ColossalAI.git
[misc] update pre-commit and run all files (#4752)
* [misc] update pre-commit
* [misc] run pre-commit
* [misc] remove useless configuration files
* [misc] ignore cuda for clang-format
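The diff below is almost entirely mechanical reformatting: single quotes become double quotes, long signatures are exploded one parameter per line with trailing commas, and blank lines after class headers are dropped. These rewrites are characteristic of the black formatter; assuming black is among the updated hooks (the pre-commit config itself is not shown in this excerpt), the transformation can be reproduced locally:

# A hedged sketch, not the project's actual hook setup: reproduce the
# quote normalization seen throughout this diff via black's Python API.
import black

src = "__all__ = ['RuntimeMemTracer']\n"
print(black.format_str(src, mode=black.Mode()))
# -> __all__ = ["RuntimeMemTracer"]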
@@ -1,10 +1,14 @@
 from .param_runtime_order import OrderedParamGenerator  # isort:skip
 from .memory_stats import MemStats  # isort:skip
 from .memory_monitor import AsyncMemoryMonitor, SyncCudaMemoryMonitor  # isort:skip
 from .memstats_collector import MemStatsCollector  # isort:skip
 from .chunk_memstats_collector import ChunkMemStatsCollector  # isort:skip
 
 __all__ = [
-    'AsyncMemoryMonitor', 'SyncCudaMemoryMonitor', 'MemStatsCollector', 'ChunkMemStatsCollector', 'MemStats',
-    'OrderedParamGenerator'
+    "AsyncMemoryMonitor",
+    "SyncCudaMemoryMonitor",
+    "MemStatsCollector",
+    "ChunkMemStatsCollector",
+    "MemStats",
+    "OrderedParamGenerator",
 ]
@@ -8,7 +8,6 @@ from .memstats_collector import MemStatsCollector
 
 
 class ChunkMemStatsCollector(MemStatsCollector):
-
     def __init__(self, chunk_manager: ChunkManager, memstats: Optional[MemStats] = None) -> None:
         """
 
@@ -27,10 +26,11 @@ class ChunkMemStatsCollector(MemStatsCollector):
         record model data volume on cuda and cpu.
         """
         if self._start_flag and not self.use_outside_memstats:
-            cuda_mem = self._chunk_manager.total_mem['cuda']
+            cuda_mem = self._chunk_manager.total_mem["cuda"]
             self._memstats.record_max_cuda_model_data(cuda_mem)
 
     @property
     def cuda_margin_mem(self) -> float:
         from colossalai.legacy.utils.memory import colo_device_memory_capacity
+
         return colo_device_memory_capacity(get_current_device()) - self._memstats.max_overall_cuda
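For context on cuda_margin_mem above: it reports the headroom between total device capacity and the peak overall CUDA usage recorded during a sampling period. A minimal sketch of the same computation in plain PyTorch (the helper name is illustrative, and torch.cuda.get_device_properties stands in for colo_device_memory_capacity; this is not ColossalAI API):

import torch

def cuda_margin_mem(max_overall_cuda: int, device: int = 0) -> int:
    # Total device memory minus the observed peak = bytes left for non-model data.
    capacity = torch.cuda.get_device_properties(device).total_memory
    return capacity - max_overall_cuda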
@@ -111,6 +111,7 @@ class AsyncMemoryMonitor(MemoryMonitor):
 
     def _measure_usage(self):
         from colossalai.legacy.utils import colo_device_memory_used
+
         max_usage = 0
         while self.keep_measuring:
             max_usage = max(
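_measure_usage is the body of a background polling loop: while keep_measuring is set, it repeatedly samples device memory and keeps the maximum. A self-contained sketch of that pattern, with torch.cuda.memory_allocated() standing in for colo_device_memory_used and a plain thread standing in for the monitor's executor (illustrative names, not the actual class):

import threading
import time

import torch

class SimpleAsyncMonitor:
    """Polls CUDA memory on a background thread and records the peak."""

    def __init__(self, interval: float = 0.01):
        self.interval = interval
        self.keep_measuring = False
        self.max_usage = 0
        self._thread = None

    def start(self):
        self.keep_measuring = True
        self._thread = threading.Thread(target=self._measure_usage, daemon=True)
        self._thread.start()

    def finish(self) -> int:
        # Stop the loop and return the peak usage seen during the window.
        self.keep_measuring = False
        self._thread.join()
        return self.max_usage

    def _measure_usage(self):
        while self.keep_measuring:
            self.max_usage = max(self.max_usage, torch.cuda.memory_allocated())
            time.sleep(self.interval)  # yield so the loop doesn't hog a core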
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Optional
+from typing import List, Optional
 
 import torch
 
@@ -6,7 +6,6 @@ from .param_runtime_order import OrderedParamGenerator
 
 
 class MemStats(object):
-
     def __init__(self) -> None:
         """
         Store the non model data statistics used for Gemini and GeminiOptimizer.
@@ -92,17 +91,17 @@ class MemStats(object):
         return self._param_runtime_order
 
     def non_model_data_list(self, device_type: str) -> List[int]:
-        if device_type == 'cuda':
+        if device_type == "cuda":
             return self._non_model_data_cuda_list
-        elif device_type == 'cpu':
+        elif device_type == "cpu":
             return self._non_model_data_cpu_list
         else:
             raise TypeError
 
     def max_non_model_data(self, device_type: str) -> float:
-        if device_type == 'cuda':
+        if device_type == "cuda":
             return max(self._non_model_data_cuda_list)
-        elif device_type == 'cpu':
+        elif device_type == "cpu":
             return max(self._non_model_data_cpu_list)
         else:
             raise TypeError
@@ -40,11 +40,12 @@ class MemStatsCollector:
         Returns:
             int: max non model data memory usage of current sampling period
         """
-        assert not self._start_flag, 'Cannot get mem stats info during collection phase.'
-        assert self._step_total > 0, 'Cannot get mem stats info before collection phase.'
-        assert len(self._memstats.non_model_data_list(device_type)) > self._step_idx, \
-            f"{len(self._memstats.non_model_data_list(device_type))} should be > than step idx {self._step_idx}, "\
+        assert not self._start_flag, "Cannot get mem stats info during collection phase."
+        assert self._step_total > 0, "Cannot get mem stats info before collection phase."
+        assert len(self._memstats.non_model_data_list(device_type)) > self._step_idx, (
+            f"{len(self._memstats.non_model_data_list(device_type))} should be > than step idx {self._step_idx}, "
             f"step total {self._step_total}"
+        )
         next_non_model_data = self._memstats.non_model_data_list(device_type)[self._step_idx]
         self._step_idx = (self._step_idx + 1) % self._step_total
         return next_non_model_data
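The assertions and the % self._step_total update above implement a replay scheme: non-model-data samples collected during a warm-up iteration are handed back one per step, wrapping around at the end of each iteration. A stripped-down illustration with plain lists in place of MemStats:

non_model_data = [512, 1024, 768]  # bytes sampled at each warm-up step
step_total = len(non_model_data)
step_idx = 0

for _ in range(7):  # replay across later training steps
    value = non_model_data[step_idx]
    step_idx = (step_idx + 1) % step_total  # wrap at the end of each iteration
    print(value)  # 512 1024 768 512 1024 768 512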
@@ -60,9 +61,9 @@ class MemStatsCollector:
     def finish_collection(self):
         self.sample_overall_data()
         # self._step_total = len(self._sampling_time)
-        self._step_total = len(self._memstats.non_model_data_list('cuda'))
+        self._step_total = len(self._memstats.non_model_data_list("cuda"))
         self._start_flag = False
-        print(f'finish_collection {self._step_total}')
+        print(f"finish_collection {self._step_total}")
 
     # deprecated
     def record_model_data_volume(self) -> None:
@@ -73,7 +74,7 @@ class MemStatsCollector:
         from colossalai.legacy.zero.gemini import StatefulTensor
 
         # The following code work for ZeroInitContext, which is deprecated in v0.1.12
-        cuda_mem = StatefulTensor.GST_MGR.total_mem['cuda']
+        cuda_mem = StatefulTensor.GST_MGR.total_mem["cuda"]
         self._memstats.record_max_cuda_model_data(cuda_mem)
 
     def sample_overall_data(self) -> None:
@@ -4,7 +4,6 @@ import torch
 
 
 class ParamGenerator(ABC):
-
     def append(self, param: torch.nn.Parameter):
         pass
 
@@ -10,10 +10,10 @@ from colossalai.utils import _cast_float
 
 from .memory_stats import MemStats
 
-__all__ = ['RuntimeMemTracer']
+__all__ = ["RuntimeMemTracer"]
 
 
-class RuntimeMemTracer():
+class RuntimeMemTracer:
     """RuntimeMemTracer for the module training using ColoParameter.
 
     Trace non-model memory usage during fwd+bwd process.
@@ -15,9 +15,9 @@ from .chunk_memstats_collector import ChunkMemStatsCollector
 
 
 class ModuleInfos:
-
-    def __init__(self, module: torch.nn.Module, module_name: str, module_full_name: str,
-                 parent_module: torch.nn.Module):
+    def __init__(
+        self, module: torch.nn.Module, module_name: str, module_full_name: str, parent_module: torch.nn.Module
+    ):
         self.module = module
         self.module_name = module_name
         self.module_full_name = module_full_name
@@ -35,14 +35,13 @@ class StaticMemStatsCollector(ChunkMemStatsCollector):
         self.module_info_list = []
 
     def init_mem_stats(self, *inputs):
-
         self.register_opnodes_recursively(self.module)
         self.refactor_module()
 
         self.module = self.module.cpu()
         self.module.train()
 
-        data = [MetaTensor(torch.rand(inp.shape, device='meta'), fake_device='cpu') for inp in inputs]
+        data = [MetaTensor(torch.rand(inp.shape, device="meta"), fake_device="cpu") for inp in inputs]
         gm = symbolic_trace(self.module)
         interp = MetaInfoProp(gm)
         interp.propagate(*data)
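init_mem_stats traces the module with torch.fx and propagates meta tensors through the graph, obtaining per-op information without running real kernels. A minimal sketch of the trace-then-propagate idea, using stock torch.fx ShapeProp in place of ColossalAI's MetaInfoProp (which additionally records memory costs) and real sample inputs in place of MetaTensor:

import torch
from torch.fx import symbolic_trace
from torch.fx.passes.shape_prop import ShapeProp

model = torch.nn.Sequential(torch.nn.Linear(8, 4), torch.nn.ReLU())
gm = symbolic_trace(model)          # build an fx GraphModule
ShapeProp(gm).propagate(torch.rand(2, 8))  # annotate each node with metadata

for node in gm.graph.nodes:
    meta = node.meta.get("tensor_meta")
    if meta is not None:
        print(node.name, meta.shape)  # per-op output shapes, no training needed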
@@ -87,12 +86,13 @@ class StaticMemStatsCollector(ChunkMemStatsCollector):
         for modInfo in self.module_info_list:
             modInfo.parent_module.__setattr__(modInfo.module_name, modInfo.module)
 
-    def register_opnodes_recursively(self,
-                                     module: torch.nn.Module,
-                                     name: str = "",
-                                     full_name: str = "",
-                                     parent_module: Optional[torch.nn.Module] = None):
-
+    def register_opnodes_recursively(
+        self,
+        module: torch.nn.Module,
+        name: str = "",
+        full_name: str = "",
+        parent_module: Optional[torch.nn.Module] = None,
+    ):
         assert isinstance(module, torch.nn.Module)
 
         for child_name, child in module.named_children():
@@ -14,7 +14,7 @@ def colo_model_optimizer_usage(optim) -> Tuple[int, int]:
     """
     if optim is None:
         return 0, 0
-    assert hasattr(optim, 'get_memory_usage'), f"{type(optim)} has no attr get_memory_usage()"
+    assert hasattr(optim, "get_memory_usage"), f"{type(optim)} has no attr get_memory_usage()"
    return optim.get_memory_usage()
 
 
@@ -35,16 +35,16 @@ def colo_model_mem_usage(model: torch.nn.Module) -> Tuple[int, int]:
             return 0, 0
         assert isinstance(t, torch.Tensor)
         _cpu_mem_usage, _cuda_mem_usage = 0, 0
-        if t.device.type == 'cpu':
+        if t.device.type == "cpu":
             _cpu_mem_usage += t.numel() * t.element_size()
-        elif t.device.type == 'cuda':
+        elif t.device.type == "cuda":
             _cuda_mem_usage += t.numel() * t.element_size()
         return _cuda_mem_usage, _cpu_mem_usage
 
     cuda_mem_usage = 0
     cpu_mem_usage = 0
     for param in model.parameters():
-        if hasattr(param, 'colo_attr'):
+        if hasattr(param, "colo_attr"):
             t_cuda, t_cpu = param.colo_attr.get_memory_usage()
             cuda_mem_usage += t_cuda
             cpu_mem_usage += t_cpu
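colo_model_mem_usage sums numel() * element_size() per parameter, bucketed by device, preferring the colo_attr bookkeeping when a parameter carries it. The same accounting for a plain nn.Module without colo_attr (a hypothetical helper for illustration, not part of ColossalAI):

import torch

def model_mem_usage(model: torch.nn.Module) -> tuple[int, int]:
    # Returns (cuda_bytes, cpu_bytes) occupied by the model's parameters.
    cuda_bytes, cpu_bytes = 0, 0
    for p in model.parameters():
        nbytes = p.numel() * p.element_size()
        if p.device.type == "cuda":
            cuda_bytes += nbytes
        elif p.device.type == "cpu":
            cpu_bytes += nbytes
    return cuda_bytes, cpu_bytes

print(model_mem_usage(torch.nn.Linear(1024, 1024)))  # (0, 4198400) on CPU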