Mirror of https://github.com/hpcaitech/ColossalAI.git (synced 2025-09-05 02:51:59 +00:00)
[zero] reorganize zero/gemini folder structure (#3424)
* [zero] refactor low-level zero folder structure
* [zero] fix legacy zero import path
* [zero] fix legacy zero import path
* [zero] remove useless import
* [zero] refactor gemini folder structure
* [zero] refactor gemini folder structure
* [zero] refactor legacy zero import path
* [zero] refactor gemini folder structure
* [zero] refactor gemini folder structure
* [zero] refactor gemini folder structure
* [zero] refactor legacy zero import path
* [zero] fix test import path
* [zero] fix test
* [zero] fix circular import
* [zero] update import
colossalai/zero/gemini/memory_tracer/utils.py | 59 (new file)
@@ -0,0 +1,59 @@
from typing import Optional, Tuple

import torch


def colo_model_optimizer_usage(optim) -> Tuple[int, int]:
    """Trace the optimizer memory usage.

    Args:
        optim (ShardedOptimV2): an instance of ShardedOptimV2

    Returns:
        Tuple[int, int]: CUDA and CPU memory usage in bytes
    """
    if optim is None:
        return 0, 0
    assert hasattr(optim, 'get_memory_usage'), f"{type(optim)} has no attr get_memory_usage()"
    return optim.get_memory_usage()


def colo_model_mem_usage(model: torch.nn.Module) -> Tuple[int, int]:
    """
    Trace the model memory usage.

    Args:
        model (torch.nn.Module): a torch model

    Returns:
        Tuple[int, int]: CUDA memory usage in bytes, CPU memory usage in bytes
    """
    if model is None:
        return 0, 0

    def _get_tensor_mem_use(t: Optional[torch.Tensor]):
        if t is None:
            return 0, 0
        assert isinstance(t, torch.Tensor)
        _cpu_mem_usage, _cuda_mem_usage = 0, 0
        if t.device.type == 'cpu':
            _cpu_mem_usage += t.numel() * t.element_size()
        elif t.device.type == 'cuda':
            _cuda_mem_usage += t.numel() * t.element_size()
        return _cuda_mem_usage, _cpu_mem_usage

    cuda_mem_usage = 0
    cpu_mem_usage = 0
    for param in model.parameters():
        if hasattr(param, 'colo_attr'):
            t_cuda, t_cpu = param.colo_attr.get_memory_usage()
            cuda_mem_usage += t_cuda
            cpu_mem_usage += t_cpu
        else:
            t_cuda, t_cpu = _get_tensor_mem_use(param.data)
            cuda_mem_usage += t_cuda
            cpu_mem_usage += t_cpu
            t_cuda, t_cpu = _get_tensor_mem_use(param.grad)
            cuda_mem_usage += t_cuda
            cpu_mem_usage += t_cpu

    return cuda_mem_usage, cpu_mem_usage
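
To make the new helpers concrete, here is a minimal usage sketch that is not part of the commit. It assumes the module remains importable under the gemini path shown in this diff, and it uses a hypothetical ToyOptim class in place of ShardedOptimV2, since colo_model_optimizer_usage only requires an object exposing a get_memory_usage() method.

# Illustrative sketch (assumption: the import path below matches the new gemini layout).
import torch
from colossalai.zero.gemini.memory_tracer.utils import (
    colo_model_mem_usage,
    colo_model_optimizer_usage,
)


class ToyOptim:
    # Hypothetical stand-in for ShardedOptimV2: anything with get_memory_usage().
    def get_memory_usage(self):
        return 1024, 2048  # pretend: 1 KiB of CUDA state, 2 KiB of CPU state


model = torch.nn.Linear(128, 64)  # plain CPU parameters, no colo_attr
cuda_bytes, cpu_bytes = colo_model_mem_usage(model)
print(cuda_bytes, cpu_bytes)  # 0 and (128 * 64 + 64) * 4 bytes for the fp32 weight + bias

opt_cuda, opt_cpu = colo_model_optimizer_usage(ToyOptim())
print(opt_cuda, opt_cpu)  # 1024 2048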