mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-08 04:24:47 +00:00
[zero] reorganize zero/gemini folder structure (#3424)
* [zero] refactor low-level zero folder structure * [zero] fix legacy zero import path * [zero] fix legacy zero import path * [zero] remove useless import * [zero] refactor gemini folder structure * [zero] refactor gemini folder structure * [zero] refactor legacy zero import path * [zero] refactor gemini folder structure * [zero] refactor gemini folder structure * [zero] refactor gemini folder structure * [zero] refactor legacy zero import path * [zero] fix test import path * [zero] fix test * [zero] fix circular import * [zero] update import
This commit is contained in:
51
colossalai/zero/gemini/chunk/utils.py
Normal file
51
colossalai/zero/gemini/chunk/utils.py
Normal file
@@ -0,0 +1,51 @@
|
||||
from time import time
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
import torch.distributed as dist
|
||||
import torch.nn as nn
|
||||
|
||||
from colossalai.utils import is_ddp_ignored
|
||||
|
||||
from .manager import ChunkManager
|
||||
from .search_utils import search_chunk_configuration
|
||||
|
||||
|
||||
def safe_div(a, b):
    """Divide ``a`` by ``b``, short-circuiting to ``0`` when ``a`` equals zero.

    Only the numerator is inspected: a zero ``a`` yields the integer ``0``
    without evaluating the division (so ``b`` may also be zero in that case).
    Any other ``a`` goes through the ordinary ``/`` operator unchanged.
    """
    return 0 if a == 0 else a / b
|
||||
|
||||
|
||||
def init_chunk_manager(model: nn.Module,
                       init_device: Optional[torch.device] = None,
                       hidden_dim: Optional[int] = None,
                       **kwargs) -> ChunkManager:
    """Search a chunk configuration for ``model`` and wrap it in a ``ChunkManager``.

    Args:
        model: module handed to ``search_chunk_configuration``.
        init_device: forwarded as the second argument of ``ChunkManager``.
        hidden_dim: when truthy, used as the ``search_interval_byte`` value;
            otherwise 1024 is used.
        **kwargs: extra keyword arguments forwarded to
            ``search_chunk_configuration``. Any ``search_interval_byte`` entry
            supplied by the caller is overwritten.

    Returns:
        A ``ChunkManager`` built from the searched configuration.

    Note:
        ``dist.barrier()`` is called before the search, after the search and
        after the rank-0 report, so this presumably has to run on every rank
        of an initialized default process group.
    """
    # A falsy hidden_dim (None or 0) falls back to the 1kb default.
    kwargs["search_interval_byte"] = hidden_dim if hidden_dim else 1024

    # Time the search between two barriers so the span covers all ranks.
    dist.barrier()
    search_start = time()

    config_dict, total_size, wasted_size = search_chunk_configuration(model, **kwargs)

    dist.barrier()
    elapsed_s = time() - search_start

    # Scale both sizes by 2**20 for the report below.
    mb_scale = 1024**2
    total_size /= mb_scale
    wasted_size /= mb_scale

    # Only rank 0 prints the summary.
    if dist.get_rank() == 0:
        wasted_pct = 100 * safe_div(wasted_size, total_size + wasted_size)
        report = "".join((
            "searching chunk configuration is completed in {:.2f} s.\n".format(elapsed_s),
            "used number: {:.2f} MB, wasted number: {:.2f} MB\n".format(total_size, wasted_size),
            "total wasted percentage is {:.2f}%".format(wasted_pct),
        ))
        print(report, flush=True)
    dist.barrier()

    return ChunkManager(config_dict, init_device)
|
Reference in New Issue
Block a user