mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-03 10:06:44 +00:00
[Gemini] update API of the chunkmemstatscollector. (#2129)
This commit is contained in:
@@ -206,7 +206,6 @@ class ShardedModelV2(nn.Module):
|
||||
f.write(f'cuda reserved {torch.cuda.memory_reserved(get_current_device()) / 1e9} GB\n')
|
||||
f.write(f'cuda max allocated {torch.cuda.max_memory_allocated(get_current_device()) / 1e9} GB\n')
|
||||
f.write('CUDA model data (GB)\n')
|
||||
f.write(str(self._memstats_collector._memstats.model_data_list('cuda')))
|
||||
f.write('\n')
|
||||
f.write('CUDA non model data (GB)\n')
|
||||
f.write(str(self._memstats_collector._memstats.non_model_data_list('cuda')))
|
||||
@@ -256,8 +255,8 @@ class ShardedModelV2(nn.Module):
|
||||
# the way to calculate margin space is based on the assumption that
|
||||
# model data is fixed in cuda during training.
|
||||
# cuda margin space can be used to store optimizer states (OS).
|
||||
- self._cuda_margin_space = colo_device_memory_capacity(get_current_device()) - max(
|
||||
- self._memstats_collector._memstats.overall_mem_stats('cuda'))
|
||||
+ self._cuda_margin_space = colo_device_memory_capacity(
|
||||
+ get_current_device()) - self._memstats_collector._memstats.max_overall_cuda
|
||||
|
||||
@torch.no_grad()
|
||||
def _post_backward_operations(self) -> None:
|
||||
|
@@ -32,6 +32,8 @@ class GeminiZeROHook(ColoParamOpHook):
|
||||
self._gemini_manager.adjust_layout(chunks)
|
||||
for chunk in chunks:
|
||||
self._chunk_manager.access_chunk(chunk)
|
||||
|
||||
# record cuda model data of the current OP
|
||||
self._gemini_manager.record_model_data_volume()
|
||||
|
||||
def post_op(self, params):
|
||||
|
Reference in New Issue
Block a user