Mirror of https://github.com/hpcaitech/ColossalAI.git (synced 2025-07-18 17:31:53 +00:00)

[gemini] refactor gemini mgr (#1151)
* refactor gemini mgr
* update __init__
This commit is contained in:
parent f8eec98ff5
commit 54aabb8da4

@@ -1,4 +1,5 @@
 from .stateful_tensor_mgr import StatefulTensorMgr
 from .tensor_placement_policy import TensorPlacementPolicyFactory
+from .gemini_mgr import GeminiManager
 
-__all__ = ['StatefulTensorMgr', 'TensorPlacementPolicyFactory']
+__all__ = ['StatefulTensorMgr', 'TensorPlacementPolicyFactory', 'GeminiManager']
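
With this hunk, GeminiManager is re-exported from the package `__init__` and listed in `__all__`. As a quick illustration (assuming this is the `colossalai/gemini` package `__init__`, which the relative imports and the commit title suggest but the stripped file header does not confirm), it can then be imported directly:

from colossalai.gemini import GeminiManager  # re-exported next to StatefulTensorMgr and TensorPlacementPolicyFactory
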
@@ -1,4 +1,5 @@
 import torch
+import functools
 from .memory_tracer.memstats_collector import MemStatsCollectorV2
 from typing import List, Optional, Tuple
 from time import time
@@ -23,10 +24,12 @@ class GeminiManager:
         self._compute_list: List[Tuple[Chunk, ...]] = []
         self._compute_idx: int = -1
 
-        self._cpu_gpu_move_volume = 0
+        self._h2d_volume = 0
+        self._d2h_volume = 0
         self._layout_time = 0
         self._evict_time = 0
         self._warmup = True
+        self._comp_cuda_demand_time = 0
 
     def pre_iter(self):
         if self._mem_stats_collector and self._warmup:
@@ -39,9 +42,11 @@ class GeminiManager:
             self._mem_stats_collector.finish_collection()
         self._warmup = False
         self._compute_idx = -1
-        self._cpu_gpu_move_volume = 0
+        self._h2d_volume = 0
+        self._d2h_volume = 0
         self._layout_time = 0
         self._evict_time = 0
+        self._comp_cuda_demand_time = 0
 
     def adjust_layout(self, chunks: Tuple[Chunk, ...], group_name: str) -> None:
         """ Adjust the layout of statefuil tensor according to the information provided
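
Taken together, the two hunks above replace the single `_cpu_gpu_move_volume` counter with a direction-aware pair, `_h2d_volume` (CPU to CUDA) and `_d2h_volume` (CUDA to CPU), and add a `_comp_cuda_demand_time` timer; all of them are zeroed together with the existing layout and eviction timers when an iteration ends. A toy, self-contained sketch of that per-iteration counter pattern (illustrative code only, not ColossalAI's):

# Toy sketch of split, per-iteration counters; attribute names mirror the diff,
# the class itself is made up for illustration.
class IterCounterDemo:

    def __init__(self):
        self._h2d_volume = 0    # bytes moved CPU -> CUDA in the current iteration
        self._d2h_volume = 0    # bytes moved CUDA -> CPU in the current iteration

    def record_move(self, to_cuda: bool, nbytes: int):
        if to_cuda:
            self._h2d_volume += nbytes
        else:
            self._d2h_volume += nbytes

    def post_iter(self):
        # counters are reset at the end of every iteration, as in the hunk above
        self._h2d_volume = 0
        self._d2h_volume = 0

demo = IterCounterDemo()
demo.record_move(True, 1024)     # one host-to-device transfer
demo.record_move(False, 512)     # one device-to-host transfer
demo.post_iter()                 # both counters are zero again
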
@@ -57,22 +62,19 @@ class GeminiManager:
                                                        warmup=self._warmup,
                                                        compute_list=self._compute_list,
                                                        compute_idx=self._compute_idx)
-        self._cpu_gpu_move_volume += vol
+        self._d2h_volume += vol
         self._evict_time += evict_time
         # move COMPUTE tensors to CUDA
-        self._cpu_gpu_move_volume += cuda_demand
+        self._h2d_volume += cuda_demand
 
-    @property
-    def cpu_gpu_move_volume(self):
-        return self._cpu_gpu_move_volume
-
-    # @functools.lru_cache(maxsize=None)
-    # TODO: test lru
+    @functools.lru_cache(maxsize=None)
     def _get_layout_info(self, compute_idx: int, warmup: bool, chunks: Tuple[Chunk, ...], group_name: str):
+        start = time()
         cuda_demand = 0
         for chunk in chunks:
             if chunk.device_type == 'cpu' or chunk.is_empty:
                 cuda_demand += chunk.mem
+        self._comp_cuda_demand_time += time() - start
         can_evict_chunks = []
         for chunk in self._chunk_manager.chunk_groups[group_name]:
             if not chunk.is_empty and chunk.device_type == 'cuda' and chunk.can_move_device:
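
In `adjust_layout`, the evicted volume reported by the placement policy now accumulates into `_d2h_volume`, while the CUDA demand of the chunks about to be computed goes into `_h2d_volume`. The public `cpu_gpu_move_volume` property is dropped, the previously commented-out idea ("TODO: test lru") is enabled by decorating `_get_layout_info` with `functools.lru_cache`, and the time spent computing the CUDA demand is measured into `_comp_cuda_demand_time`. The memoization works because every argument is hashable (an int, a bool, a tuple of chunks, and a group-name string). A minimal, self-contained sketch of that mechanism (the class and values below are invented for illustration):

import functools

class LayoutDemo:
    """Stand-in for a memoized _get_layout_info; not ColossalAI code."""

    def __init__(self):
        self.misses = 0

    @functools.lru_cache(maxsize=None)
    def get_layout_info(self, compute_idx: int, warmup: bool, chunk_sizes: tuple, group_name: str):
        self.misses += 1                 # only runs on a cache miss
        return sum(chunk_sizes)          # pretend this is the CUDA demand

demo = LayoutDemo()
demo.get_layout_info(0, True, (32, 64), 'fp16_param')
demo.get_layout_info(0, True, (32, 64), 'fp16_param')   # identical hashable args: served from the cache
assert demo.misses == 1

One property of `lru_cache` on an instance method is that the instance itself is part of the cache key, so results are memoized per manager object.
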
@@ -102,7 +102,7 @@ class AutoPlacementPolicy(PlacementPolicy):
         total_cuda_model_data = cuda_capacity - max_cuda_non_model_data_per_period
         avail_cuda_model_data = total_cuda_model_data - used_cuda_model_data
         freed_cuda_model_data = 0
-        end = time()
         if avail_cuda_model_data < cuda_demand:
             # Move cuda_demand - avail_cuda_model_data volume of tensors
             # to_free_cuda_model_data = cuda_demand - avail_cuda_model_data
@@ -111,7 +111,6 @@ class AutoPlacementPolicy(PlacementPolicy):
             if not warmup:
                 to_free_chunks = self._sort_can_evict_chunks(tuple(to_free_chunks), compute_idx, tuple(compute_list))
                 # print(self._sort_can_evict_chunks.cache_info())
-            end = time()
             for chunk in to_free_chunks:
                 if freed_cuda_model_data >= to_free_cuda_model_data:
                     break
@@ -121,7 +120,7 @@ class AutoPlacementPolicy(PlacementPolicy):
                 raise RuntimeError(
                     f"Adjust layout failed! No enough CUDA memory! Need {to_free_cuda_model_data}, freed {freed_cuda_model_data}"
                 )
-        return freed_cuda_model_data, end - start
+        return freed_cuda_model_data, time() - start
 
     @staticmethod
     @functools.lru_cache(maxsize=None)
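
On the AutoPlacementPolicy side, the intermediate `end = time()` snapshots are removed and the eviction routine now returns `time() - start`, so the reported eviction time covers the whole call, including the loop that actually frees chunks, rather than stopping at an earlier snapshot. A tiny, self-contained sketch of the difference (illustrative only):

from time import time

def evict_sketch(sizes):
    """Return (freed_bytes, elapsed). Taking `time() - start` at the return
    includes the freeing loop; an earlier `end = time()` snapshot would not."""
    start = time()
    freed = 0
    for nbytes in sizes:       # stands in for the chunk-freeing loop
        freed += nbytes
    return freed, time() - start

freed, elapsed = evict_sketch([16, 32, 64])
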
@@ -225,7 +225,7 @@ class ZeroDDP(ColoDDP):
         self.chunk_manager.exec_lazy_release()
         self._setup_grads_ptr()
         self._logger.debug(
-            f'layout time: {self.gemini_manager._layout_time}, evict time: {self.gemini_manager._evict_time}, PCIE move vol: {self.gemini_manager._cpu_gpu_move_volume}B'
+            f'comp cuda demand time: {self.gemini_manager._comp_cuda_demand_time}, layout time: {self.gemini_manager._layout_time}, evict time: {self.gemini_manager._evict_time}, CPU->CUDA vol: {self.gemini_manager._h2d_volume}B, CUDA->CPU vol: {self.gemini_manager._d2h_volume}'
         )
         self.gemini_manager.post_iter()
 
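
Finally, ZeroDDP's per-iteration debug line is extended to report the new metrics: the CUDA-demand computation time, and the CPU->CUDA and CUDA->CPU volumes separately instead of one combined PCIe figure, logged before `post_iter()` resets the counters. A hypothetical helper for collecting those counters into a plain record (the private attribute names are taken from the diff; the helper itself is not part of ColossalAI):

from dataclasses import dataclass

@dataclass
class GeminiIterStats:
    h2d_volume: int               # bytes moved CPU -> CUDA this iteration
    d2h_volume: int               # bytes moved CUDA -> CPU this iteration
    layout_time: float            # seconds spent adjusting the chunk layout
    evict_time: float             # seconds spent evicting chunks to CPU
    comp_cuda_demand_time: float  # seconds spent computing the CUDA demand

def snapshot(gemini_manager) -> GeminiIterStats:
    """Read the per-iteration counters before post_iter() zeroes them."""
    return GeminiIterStats(gemini_manager._h2d_volume, gemini_manager._d2h_volume,
                           gemini_manager._layout_time, gemini_manager._evict_time,
                           gemini_manager._comp_cuda_demand_time)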