[gemini] polish code (#855)

HELSON 2022-04-25 10:40:14 +08:00 committed by GitHub
parent 29159d9b5b
commit f0e654558f
2 changed files with 6 additions and 8 deletions


@@ -42,7 +42,7 @@ class StatefulTensorMgr(object):
         by mem_stats_collector, which should belongs to a Sharded Model.
         """
         # find stateful tensor in state COMPUTE
-        cuda_demand = 0
+        cuda_demand = StatefulTensor.GST_MGR.state_mem['cpu'][TensorState.COMPUTE]
         move_to_cuda_tensor_list = []
         hold_cuda_tensor_list = []
         for tensor in self._stateful_tensor_list:
@@ -55,7 +55,6 @@ class StatefulTensorMgr(object):
             elif tensor.device.type == 'cpu':
                 if tensor.state == TensorState.COMPUTE:
                     move_to_cuda_tensor_list.append(tensor)
-                    cuda_demand += colo_tensor_mem_usage(tensor.payload)[1]
             else:
                 raise RuntimeError
         self._cpu_gpu_move_volume += self._tensor_placement_policy.evict_tensors(hold_cuda_tensor_list,
@@ -66,7 +65,7 @@ class StatefulTensorMgr(object):
         # move COMPUTE tensors to CUDA
         for t in move_to_cuda_tensor_list:
             colo_model_data_tensor_move_inline(t, get_current_device())
-            self._cpu_gpu_move_volume += t.payload.numel() * t.payload.element_size()
+            self._cpu_gpu_move_volume += t.payload_size
 
     @property
     def cpu_gpu_move_volume(self):
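The first change swaps a per-tensor accumulation for a single lookup: the global stateful tensor manager (StatefulTensor.GST_MGR) already aggregates payload bytes per device and per state, so the CUDA demand of the coming compute step is simply the total CPU-resident memory in state COMPUTE. Below is a minimal, self-contained sketch of that bookkeeping; GSTManager, register, and the byte figures are hypothetical stand-ins, while state_mem['cpu'][TensorState.COMPUTE] and payload_size mirror names that appear in the diff.

# Hypothetical sketch of the aggregate bookkeeping this commit relies on.
from enum import Enum


class TensorState(Enum):
    HOLD = 0
    COMPUTE = 1


class GSTManager:
    """Tracks aggregate payload bytes per device type and per tensor state."""

    def __init__(self):
        self.state_mem = {
            'cpu': {s: 0 for s in TensorState},
            'cuda': {s: 0 for s in TensorState},
        }

    def register(self, device_type, state, payload_size):
        # Running totals are updated as tensors change device or state.
        self.state_mem[device_type][state] += payload_size


mgr = GSTManager()
mgr.register('cpu', TensorState.COMPUTE, 4 * 1024)   # made-up 4 KiB payload
mgr.register('cpu', TensorState.COMPUTE, 8 * 1024)
mgr.register('cpu', TensorState.HOLD, 2 * 1024)

# One dict lookup replaces the old loop that summed
# colo_tensor_mem_usage(tensor.payload)[1] per CPU tensor in COMPUTE.
cuda_demand = mgr.state_mem['cpu'][TensorState.COMPUTE]
assert cuda_demand == 12 * 1024

Keeping running totals in the manager makes the demand query O(1) instead of O(n) over the tensor list, and payload_size avoids recomputing numel() * element_size() at every call site.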


@@ -76,7 +76,6 @@ class AutoTensorPlacementPolicy(TensorPlacementPolicy):
         Returns:
             int: the volume of memory that is evicted
         """
-        volume = 0
         cuda_capacity = colo_device_memory_capacity(get_current_device())
         used_cuda_model_data = StatefulTensor.GST_MGR.total_mem['cuda']
         if warmup:
@@ -88,11 +87,12 @@ class AutoTensorPlacementPolicy(TensorPlacementPolicy):
             cuda_capacity *= self._steady_cuda_cap_ratio
         total_cuda_model_data = cuda_capacity - max_cuda_non_model_data_per_period
         avail_cuda_model_data = total_cuda_model_data - used_cuda_model_data
+
+        freed_cuda_model_data = 0
         if avail_cuda_model_data < cuda_demand:
             # Move cuda_demand - avail_cuda_model_data volume of tensors
             # to_free_cuda_model_data = cuda_demand - avail_cuda_model_data
             to_free_cuda_model_data = cuda_demand - avail_cuda_model_data
-            freed_cuda_model_data = 0
             to_free_tensor_list = hold_cuda_tensor_list
             if not warmup:
                 next_compute_idx = {t: len(compute_list) for t in hold_cuda_tensor_list}
@@ -104,15 +104,14 @@ class AutoTensorPlacementPolicy(TensorPlacementPolicy):
             for t in to_free_tensor_list:
                 if freed_cuda_model_data >= to_free_cuda_model_data:
                     break
-                freed_cuda_model_data += colo_tensor_mem_usage(t)[0]
+                freed_cuda_model_data += t.payload_size
                 colo_model_data_tensor_move_inline(t, torch.device('cpu'))
-                volume += t.payload.numel() * t.payload.element_size()
             if freed_cuda_model_data < to_free_cuda_model_data:
                 raise RuntimeError(
                     f"Adjust layout failed! No enough CUDA memory! Need {to_free_cuda_model_data}, freed {freed_cuda_model_data}"
                 )
-        return volume
+        return freed_cuda_model_data
 
 
 class TensorPlacementPolicyFactory:
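The second change drops the redundant volume counter in evict_tensors: the bytes freed (freed_cuda_model_data) and the bytes moved to CPU are the same quantity, so the function now returns the freed total directly. Hoisting freed_cuda_model_data = 0 above the if is what makes this work, since the return statement executes even when no eviction is needed. A toy version of the resulting control flow is sketched below; the dict stands in for a stateful tensor and the byte sizes are made up, with only payload_size and the variable names taken from the diff.

# Hypothetical sketch of the eviction accounting after this commit.
def evict_tensors(hold_cuda_tensors, cuda_demand, avail_cuda_model_data):
    # Initialized unconditionally so the return value is defined
    # even when no eviction is required.
    freed_cuda_model_data = 0
    if avail_cuda_model_data < cuda_demand:
        to_free = cuda_demand - avail_cuda_model_data
        for t in hold_cuda_tensors:
            if freed_cuda_model_data >= to_free:
                break
            # t.payload_size in the real code; a dict key here.
            freed_cuda_model_data += t['payload_size']
            # colo_model_data_tensor_move_inline(t, torch.device('cpu'))
            # would move the payload to CPU at this point.
        if freed_cuda_model_data < to_free:
            raise RuntimeError(f"Need {to_free}, freed {freed_cuda_model_data}")
    return freed_cuda_model_data


tensors = [{'payload_size': 512}, {'payload_size': 1024}, {'payload_size': 2048}]
print(evict_tensors(tensors, cuda_demand=1200, avail_cuda_model_data=0))  # 1536

Returning the freed byte count lets the caller (StatefulTensorMgr.adjust_layout in the first file) add it to _cpu_gpu_move_volume without a second accumulator tracking the same number.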