mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-07 12:01:39 +00:00
[misc] update pre-commit and run all files (#4752)
* [misc] update pre-commit * [misc] run pre-commit * [misc] remove useless configuration files * [misc] ignore cuda for clang-format
This commit is contained in:
@@ -25,28 +25,32 @@ def elementwise_meta_info(temp_mem_scale: float = 0, buffer_mem_scale: float = 0
|
||||
def meta_func(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCycleItem, List[torch.Tensor]]:
|
||||
input_tensor = next(
|
||||
filter(
|
||||
lambda x:
|
||||
(x.type == OperationDataType.ARG or x.type == OperationDataType.PARAM) and x.name != 'softmax_dim',
|
||||
args)).data
|
||||
lambda x: (x.type == OperationDataType.ARG or x.type == OperationDataType.PARAM)
|
||||
and x.name != "softmax_dim",
|
||||
args,
|
||||
)
|
||||
).data
|
||||
output_tensor = next(filter(lambda x: x.type == OperationDataType.OUTPUT, args)).data
|
||||
is_inplace = 1 if kwargs.get('inplace', False) else 0
|
||||
is_inplace = 1 if kwargs.get("inplace", False) else 0
|
||||
|
||||
flop_counter = elementwise_flop_counter(1, 0)
|
||||
# calculate compute cost
|
||||
fwd_compute_cost = flop_counter([input_tensor], [output_tensor])
|
||||
bwd_compute_cost = flop_counter([output_tensor], [input_tensor])
|
||||
|
||||
compute_cost = TrainCycleItem(fwd=fwd_compute_cost,
|
||||
bwd=bwd_compute_cost,
|
||||
total=fwd_compute_cost + bwd_compute_cost)
|
||||
compute_cost = TrainCycleItem(
|
||||
fwd=fwd_compute_cost, bwd=bwd_compute_cost, total=fwd_compute_cost + bwd_compute_cost
|
||||
)
|
||||
|
||||
# calculate memory cost
|
||||
# NOTE: currently the SPMD solver always assumes that a new tensor is created in the forward pass
|
||||
# NOTE: if the `inplace` kwarg is True, no new tensor is created in the forward pass
|
||||
fwd_memory_cost = MemoryCost(activation=activation_size(input_tensor) * (2 - is_inplace),
|
||||
parameter=0,
|
||||
temp=0,
|
||||
buffer=activation_size(input_tensor) * buffer_mem_scale)
|
||||
fwd_memory_cost = MemoryCost(
|
||||
activation=activation_size(input_tensor) * (2 - is_inplace),
|
||||
parameter=0,
|
||||
temp=0,
|
||||
buffer=activation_size(input_tensor) * buffer_mem_scale,
|
||||
)
|
||||
|
||||
# temp_mem_scale is for situations like softmax backward
|
||||
# the buffer will be removed during the backward phase
|
||||
@@ -54,20 +58,23 @@ def elementwise_meta_info(temp_mem_scale: float = 0, buffer_mem_scale: float = 0
|
||||
activation=activation_size(input_tensor) - activation_size(input_tensor) * buffer_mem_scale,
|
||||
parameter=0,
|
||||
temp=activation_size(input_tensor) * temp_mem_scale + activation_size(input_tensor) * buffer_mem_scale,
|
||||
buffer=0)
|
||||
buffer=0,
|
||||
)
|
||||
|
||||
# total cost is the sum of forward and backward cost
|
||||
total_cost = MemoryCost(activation=fwd_memory_cost.activation + bwd_memory_cost.activation,
|
||||
parameter=fwd_memory_cost.parameter + bwd_memory_cost.parameter,
|
||||
temp=fwd_memory_cost.temp + bwd_memory_cost.temp,
|
||||
buffer=fwd_memory_cost.buffer + bwd_memory_cost.buffer)
|
||||
total_cost = MemoryCost(
|
||||
activation=fwd_memory_cost.activation + bwd_memory_cost.activation,
|
||||
parameter=fwd_memory_cost.parameter + bwd_memory_cost.parameter,
|
||||
temp=fwd_memory_cost.temp + bwd_memory_cost.temp,
|
||||
buffer=fwd_memory_cost.buffer + bwd_memory_cost.buffer,
|
||||
)
|
||||
|
||||
memory_cost = TrainCycleItem(fwd=fwd_memory_cost, bwd=bwd_memory_cost, total=total_cost)
|
||||
|
||||
# store fwd_in, fwd_buffer, fwd_out
|
||||
fwd_in = []
|
||||
fwd_buffer = [torch.zeros_like(output_tensor, device='meta')]
|
||||
fwd_out = [torch.zeros_like(output_tensor, device='meta')]
|
||||
fwd_buffer = [torch.zeros_like(output_tensor, device="meta")]
|
||||
fwd_out = [torch.zeros_like(output_tensor, device="meta")]
|
||||
|
||||
return compute_cost, memory_cost, fwd_in, fwd_buffer, fwd_out
|
||||
|
||||
|
Reference in New Issue
Block a user