mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-05-28 01:53:37 +00:00
* [fx] modify the calculation of node_size in MetaInfoProp for activation checkpointing usages * [fx] modify the calculation of node_size in MetaInfoProp for activation checkpointing usages * [fx] modify the calculation of node_size in MetaInfoProp for activation checkpointing usages * [fx] merge development into main (#1) * [fx] activation checkpointing using Chen strategies. * [fx] add test for ckpt_solver_chen * [fx] add vanilla activation checkpoint search with test on resnet and densenet * [fx] add a namespace code for solver_chen. * [fx] fix the false interpretation of algorithm 3 in https://arxiv.org/abs/1604.06174. * [fx] fix lowercase naming conventions. * [fx] simplify test for ckpt. * [fx] add rules to linearize computation graphs for searching. (#2) * [fx] modify the calculation of node_size in MetaInfoProp for activation checkpointing usages * [fx] modify the calculation of node_size in MetaInfoProp for activation checkpointing usages * [fx] modify the calculation of node_size in MetaInfoProp for activation checkpointing usages * [fx] merge development into main (#1) * [fx] activation checkpointing using Chen strategies. * [fx] add test for ckpt_solver_chen * [fx] add vanilla activation checkpoint search with test on resnet and densenet * [fx] add a namespace code for solver_chen. * [fx] fix the false interpretation of algorithm 3 in https://arxiv.org/abs/1604.06174. * [fx] fix lowercase naming conventions. * [fx] simplify test for ckpt. * [fx] fix test and algorithm bugs in activation checkpointing. * [fx] polish ckpt_test. * [fx] add rules to linearize computation graphs for searching. * [fx] remove chen_sqrt for sake of simplicity * [fx] remove chen_sqrt for sake of simplicity * [fx] remove chen_sqrt for sake of simplicity * [fx] remove chen_sqrt for sake of simplicity * [fx] fix inconsistencies. * [fx] fix MetaInfoProp. * [fx] fix MetaInfoProp. * [fx] consider MetaInfoProp for inplace operands. * [fx] consider MetaInfoProp for inplace operands. 
* [fx] consider MetaInfoProp for inplace operands. * [fx] consider MetaInfoProp for inplace operands. * [fx] consider MetaInfoProp for inplace operands. * [fx] add profiler for fx nodes. * [fx] add profiler for fx nodes. * [fx] add profiler for fx nodes. * [fx] add profiler for fx nodes. * [fx] add profiler for fx nodes. * [fx] add profiler for fx nodes. * [fx] add profiler for fx nodes. * [fx] fix error in tests. * [fx] unfix bug. * [fx] unfix bug. * [fx] patch more modules and functions. * [fx] change name of utils.py to profiler.py * [fx] add profiler for rnn. * [fx] add profiler for rnn. * [fx] polish and add more patch for profiler. * [fx] polish and add more patch for profiler.
61 lines
2.2 KiB
Python
from functools import reduce
|
|
import operator
|
|
from typing import Any, Optional, Tuple
|
|
import torch
|
|
from ..registry import meta_profiler_function
|
|
|
|
|
|
@meta_profiler_function.register(torch.arange)
@meta_profiler_function.register(torch.finfo)
@meta_profiler_function.register(torch.permute)
@meta_profiler_function.register(torch.Tensor.permute)
@meta_profiler_function.register(torch.Tensor.repeat)
@meta_profiler_function.register(torch.index_select)
@meta_profiler_function.register(torch.Tensor.index_select)
@meta_profiler_function.register(torch.squeeze)
@meta_profiler_function.register(torch.Tensor.squeeze)
@meta_profiler_function.register(torch.unsqueeze)
@meta_profiler_function.register(torch.Tensor.unsqueeze)
@meta_profiler_function.register(torch.cat)
@meta_profiler_function.register(torch.concat)
@meta_profiler_function.register(torch.repeat_interleave)
@meta_profiler_function.register(torch.Tensor.repeat_interleave)
@meta_profiler_function.register(torch.flatten)
@meta_profiler_function.register(torch.Tensor.flatten)
@meta_profiler_function.register(torch.roll)
@meta_profiler_function.register(torch.full)
@meta_profiler_function.register(torch.Tensor.cpu)
@meta_profiler_function.register(torch.Tensor.cuda)
@meta_profiler_function.register(torch._assert)
def torch_zero_flops_op(*args, **kwargs) -> Tuple[int, int]:
    """Profiler entry for ops that are treated as computation-free.

    The registered targets are view/reshape/copy/device-move style operations,
    so both FLOPs and MACs are reported as zero regardless of the arguments.

    Returns:
        Tuple[int, int]: ``(flops, macs)``, always ``(0, 0)``.
    """
    return 0, 0
|
|
|
|
|
|
@meta_profiler_function.register(torch.where)
def torch_where(condition: torch.Tensor, x: Any, y: Any) -> Tuple[int, int]:
    """Profile FLOPs/MACs for ``torch.where``.

    ``torch.where`` produces the broadcast of ``condition``, ``x`` and ``y``;
    as a rough approximation we bill one FLOP per element of ``condition``
    (treating the select as if it were an addition). MACs are zero.

    Returns:
        Tuple[int, int]: ``(flops, macs)``.
    """
    return condition.numel(), 0
|
|
|
|
|
|
@meta_profiler_function.register(torch.max)
def torch_max(input: torch.Tensor,
              dim: Optional[int] = None,
              keepdim: bool = False,
              *,
              out: Optional[torch.Tensor] = None) -> Tuple[int, int]:
    """Profile FLOPs/MACs for ``torch.max``.

    For a full reduction (``dim is None``) we bill one comparison per input
    element. For a reduction along ``dim`` we bill one comparison per output
    element, i.e. the product of the remaining axes. MACs are always zero.

    Args:
        input: Tensor being reduced.
        dim: Axis to reduce over, or ``None`` for a full reduction.
        keepdim: Unused by the cost model; kept to mirror ``torch.max``.
        out: Not supported; must be ``None``.

    Returns:
        Tuple[int, int]: ``(flops, macs)``.
    """
    macs = 0
    assert out is None, 'assigning value to out is not supported yet'
    if dim is not None:
        shape = list(input.shape)
        shape.pop(int(dim))
        # BUG FIX: this previously read `flops = reduce(operator.mul, shape), macs`,
        # which bound a *tuple* to `flops`, so callers received ((flops, macs), macs).
        # The initializer `1` also handles a 1-D input (empty `shape` after pop),
        # where reduce() without an initial value would raise TypeError.
        flops = reduce(operator.mul, shape, 1)
        return flops, macs
    else:
        flops = input.numel()
        return flops, macs