mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2026-05-01 04:13:59 +00:00
* [fx] modify the calculation of node_size in MetaInfoProp for activation checkpointing usages * [fx] modify the calculation of node_size in MetaInfoProp for activation checkpointing usages * [fx] modify the calculation of node_size in MetaInfoProp for activation checkpointing usages * [fx] merge development into main (#1) * [fx] activation checkpointing using Chen strategies. * [fx] add test for ckpt_solver_chen * [fx] add vanilla activation checkpoint search with test on resnet and densenet * [fx] add a namespace code for solver_chen. * [fx] fix the false interpretation of algorithm 3 in https://arxiv.org/abs/1604.06174. * [fx] fix lowercase naming conventions. * [fx] simplify test for ckpt. * [fx] add rules to linearize computation graphs for searching. (#2) * [fx] modify the calculation of node_size in MetaInfoProp for activation checkpointing usages * [fx] modify the calculation of node_size in MetaInfoProp for activation checkpointing usages * [fx] modify the calculation of node_size in MetaInfoProp for activation checkpointing usages * [fx] merge development into main (#1) * [fx] activation checkpointing using Chen strategies. * [fx] add test for ckpt_solver_chen * [fx] add vanilla activation checkpoint search with test on resnet and densenet * [fx] add a namespace code for solver_chen. * [fx] fix the false interpretation of algorithm 3 in https://arxiv.org/abs/1604.06174. * [fx] fix lowercase naming conventions. * [fx] simplify test for ckpt. * [fx] fix test and algorithm bugs in activation checkpointing. * [fx] polish ckpt_test. * [fx] add rules to linearize computation graphs for searching. * [fx] remove chen_sqrt for sake of simplicity * [fx] remove chen_sqrt for sake of simplicity * [fx] remove chen_sqrt for sake of simplicity * [fx] remove chen_sqrt for sake of simplicity * [fx] fix inconsistencies. * [fx] fix MetaInfoProp. * [fx] fix MetaInfoProp. * [fx] consider MetaInfoProp for inplace operands. * [fx] consider MetaInfoProp for inplace operands. 
* [fx] consider MetaInfoProp for inplace operands. * [fx] consider MetaInfoProp for inplace operands. * [fx] consider MetaInfoProp for inplace operands. * [fx] add profiler for fx nodes. * [fx] add profiler for fx nodes. * [fx] add profiler for fx nodes. * [fx] add profiler for fx nodes. * [fx] add profiler for fx nodes. * [fx] add profiler for fx nodes. * [fx] add profiler for fx nodes. * [fx] fix error in tests. * [fx] unfix bug. * [fx] unfix bug.
67 lines
2.1 KiB
Python
67 lines
2.1 KiB
Python
from typing import List, Optional, Tuple
|
|
import torch
|
|
from ..registry import meta_profiler_function
|
|
|
|
|
|
@meta_profiler_function.register(torch.nn.functional.instance_norm)
def torch_nn_func_instancenorm(
    input: torch.Tensor,
    running_mean: Optional[torch.Tensor] = None,
    running_var: Optional[torch.Tensor] = None,
    weight: Optional[torch.Tensor] = None,
    bias: Optional[torch.Tensor] = None,
    use_input_stats: bool = True,
    momentum: float = 0.1,
    eps: float = 1e-5,
) -> Tuple[int, int]:
    """Estimate the cost of ``torch.nn.functional.instance_norm`` on meta tensors.

    Only ``input`` and ``weight`` influence the estimate; the remaining
    parameters mirror the real ``instance_norm`` signature so the profiler
    can be registered as a drop-in stand-in.

    Returns:
        Tuple[int, int]: ``(flops, macs)`` — the estimated floating-point
        operation count and multiply-accumulate count (always 0 here,
        matching the sibling normalization profilers in this file).
    """
    # Fix: added the `-> Tuple[int, int]` return annotation for consistency
    # with the group_norm / layer_norm / batch_norm profilers below.
    has_affine = weight is not None
    # 4 ops per element for the normalization itself (mean, var, subtract,
    # divide); +1 per element when an affine scale/shift is applied.
    flops = input.numel() * (5 if has_affine else 4)
    macs = 0
    return flops, macs
|
|
|
|
|
|
@meta_profiler_function.register(torch.nn.functional.group_norm)
def torch_nn_func_groupnorm(input: torch.Tensor,
                            num_groups: int,
                            weight: Optional[torch.Tensor] = None,
                            bias: Optional[torch.Tensor] = None,
                            eps: float = 1e-5) -> Tuple[int, int]:
    """Estimate the cost of ``torch.nn.functional.group_norm`` on meta tensors.

    The estimate depends only on the element count of ``input`` and on
    whether an affine ``weight`` is present; ``num_groups``, ``bias`` and
    ``eps`` are accepted purely to match the real signature.

    Returns:
        Tuple[int, int]: ``(flops, macs)``, with ``macs`` fixed at 0.
    """
    # 4 per-element ops for normalization, one extra when affine
    # scale/shift is applied.
    ops_per_element = 5 if weight is not None else 4
    flops = ops_per_element * input.numel()
    macs = 0
    return flops, macs
|
|
|
|
|
|
@meta_profiler_function.register(torch.nn.functional.layer_norm)
def torch_nn_func_layernorm(
    input: torch.Tensor,
    normalized_shape: List[int],
    weight: Optional[torch.Tensor] = None,
    bias: Optional[torch.Tensor] = None,
    eps: float = 1e-5,
) -> Tuple[int, int]:
    """Estimate the cost of ``torch.nn.functional.layer_norm`` on meta tensors.

    Only ``input.numel()`` and the presence of ``weight`` affect the
    result; ``normalized_shape``, ``bias`` and ``eps`` exist solely to
    mirror the real ``layer_norm`` signature.

    Returns:
        Tuple[int, int]: ``(flops, macs)``, with ``macs`` fixed at 0.
    """
    # Normalization costs 4 ops per element; an affine transform adds one.
    if weight is not None:
        flops = 5 * input.numel()
    else:
        flops = 4 * input.numel()
    macs = 0
    return flops, macs
|
|
|
|
|
|
@meta_profiler_function.register(torch.nn.functional.batch_norm)
def torch_nn_func_batchnorm(
    input: torch.Tensor,
    running_mean: Optional[torch.Tensor],
    running_var: Optional[torch.Tensor],
    weight: Optional[torch.Tensor] = None,
    bias: Optional[torch.Tensor] = None,
    training: bool = False,
    momentum: float = 0.1,
    eps: float = 1e-5,
) -> Tuple[int, int]:
    """Estimate the cost of ``torch.nn.functional.batch_norm`` on meta tensors.

    The estimate depends on ``input.numel()``, whether an affine ``weight``
    is present, and the ``training`` flag; the remaining parameters mirror
    the real ``batch_norm`` signature.

    Returns:
        Tuple[int, int]: ``(flops, macs)``, with ``macs`` fixed at 0.
    """
    affine = weight is not None
    if training:
        # Training path: normalize with batch statistics — 1 op per
        # element, plus 1 more when an affine scale/shift is applied.
        ops_per_element = 2 if affine else 1
    else:
        # Inference path: same per-element cost model as the other
        # normalization profilers in this file.
        ops_per_element = 5 if affine else 4
    flops = ops_per_element * input.numel()
    macs = 0
    return flops, macs
|