fix typo colossalai/auto_parallel autochunk fx/passes etc. (#3808)

This commit is contained in:
digger yu
2023-05-24 09:01:50 +08:00
committed by GitHub
parent 725365f297
commit 7f8203af69
19 changed files with 31 additions and 31 deletions

View File

@@ -148,7 +148,7 @@ class MetaInfoProp:
graph_info.fwd_tmp = buffer_tensors
graph_info.fwd_out = output_tensors
# fetch other memory informations
# fetch other memory information
memory_cost = meta_info.memory_cost
graph_info.fwd_mem_tmp = memory_cost.fwd.temp
graph_info.fwd_mem_out = memory_cost.fwd.activation

View File

@@ -44,7 +44,7 @@ class BatchNormStrategyGenerator(StrategyGenerator):
'''
Compute the computation cost per device with this specific strategy.
Note: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
Note: compute_cost needs to be divided by TFLOPS; for now it just shows the computation size.
'''
# TODO: a constant coefficient needs to be added.
# 1D: (L) * N * Cin

View File

@@ -38,9 +38,9 @@ class ConvStrategyGenerator(StrategyGenerator):
'''
Compute the computation cost per device with this specific strategy.
Note: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
Note: compute_cost needs to be divided by TFLOPS; for now it just shows the computation size.
'''
# TODO: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
# TODO: compute_cost needs to be divided by TFLOPS; for now it just shows the computation size.
# 1D: (L) * N * Cout * Cin * kernel
# 2D: (H * W) * N * Cout * Cin * kernel
# 3D: (H * W * D) * N * Cout * Cin * kernel

View File

@@ -34,9 +34,9 @@ class LayerNormGenerator(StrategyGenerator):
'''
Compute the computation cost per device with this specific strategy.
Note: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
Note: compute_cost needs to be divided by TFLOPS; for now it just shows the computation size.
'''
# TODO: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
# TODO: compute_cost needs to be divided by TFLOPS; for now it just shows the computation size.
# TODO: a constant coefficient needs to be added.
sharded_input_shape = strategy.sharding_specs[self.op_data['input']].get_sharded_shape_per_device()

View File

@@ -17,7 +17,7 @@ class NormalPoolStrategyGenerator(StrategyGenerator):
"""
NormalPoolStrategyGenerator is a generic class to generate strategies for pool operation like MaxPoolxd.
The reason we call this normal pool is that AvgPoolxd and MaxPoolxd take kernel-size elements from the image,
and reduce them depening on the operation type.
and reduce them depending on the operation type.
"""
def validate(self) -> bool:
@@ -35,9 +35,9 @@ class NormalPoolStrategyGenerator(StrategyGenerator):
'''
Compute the computation cost per device with this specific strategy.
Note: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
Note: compute_cost needs to be divided by TFLOPS; for now it just shows the computation size.
'''
# TODO: compute_cost need to be devided by TFLOPS, now it just shows the computation size.
# TODO: compute_cost needs to be divided by TFLOPS; for now it just shows the computation size.
# 1D: (Lout) * N * C * kernel
# 2D: (H * W) * N * C * kernel
# 3D: (H * W * D) * N * C * kernel