[log] local throughput metrics (#811)

* Revert "[zero] add ZeroTensorShardStrategy (#793)"

This reverts commit 88759e289e.

* [gemini] set cpu memory capacity

* [log] local throughput collecting

* polish

* polish

* polish

* polish code

* polish
Author: Jiarui Fang
Date: 2022-04-20 10:05:39 +08:00
Committed by: GitHub
Parent: dd92b90a68
Commit: 61c20b44bc
3 changed files with 21 additions and 11 deletions


@@ -23,10 +23,10 @@ def convert_to_zero_v2(model: nn.Module, optimizer: torch.optim.Optimizer, model
     logger = get_dist_logger('convert_to_zero_v2')
-    logger.info(f'optimizer_config is {optimizer_config}')
+    logger.info(f'optimizer_config is {optimizer_config}', ranks=[0])
     if optimizer_config is None:
         optimizer_config = dict()
-    logger.info(f'model_config is {model_config}')
+    logger.info(f'model_config is {model_config}', ranks=[0])
     if model_config is None:
         model_config = dict()
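The hunk above passes ranks=[0] so that ColossalAI's distributed logger emits each message only on global rank 0 instead of once per process. A minimal sketch of the pattern, assuming a distributed environment has already been initialized (e.g. via colossalai.launch):

from colossalai.logging import get_dist_logger

# get_dist_logger returns a per-process distributed logger instance.
logger = get_dist_logger('convert_to_zero_v2')

# Without `ranks`, every rank prints this line; with `ranks=[0]`,
# only the rank-0 process does, keeping multi-GPU logs readable.
logger.info('optimizer_config is ...', ranks=[0])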


@@ -122,7 +122,8 @@ class ShardedOptimizerV2(ColossalaiOptimizer):
         self._register_master_weight()
         if self.gpu_margin_mem_ratio != 0.0 and not isinstance(sharded_model._tensor_placement_policy,
                                                                AutoTensorPlacementPolicy):
-            self._logger.warning(f'gpu_margin_mem_ratio is meaningless when tensor_placement_policy is not "auto"')
+            self._logger.warning(f'gpu_margin_mem_ratio is meaningless when tensor_placement_policy is not "auto"',
+                                 ranks=[0])
         if self._verbose:
             self._logger.debug(
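This hunk applies the same rank-0 restriction to the gpu_margin_mem_ratio warning, which only matters under AutoTensorPlacementPolicy. The throughput change named in the commit title lives in the third changed file, which is not shown in this excerpt; below is a hedged sketch of what purely local (per-rank, no cross-rank reduction) throughput collection can look like, with `run_step` and `batch_size` as hypothetical placeholders, not names from this commit:

import time

def local_throughput(run_step, batch_size: int, num_iters: int = 10) -> float:
    """Samples/sec measured on this rank alone; no all-reduce involved."""
    start = time.time()
    for _ in range(num_iters):
        run_step()  # hypothetical: one forward/backward/optimizer iteration
    return num_iters * batch_size / (time.time() - start)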