mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-05 19:13:01 +00:00
[hotfix] fix typo in hybrid parallel io (#4697)
This commit is contained in:
@@ -39,7 +39,7 @@ except ImportError:
|
||||
_EXTRA_STATE_KEY_SUFFIX = '_extra_state'
|
||||
|
||||
|
||||
class HypridParallelCheckpointIO(GeneralCheckpointIO):
|
||||
class HybridParallelCheckpointIO(GeneralCheckpointIO):
|
||||
"""
|
||||
CheckpointIO for Hybrid Parallel Training.
|
||||
|
||||
@@ -136,7 +136,7 @@ class HypridParallelCheckpointIO(GeneralCheckpointIO):
|
||||
|
||||
param_id = param_info['param2id'][id(working_param)]
|
||||
original_shape = param_info['param2shape'][id(working_param)]
|
||||
state_ = HypridParallelCheckpointIO.gather_from_sharded_optimizer_state(state,
|
||||
state_ = HybridParallelCheckpointIO.gather_from_sharded_optimizer_state(state,
|
||||
working_param,
|
||||
original_shape=original_shape,
|
||||
dp_group=dp_group,
|
||||
@@ -189,7 +189,7 @@ class HypridParallelCheckpointIO(GeneralCheckpointIO):
|
||||
|
||||
# Then collect the sharded parameters & buffers along tp_group.
|
||||
# Only devices with tp_rank == 0 are responsible for model saving.
|
||||
state_dict_shard = HypridParallelCheckpointIO._model_sharder(model, size_per_shard=size_per_shard)
|
||||
state_dict_shard = HybridParallelCheckpointIO._model_sharder(model, size_per_shard=size_per_shard)
|
||||
weights_name, save_index_file = get_model_base_filenames(prefix, use_safetensors)
|
||||
index_file = CheckpointIndexFile(checkpoint)
|
||||
control_saving = (self.tp_rank == 0)
|
||||
@@ -385,7 +385,7 @@ class HypridParallelCheckpointIO(GeneralCheckpointIO):
|
||||
|
||||
# Then collect the sharded states along dp_group(if using zero)/tp_group.
|
||||
# Only devices with (dp_rank == 0 and tp_rank == 0) are responsible for states saving.
|
||||
state_dict_shard = HypridParallelCheckpointIO._optimizer_sharder(
|
||||
state_dict_shard = HybridParallelCheckpointIO._optimizer_sharder(
|
||||
optimizer,
|
||||
use_zero=self.use_zero,
|
||||
dp_group=self.dp_group,
|
||||
|
Reference in New Issue
Block a user