Mirror of https://github.com/hpcaitech/ColossalAI.git (synced 2026-01-06 08:04:16 +00:00)
[zero] improve adaptability for not-shard parameters (#708)
* adapt post grad hooks for not-shard parameters
* adapt optimizer for not-shard parameters
* offload gradients for not-replicated parameters
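The change targets parameters that the ZeRO shard strategy leaves unsharded (for example MoE expert parameters, which are also not replicated across data-parallel ranks): their gradients still have to pass through the post-backward hooks and the sharded optimizer, and can be offloaded off the GPU. Below is a minimal, hypothetical sketch of such a hook; the `colo_attr.param_is_sharded` and `colo_attr.is_replicated` flags mirror the test diff further down, while `offload_to_cpu` and the hook itself are illustrative stand-ins rather than ColossalAI's actual implementation.

```python
import torch


def offload_to_cpu(grad: torch.Tensor) -> torch.Tensor:
    # Hypothetical helper: move a gradient tensor to CPU to free GPU memory.
    return grad.to("cpu", non_blocking=True)


def post_backward_hook(param: torch.nn.Parameter) -> None:
    """Illustrative post-gradient hook covering the three cases named in the commit."""
    if param.grad is None:
        return
    if param.colo_attr.param_is_sharded:
        # Sharded parameters: the gradient is reduce-scattered / sharded as before (omitted).
        pass
    elif not param.colo_attr.is_replicated:
        # Not-sharded and not-replicated (e.g. MoE experts): keep the full gradient,
        # but offload it so it does not stay resident on the GPU.
        param.grad = offload_to_cpu(param.grad)
    else:
        # Not-sharded but replicated: the gradient is all-reduced across ranks as usual (omitted).
        pass
```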
@@ -36,7 +36,7 @@ def run_model_test(enable_autocast, shard_strategy_class):
     # check whether parameters are identical in ddp
     for name, p in zero_model.named_parameters():
-        if not p.colo_attr.param_is_sharded and p.is_replicated:
+        if not p.colo_attr.param_is_sharded and p.colo_attr.is_replicated:
             assert_equal_in_group(p.colo_attr.sharded_data_tensor.payload)
 
     model = MoeModel().half()
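For context, the assertion in this test only makes sense for parameters that are replicated on every data-parallel rank, which is why the condition now reads the `is_replicated` flag from `p.colo_attr` instead of from the raw parameter. A rough sketch of what an all-rank equality check like `assert_equal_in_group` could look like (illustrative only, not ColossalAI's actual test utility):

```python
import torch
import torch.distributed as dist


def assert_equal_in_group(tensor: torch.Tensor, group=None) -> None:
    # Gather the tensor from every rank in the (data-parallel) group and
    # assert that all copies are identical.
    world_size = dist.get_world_size(group)
    gathered = [torch.empty_like(tensor) for _ in range(world_size)]
    dist.all_gather(gathered, tensor, group=group)
    for other in gathered[1:]:
        assert torch.equal(gathered[0], other), "parameter diverged across ranks"
```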