Mirror of https://github.com/hpcaitech/ColossalAI.git
[zero] sharded model support the reuse of fp16 shard (#495)
* sharded model supports reuse fp16 shard
* rename variable
* polish code
* polish code
* polish code
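The change lets the test construct ShardedModelV2 with the new reuse_fp16_shard flag (second hunk below) and switches the parameter check to check_sharded_model_params, which accepts the same flag. As a rough, plain-PyTorch sketch of the idea behind the flag (an illustration only, not ColossalAI code: the reuse presumably means the fp16 parameter shard's buffer is recycled to hold the fp16 gradient, while the fp32 master copy held by the CPU optimizer remains the source of truth):

import torch

# Toy illustration, not ColossalAI internals. Assumed scheme: after backward,
# the fp16 gradient overwrites the fp16 parameter shard's storage, so only one
# fp16 buffer per shard is alive; the fp32 master weights kept by the (CPU)
# optimizer are updated and then copied back into the same buffer.
fp16_shard = torch.randn(1024).half()        # flattened fp16 parameter shard
fp32_master = fp16_shard.float().clone()     # fp32 master copy held by the optimizer

grad_fp16 = torch.randn(1024).half()         # gradient produced by backward
fp16_shard.copy_(grad_fp16)                  # reuse the shard's memory for the gradient

lr = 1e-3
fp32_master.add_(fp16_shard.float(), alpha=-lr)   # optimizer step on the fp32 master weights
fp16_shard.copy_(fp32_master.half())              # write updated params back into the shard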
@@ -18,7 +18,7 @@ from colossalai.zero.sharded_optim._utils import has_inf_or_nan
 from tests.components_to_test.registry import non_distributed_component_funcs
 from torch.nn.parallel import DistributedDataParallel as DDP
 
-from common import CONFIG, check_sharded_params_padding
+from common import CONFIG, check_sharded_model_params
 
 
 def _run_step(model, optimizer, data, label, criterion, enable_autocast=False):
@@ -65,7 +65,8 @@ def _run_test_sharded_optim_v2(cpu_offload, shard_strategy_class, use_cpuadam, g
     zero_model = ShardedModelV2(zero_model,
                                 shard_strategy,
                                 offload_config=dict(device='cpu') if cpu_offload else None,
-                                use_memory_tracer=gpu_margin_mem_ratio > 0.0)
+                                use_memory_tracer=gpu_margin_mem_ratio > 0.0,
+                                reuse_fp16_shard=use_cpuadam)
 
     model = model_builder(checkpoint=True).half()
     col_model_deepcopy(zero_model, model)
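Note that the flag is tied to use_cpuadam rather than always enabled, presumably because reusing the fp16 shard for gradients is only safe while the fp32 master weights live with the CPU optimizer. A minimal sketch of that coupling (the helper name is hypothetical; the keyword arguments are the ones visible in the hunk above):

# Hypothetical helper mirroring the constructor call in the hunk above.
def sharded_model_kwargs(use_cpuadam: bool, cpu_offload: bool, gpu_margin_mem_ratio: float) -> dict:
    return dict(
        offload_config=dict(device='cpu') if cpu_offload else None,
        use_memory_tracer=gpu_margin_mem_ratio > 0.0,
        reuse_fp16_shard=use_cpuadam,  # only reuse the fp16 shard when CPUAdam holds the fp32 masters
    )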
@@ -92,7 +93,7 @@ def _run_test_sharded_optim_v2(cpu_offload, shard_strategy_class, use_cpuadam, g
         data, label = data.cuda(), label.cuda()
         _run_step(apex_model, apex_optimizer, data, label, criterion, False)
         _run_step(zero_model, sharded_optim, data, label, criterion, False)
-        check_sharded_params_padding(model, zero_model, loose=True)
+        check_sharded_model_params(model, zero_model, loose=True, reuse_fp16_shard=use_cpuadam)
     for param in model.parameters():
         assert not has_inf_or_nan(param)
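The renamed check_sharded_model_params lives in the test suite's common.py, which this diff does not show; it now also receives reuse_fp16_shard so it knows where to read the updated parameters from. A minimal sketch of the general shape of such a check, assuming both models expose comparable parameters (the real helper additionally has to gather and unpad each fp16 shard):

import torch

def check_params_close(ref_model: torch.nn.Module, test_model: torch.nn.Module, loose: bool = False) -> None:
    # Hypothetical stand-in for check_sharded_model_params: compare every
    # parameter of the reference model against the ZeRO model, using a
    # looser tolerance when `loose` is set.
    rtol, atol = (1e-2, 1e-2) if loose else (1e-4, 1e-5)
    for ref_p, test_p in zip(ref_model.parameters(), test_model.parameters()):
        assert torch.allclose(ref_p.float(), test_p.float(), rtol=rtol, atol=atol), \
            f'parameter mismatch, max abs diff {(ref_p.float() - test_p.float()).abs().max().item()}'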