[pipeline/chimera] test chimera | fix bug of initializing (#1615)

* [pipeline/tuning] improve dispatch performance both time and space cost
* [pipeline/converge] add interface for testing convergence
* [NFC] polish colossalai/utils/multi_tensor_apply/multi_tensor_apply.py code style
* Update PipelineBase.py
* [pipeline/chimera] reconstruct PipelineBase and Worker to support more feasible custom schedule | finish Chimera
* [pipeline/chimera] test chimera | fix bug of initializing
2025-12-05 07:34:25 +00:00 · 2022-09-20 18:00:39 +08:00
parent 504ff1d101
commit 170fa81095
13 changed files with 342 additions and 144 deletions
--- a/tests/test_pipeline/test_cuda_rpc_optimizer.py
+++ b/tests/test_pipeline/test_cuda_rpc_optimizer.py
@@ -7,6 +7,16 @@ from colossalai.pipeline.rpc._pipeline_schedule import FillDrainPipelineEngine,
 from colossalai.testing import assert_close
 from rpc_test_utils import rpc_run, parse_args, RpcTestModel

+# module-level sizes shared by partition() and run_master() when building the model
+feat_num = 100
+h = 100
+
+
+def partition(pp_rank: int, chunk: int, stage_num: int):
+    # Build the RpcTestModel stage for `pp_rank` out of `stage_num` stages,
+    # sized by the module-level `feat_num` and `h`. `chunk` is accepted but
+    # not used in this body.
+    # The global torch RNG is reseeded before construction; presumably this
+    # makes every call (the pipeline engine's and the local reference model's)
+    # produce identically initialised weights -- TODO confirm against
+    # RpcTestModel.
+    torch.manual_seed(1024)
+    partition = RpcTestModel(pp_rank, stage_num, feat_num, h)
+    return partition
+

 def run_master(args):
    torch.manual_seed(100)
@@ -20,20 +30,14 @@ def run_master(args):
    optimizer_class = globals()[args.optimizer]

    lr = 1e-3
-
    sample_num = 1024
-    feat_num = 100
-    h = 100
    batch_size = 1024

    assert sample_num % batch_size == 0
-    batch_num = sample_num // batch_size

    input_sample = torch.randn((sample_num, feat_num), device=device)

-    module_partitions = [RpcTestModel(pp_rank, actual_stage_num, feat_num, h) for pp_rank in range(actual_stage_num)]
-
-    engine = OneFOneBPipelineEngine(module_partitions=module_partitions,
+    engine = OneFOneBPipelineEngine(partition_fn=partition,
                                    stage_num=stage_num,
                                    num_microbatches=num_microbatches,
                                    device=device,
@@ -55,7 +59,8 @@ def run_master(args):
            cuda_rpc_result.append(p)

    # compute forward result and backward grad of parameters just in rank_0
-    test_model = nn.Sequential(*module_partitions).to(device)
+    test_model = nn.Sequential(
+        *[partition(pp_rank, chunk, actual_stage_num) for pp_rank in range(actual_stage_num)]).to(device)
    optimizer: Optimizer = optimizer_class(test_model.parameters(), lr=lr)
    input_sample = input_sample.requires_grad_()
    out_val = test_model(input_sample).sum()