[Test/CI] remove test cases to reduce CI duration (#5753)

* [test] smaller gpt2 test case

* [test] reduce test cases: tests/test_zero/test_gemini/test_zeroddp_state_dict.py

* [test] reduce test cases: tests/test_zero/test_gemini/test_grad_accum.py

* [test] reduce test cases tests/test_zero/test_gemini/test_optim.py

* Revert "[test] smaller gpt2 test case"

Some tests might depend on the size of the model (i.e., the number of chunks)

This reverts commit df705a5210.
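
For context on the revert: Gemini packs parameters into fixed-capacity chunks, so shrinking the GPT-2 test model can change how many chunks get created and therefore what a chunk-dependent test actually exercises. Below is a purely illustrative sketch of that packing, not ColossalAI's real chunk manager; `count_chunks` and the chunk capacity are made up for illustration.

```python
# Hypothetical sketch: greedy packing of parameters into fixed-capacity chunks.
# It only shows why a smaller model yields fewer chunks; the real chunk search
# (search_chunk_configuration) is more involved.
import torch.nn as nn


def count_chunks(model: nn.Module, chunk_capacity: int) -> int:
    chunks, used = 1, 0
    for p in model.parameters():
        n = p.numel()
        if used + n > chunk_capacity:  # current chunk is full, open a new one
            chunks += 1
            used = 0
        used += n
    return chunks


small = nn.Sequential(*[nn.Linear(256, 256) for _ in range(4)])
large = nn.Sequential(*[nn.Linear(2048, 2048) for _ in range(4)])
print(count_chunks(small, chunk_capacity=1 << 22))  # packs into a single chunk
print(count_chunks(large, chunk_capacity=1 << 22))  # needs several chunks
```

A smaller GPT-2 config would likewise pack into fewer chunks, which is why the "smaller gpt2 test case" change was reverted rather than kept.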

* [test] reduce test cases: tests/test_checkpoint_io/test_gemini_checkpoint_io.py

* [CI] smaller test model for the two modified cases

* [CI] hardcode the GPT model for tests/test_zero/test_gemini/test_search.py since we need a fixed answer there
Author: botbw
Date: 2024-06-05 11:29:04 +08:00
Committed by: GitHub
Parent: 79f7a7b211
Commit: 80c3c8789b
6 changed files with 40 additions and 76 deletions

@@ -15,9 +15,7 @@ from colossalai.zero.gemini.chunk import search_chunk_configuration
 from tests.kit.model_zoo import model_zoo, run_fwd
 
 PLACEMENT_CONFIGS = [
     {"placement_policy": "static", "shard_param_frac": 0.0},  # zero2
     {"placement_policy": "static", "shard_param_frac": 1.0},  # zero3
-    {"placement_policy": "static", "shard_param_frac": 0.5},  # zero3-half
-    {"placement_policy": "static", "shard_param_frac": 0.75},
     {"placement_policy": "auto"},
 ]
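
For readers unfamiliar with these configs: under the static placement policy, `shard_param_frac` controls what fraction of each parameter Gemini shards across ranks, with 0.0 behaving like ZeRO-2 (parameters replicated) and 1.0 like ZeRO-3 (parameters fully sharded), as the inline comments indicate; the intermediate 0.5 and 0.75 cases are the ones this commit drops. A standalone sketch of that semantics in plain PyTorch (not Gemini internals; `split_param` is a made-up helper):

```python
# Hypothetical illustration of shard_param_frac (not ColossalAI internals):
# a fraction `frac` of each parameter is sharded across ranks, the remainder
# is kept whole on every rank.
import torch


def split_param(param: torch.Tensor, frac: float, rank: int, world_size: int):
    flat = param.detach().flatten()
    n_sharded = int(flat.numel() * frac)
    sharded, replicated = flat[:n_sharded], flat[n_sharded:]
    local_shard = sharded.chunk(world_size)[rank] if n_sharded else sharded
    return local_shard, replicated


p = torch.randn(1024)
for frac in (0.0, 0.5, 0.75, 1.0):
    shard, kept = split_param(p, frac, rank=0, world_size=4)
    print(f"frac={frac}: {shard.numel()} sharded elems on this rank, {kept.numel()} replicated")
```

Removing the 0.5 and 0.75 rows trims only the intermediate sharding fractions, so the ZeRO-2, ZeRO-3, and auto-placement paths remain covered.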
@@ -109,7 +107,7 @@ def exam_gemini_grad_acc(
     torch_model = DDP(torch_model, device_ids=[rank])
     set_seed(rank)
-    accum_iter = 4
+    accum_iter = 2
     train_dataloader = DummyDataloader(data_gen_fn)
     for i, data in enumerate(train_dataloader):
         delay_unscale = False if (i + 1) % accum_iter == 0 else True
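
The loop above is a standard gradient-accumulation pattern: gradients from `accum_iter` micro-batches are accumulated before a single optimizer step, and `delay_unscale` defers mixed-precision unscaling to the step iteration; this commit only lowers the window from 4 to 2 to shorten CI. A plain-PyTorch sketch of the same pattern (the model, data, and loss here are placeholders, not the test's Gemini/DDP setup):

```python
# Hedged sketch of gradient accumulation with a toy model and random data.
import torch
from torch import nn

model = nn.Linear(16, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
criterion = nn.CrossEntropyLoss()
accum_iter = 2  # accumulation window, lowered from 4 in this commit

for i in range(8):
    data = torch.randn(4, 16)
    target = torch.randint(0, 2, (4,))
    loss = criterion(model(data), target) / accum_iter  # average over the window
    loss.backward()                                     # gradients accumulate in .grad
    if (i + 1) % accum_iter == 0:                       # step only at window boundaries
        optimizer.step()
        optimizer.zero_grad()
```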