Mirror of https://github.com/hpcaitech/ColossalAI.git
[moe] support optimizer checkpoint (#5015)
* Refactor MoE Manager setup method
* unshard optim ckpt
* optim io
* update transformer version
* update requirements
* update ckpt
* update ckpt
* update ckpt
* fix engine
* fix engine
@@ -76,7 +76,7 @@ def run_zero_optim_test(local_rank, world_size, stage=1):
 
 def run_dist(rank, world_size, port):
     colossalai.launch(config=dict(), rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
-    MOE_MANAGER.setup(seed=42, parallel="EP")
+    MOE_MANAGER.setup(parallel="EP")
     run_zero_optim_test(rank, world_size, stage=1)
     run_zero_optim_test(rank, world_size, stage=2)
 
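For context, a minimal sketch of the end-to-end flow this commit enables follows: set up the MoE manager with the refactored call (no seed argument), boost a model and optimizer with a ZeRO plugin, then save and reload the optimizer state through the Booster checkpoint I/O. This is an illustration under assumptions, not code from the commit: the toy model stands in for a real MoE model, the plugin choice mirrors the ZeRO stages exercised by the test above, and the import paths reflect the ColossalAI release around this change.

# A hedged sketch, not code from this commit: illustrates the refactored
# MOE_MANAGER.setup call together with optimizer checkpointing via the
# Booster API. Import paths are assumed for this release of ColossalAI.
import torch

import colossalai
from colossalai.booster import Booster
from colossalai.booster.plugin import LowLevelZeroPlugin
from colossalai.moe import MOE_MANAGER  # assumed import path

colossalai.launch_from_torch(config=dict())
MOE_MANAGER.setup(parallel="EP")  # refactored setup: seed no longer passed here

# A toy model as a stand-in; the test in the diff above uses a real MoE model.
model = torch.nn.Linear(8, 8).cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# ZeRO stage 1, matching the first test case above.
booster = Booster(plugin=LowLevelZeroPlugin(stage=1))
model, optimizer, *_ = booster.boost(model, optimizer)

# ... training steps would go here ...

# Save the optimizer state unsharded, then restore it.
booster.save_optimizer(optimizer, "moe_optim.ckpt")
booster.load_optimizer(optimizer, "moe_optim.ckpt")

Run under torchrun so the distributed environment variables are set, e.g. torchrun --nproc_per_node=2 sketch.py.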