mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-10-07 10:04:52 +00:00
[moe] support optimizer checkpoint (#5015)
* Refactor MoE Manager setup method
* unshard optim ckpt
* optim io
* update transformer version
* update requirements
* update ckpt
* update ckpt
* update ckpt
* fix engine
* fix engine
This commit is contained in:
@@ -41,7 +41,7 @@ def fsdp_main(rank, world_size, args):
     # initialize the process group
     dist.init_process_group("nccl")

-    MOE_MANAGER.setup(seed=42, parallel=None)
+    MOE_MANAGER.setup(parallel=None)

     dp_size = dist.get_world_size()
     dataset = RandomDataset(
Reference in New Issue
Block a user