Mirror of https://github.com/hpcaitech/ColossalAI.git
[moe] support optimizer checkpoint (#5015)
* Refactor MoE Manager setup method
* unshard optim ckpt
* optim io
* update transformer version
* update requirements
* update ckpt
* update ckpt
* update ckpt
* fix engine
* fix engine
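The "unshard optim ckpt" item suggests that saving a ZeRO optimizer checkpoint now gathers each rank's state shard into a full, layout-independent state dict before writing it to disk. A minimal sketch of that gather step, assuming flat 1-D per-parameter shards split in rank order and an initialized process group; gather_optimizer_shards is a hypothetical helper for illustration, not ColossalAI's actual checkpoint IO:

import torch
import torch.distributed as dist

def gather_optimizer_shards(local_shard: dict) -> dict | None:
    """Collect every rank's flat optimizer-state shard and rebuild full tensors on rank 0."""
    world_size = dist.get_world_size()
    gathered = [None] * world_size
    dist.all_gather_object(gathered, local_shard)  # state dicts are picklable
    if dist.get_rank() != 0:
        return None  # only rank 0 writes the checkpoint file
    full_state = {}
    for param_id in gathered[0]:
        # Shards were produced by splitting the flat state tensor in rank order,
        # so concatenating in rank order restores the full tensor.
        full_state[param_id] = torch.cat([g[param_id] for g in gathered])
    return full_state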
@@ -88,7 +88,7 @@ def run_zero_test(local_rank, world_size, stage=1):
 
 def run_dist(rank, world_size, port):
     colossalai.launch(config=dict(), rank=rank, world_size=world_size, host="localhost", port=port, backend="nccl")
-    MOE_MANAGER.setup(seed=42, parallel="EP")
+    MOE_MANAGER.setup(parallel="EP")
+    seed_all(42 + rank)
     run_zero_test(rank, world_size, stage=1)
     run_zero_test(rank, world_size, stage=2)
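In the refactored setup shown in the hunk above, seeding is no longer a responsibility of MOE_MANAGER.setup: the test now seeds explicitly with seed_all(42 + rank), so each rank gets a distinct, deterministic seed instead of all ranks sharing seed 42. A minimal sketch of what such a seed_all helper typically does, assuming it seeds the Python, NumPy, and PyTorch RNGs (an illustration, not necessarily ColossalAI's implementation):

import random

import numpy as np
import torch

def seed_all(seed: int) -> None:
    """Seed every RNG the test process might touch, for per-rank reproducibility."""
    random.seed(seed)                      # Python's built-in RNG
    np.random.seed(seed)                   # NumPy RNG
    torch.manual_seed(seed)                # CPU (and default CUDA) RNG
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)   # all visible CUDA devices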