[devops] update torch version of CI (#3725)

* [test] fix flop tensor test

* [test] fix autochunk test

* [test] fix lazyinit test

* [devops] update torch version of CI

* [devops] enable testmon

* [devops] fix ci

* [devops] fix ci

* [test] fix checkpoint io test

* [test] fix cluster test

* [test] fix timm test

* [devops] fix ci

* [devops] fix ci

* [devops] fix ci

* [devops] fix ci

* [devops] force sync to test ci

* [test] skip fsdp test
Hongxin Liu, 2023-05-15 17:20:56 +08:00, committed by GitHub
parent b37797ed3d
commit afb239bbf8
17 changed files with 74 additions and 46 deletions


@@ -39,10 +39,10 @@ def check_low_level_zero_checkpointIO(stage: int):
     ckpt_io = LowLevelZeroCheckpointIO()
     ckpt_io.save_optimizer(optimizer, optimizer_ckpt_tempfile.name)
+    new_model = resnet18()
+    new_optimizer = HybridAdam((new_model.parameters()), lr=0.001)
+    _, new_optimizer, _, _, _ = booster.boost(new_model, new_optimizer)
     if ckpt_io.coordinator.is_master():
-        new_model = resnet18()
-        new_optimizer = HybridAdam((new_model.parameters()), lr=0.001)
-        _, new_optimizer, _, _, _ = booster.boost(new_model, new_optimizer)
         ckpt_io.load_optimizer(new_optimizer, optimizer_ckpt_tempfile.name)
         check_state_dict_equal(optimizer.state_dict(), new_optimizer.state_dict(), False)
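
Read without the diff markers, the updated region of check_low_level_zero_checkpointIO now rebuilds the model and optimizer and calls booster.boost on every rank, while only the master rank reloads the saved optimizer state and compares it. A readability sketch assembled from the hunk above (the surrounding test setup is omitted, so this excerpt is not self-contained):

    ckpt_io = LowLevelZeroCheckpointIO()
    ckpt_io.save_optimizer(optimizer, optimizer_ckpt_tempfile.name)
    # every rank rebuilds the model/optimizer and goes through booster.boost
    new_model = resnet18()
    new_optimizer = HybridAdam((new_model.parameters()), lr=0.001)
    _, new_optimizer, _, _, _ = booster.boost(new_model, new_optimizer)
    if ckpt_io.coordinator.is_master():
        # only the master rank reloads the checkpoint and checks the state dicts
        ckpt_io.load_optimizer(new_optimizer, optimizer_ckpt_tempfile.name)
        check_state_dict_equal(optimizer.state_dict(), new_optimizer.state_dict(), False)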


@@ -40,12 +40,12 @@ def check_torch_ddp_checkpointIO():
     ckpt_io.save_optimizer(optimizer, optimizer_ckpt_tempfile.name)
     ckpt_io.save_lr_scheduler(scheduler, lr_scheduler_ckpt_tempfile.name)
-    if ckpt_io.coordinator.is_master():
-        new_model = resnet18()
-        new_optimizer = SGD((new_model.parameters()), lr=0.001)
-        new_scheduler = torch.optim.lr_scheduler.StepLR(new_optimizer, step_size=1, gamma=0.1)
-        _, new_optimizer, _, _, new_scheduler = booster.boost(new_model, new_optimizer, lr_scheduler=new_scheduler)
+    new_model = resnet18()
+    new_optimizer = SGD((new_model.parameters()), lr=0.001)
+    new_scheduler = torch.optim.lr_scheduler.StepLR(new_optimizer, step_size=1, gamma=0.1)
+    _, new_optimizer, _, _, new_scheduler = booster.boost(new_model, new_optimizer, lr_scheduler=new_scheduler)
+    if ckpt_io.coordinator.is_master():
         ckpt_io.load_optimizer(new_optimizer, optimizer_ckpt_tempfile.name)
         check_state_dict_equal(optimizer.state_dict(), new_optimizer.state_dict(), False)
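
The second hunk applies the same reshuffle to check_torch_ddp_checkpointIO: model, optimizer, and LR-scheduler construction plus booster.boost move out of the is_master() guard, and only the load-and-compare steps stay behind it, presumably because boost performs collective setup that every rank must join (the commit message does not state the reason). The resulting shape, assembled from the hunk above (setup and any scheduler reload are omitted):

    ckpt_io.save_optimizer(optimizer, optimizer_ckpt_tempfile.name)
    ckpt_io.save_lr_scheduler(scheduler, lr_scheduler_ckpt_tempfile.name)
    # all ranks rebuild the model, optimizer and scheduler and call booster.boost
    new_model = resnet18()
    new_optimizer = SGD((new_model.parameters()), lr=0.001)
    new_scheduler = torch.optim.lr_scheduler.StepLR(new_optimizer, step_size=1, gamma=0.1)
    _, new_optimizer, _, _, new_scheduler = booster.boost(new_model, new_optimizer, lr_scheduler=new_scheduler)
    if ckpt_io.coordinator.is_master():
        # only the master rank reloads and compares the optimizer state
        ckpt_io.load_optimizer(new_optimizer, optimizer_ckpt_tempfile.name)
        check_state_dict_equal(optimizer.state_dict(), new_optimizer.state_dict(), False)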