From 4b048a8728f27c88add61c824e5f6ff10125439a Mon Sep 17 00:00:00 2001 From: ver217 Date: Wed, 13 Apr 2022 22:36:11 +0800 Subject: [PATCH] fix prepare grads in sharded optim (#749) --- colossalai/zero/sharded_optim/sharded_optim_v2.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/colossalai/zero/sharded_optim/sharded_optim_v2.py b/colossalai/zero/sharded_optim/sharded_optim_v2.py index acf3d5904..c3c1723d2 100644 --- a/colossalai/zero/sharded_optim/sharded_optim_v2.py +++ b/colossalai/zero/sharded_optim/sharded_optim_v2.py @@ -288,6 +288,8 @@ class ShardedOptimizerV2(ColossalaiOptimizer): def _prepare_grads(self): for group in self.optim.param_groups: for p in group['params']: + if p.colo_attr.saved_grad.is_null(): + continue p.colo_attr.saved_grad.trans_state(TensorState.COMPUTE) # FIXME(ver217): p.data here is an empty tensor on CUDA and has no useful infomation # If we change p.grad directly