[hotfix] fix grad accumulation plus clipping for gemini (#5002)

Author: Baizhou Zhang
Committed by: GitHub
Date: 2023-11-02 17:59:10 +08:00
Parent: dc003c304c
Commit: d99b2c961a
4 changed files with 13 additions and 3 deletions


@@ -637,6 +637,7 @@ class Chunk:
             # grad chunk is initialized, just reallocate cuda global chunk
             self.grad_chunk.cuda_shard = None
             self.grad_chunk.is_gathered = True
+            self.grad_chunk.l2_norm = None
             alloc_storage(self.grad_chunk.cuda_global_chunk)

         return self.grad_chunk
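Context for the change above: a Gemini grad chunk caches the squared L2 norm of its payload in `l2_norm` so the optimizer can later assemble a global norm for gradient clipping, and the norm is recorded at most once per reduction cycle. When an already-initialized grad chunk is reallocated for reuse, a leftover `l2_norm` from the previous step would either feed a stale value into the next clipping pass or trip the compute-once guard at the next reduction, so the added line clears it. Below is a minimal sketch of that caching pattern; `MiniChunk` and its methods are illustrative stand-ins, not ColossalAI's API.

```python
import torch


class MiniChunk:
    """Toy stand-in for a Gemini grad chunk that caches its squared L2 norm."""

    def __init__(self, numel: int):
        self.payload = torch.zeros(numel)  # accumulated gradient storage
        self.l2_norm = None  # cached squared norm, consumed by clipping

    def set_l2_norm(self) -> None:
        # Compute-once guard: the norm is recorded a single time per cycle,
        # so a reused chunk must have l2_norm reset to None beforehand.
        assert self.l2_norm is None, "norm computed twice without a reset"
        self.l2_norm = self.payload.norm(2) ** 2

    def reduce(self, grad: torch.Tensor) -> None:
        # Accumulate the incoming gradient, then record the chunk's norm.
        self.payload += grad
        self.set_l2_norm()

    def reuse(self) -> None:
        # What the hotfix does when a grad chunk is recycled: clear the
        # cached norm along with the rest of the per-step state.
        self.l2_norm = None
```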


@@ -343,6 +343,7 @@ class GeminiDDP(ModelWrapper):
                 grad_chunk = self.chunk_manager.rearrange_accumulated_grad_chunk(chunk)
             else:
                 grad_chunk = chunk.grad_chunk
+                chunk.grad_chunk.l2_norm = None

         # hold -> compute -> hold after bwd
         grad_chunk.tensor_trans_state(p, TensorState.COMPUTE)
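The GeminiDDP side mirrors the same reset: when gradients are accumulated into an existing grad chunk rather than rearranged for a fresh step, the norm cached by the previous micro-batch must be invalidated before the next reduction records a new one. A usage sketch continuing the `MiniChunk` toy above, with a plain gradient-accumulation loop and a single clip at the boundary; the `max_norm` value and loop shape are illustrative, not the library's code.

```python
chunk = MiniChunk(numel=4)
micro_grads = [torch.full((4,), 0.5), torch.full((4,), 0.25)]

for step, grad in enumerate(micro_grads):
    if step > 0:
        # Reusing the grad chunk across micro-batches: drop the stale norm
        # first, which is what the added l2_norm = None line does. Without
        # it, reduce() would assert (or clipping would see an old value).
        chunk.reuse()
    chunk.reduce(grad)

# Clip only once, after the last micro-batch, from the final cached norm.
total_norm = float(chunk.l2_norm.sqrt())
max_norm = 1.0
clip_coef = min(1.0, max_norm / (total_norm + 1e-6))
chunk.payload.mul_(clip_coef)
print(f"total norm {total_norm:.3f}, clip coefficient {clip_coef:.3f}")
```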