Mirror of https://github.com/hpcaitech/ColossalAI.git
[hotfix] fix grad accumulation plus clipping for gemini (#5002)
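The change clears the cached l2_norm on a grad chunk that is reused across gradient-accumulation steps, so that gradient clipping recomputes the chunk's norm instead of reading the stale value cached during the previous step.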
@@ -637,6 +637,7 @@ class Chunk:
             # grad chunk is initialized, just reallocate cuda global chunk
             self.grad_chunk.cuda_shard = None
             self.grad_chunk.is_gathered = True
+            self.grad_chunk.l2_norm = None
             alloc_storage(self.grad_chunk.cuda_global_chunk)

         return self.grad_chunk
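The branch above reuses an already-initialized grad chunk, so the per-chunk norm cached on it belongs to the previous accumulation step and has to be dropped. Below is a minimal, self-contained sketch of that failure mode; ToyGradChunk, reduce, reuse_for_next_step, and total_grad_norm are made-up stand-ins for illustration, not ColossalAI's actual Chunk API.

import torch


class ToyGradChunk:
    def __init__(self, numel: int):
        self.payload = torch.zeros(numel)
        self.l2_norm = None  # cached squared norm, filled after reduction

    def reduce(self):
        # stand-in for the post-reduction hook that caches the chunk's norm
        self.l2_norm = float(self.payload.norm(2) ** 2)

    def reuse_for_next_step(self, clear_norm: bool):
        # clear_norm=True corresponds to what the hotfix does:
        # drop the cached norm when the chunk is reused for a new step
        if clear_norm:
            self.l2_norm = None


def total_grad_norm(chunks):
    # clipping-side helper: prefer the cached norm, recompute only if missing
    sq = 0.0
    for c in chunks:
        sq += c.l2_norm if c.l2_norm is not None else float(c.payload.norm(2) ** 2)
    return sq ** 0.5


chunk = ToyGradChunk(4)
chunk.payload.fill_(3.0)                     # gradients of step 1
chunk.reduce()                               # caches ||g||^2 = 36
chunk.reuse_for_next_step(clear_norm=False)  # buggy reuse: stale cache survives
chunk.payload.fill_(1.0)                     # gradients of step 2, ||g|| = 2
print(total_grad_norm([chunk]))              # 6.0 -- stale norm from step 1

chunk.reuse_for_next_step(clear_norm=True)   # fixed reuse: cache invalidated
print(total_grad_norm([chunk]))              # 2.0 -- recomputed from current grads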
@@ -343,6 +343,7 @@ class GeminiDDP(ModelWrapper):
                         grad_chunk = self.chunk_manager.rearrange_accumulated_grad_chunk(chunk)
                     else:
                         grad_chunk = chunk.grad_chunk
+                        chunk.grad_chunk.l2_norm = None

             # hold -> compute -> hold after bwd
             grad_chunk.tensor_trans_state(p, TensorState.COMPUTE)
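Downstream, clipping turns the total norm into a clip coefficient (the usual max_norm / total_norm scaling), so a stale per-chunk norm directly skews how much the freshly accumulated gradients are shrunk. The numbers below are a plain-PyTorch illustration of that effect, not Gemini's clipping code.

import torch

max_norm = 1.0
true_norm = 2.0    # norm of the gradients actually accumulated this step
stale_norm = 6.0   # cached norm left over from the previous step

grad = torch.full((4,), 1.0)  # ||grad||_2 == 2.0

correct_coef = max_norm / (true_norm + 1e-6)
buggy_coef = max_norm / (stale_norm + 1e-6)

print(grad * correct_coef)  # scaled to (roughly) unit norm, as intended
print(grad * buggy_coef)    # over-clipped: the stale norm triples the shrink factor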