Mirror of https://github.com/hpcaitech/ColossalAI.git
[moe] full test for deepseek and mixtral (pp + sp to fix)
@@ -100,7 +100,7 @@ class BucketStore(BaseStore):
         return self._grad_in_bucket
 
-    def get_flatten_grad(self, dtype=None) -> Tensor:
+    def get_flatten_grad(self) -> Tensor:
         """Return the flattened gradients slices in the bucket, the data organization of the flattened tensor:
         [grad0_rank0, grad1_rank0, ..., grad_0_rank1, grad1_rank1, ....]
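The docstring above describes the layout of the flattened tensor. As a rough, hypothetical sketch only (flatten_grads_by_rank and the rank-to-slices mapping below are illustrative names, not the actual BucketStore internals), each rank's gradient slices would be flattened and then concatenated rank by rank:

import torch

def flatten_grads_by_rank(grad_in_bucket: dict) -> torch.Tensor:
    # grad_in_bucket is assumed to map rank -> list of gradient slices owned by that rank.
    # Resulting layout: [grad0_rank0, grad1_rank0, ..., grad0_rank1, grad1_rank1, ...]
    per_rank = [torch.cat([g.flatten() for g in grads]) for _, grads in sorted(grad_in_bucket.items())]
    return torch.cat(per_rank)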
@@ -303,7 +303,7 @@ class LowLevelZeroOptimizer(OptimizerWrapper):
         for bucket_store in self.pg_to_bucket_store.values():
             bucket_store.build_grad_in_bucket()
 
-            flat_grads = bucket_store.get_flatten_grad(self._dtype)
+            flat_grads = bucket_store.get_flatten_grad()
             flat_grads /= bucket_store.world_size
 
             # ready to add other tensors to bucket
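A minimal sketch of how the new call site reads, assuming the entries of pg_to_bucket_store are keyed by process group and that dividing by world_size pre-scales the gradients so a later sum-style collective yields an average; the all_reduce shown here is only a stand-in, not necessarily the collective the optimizer actually issues:

import torch.distributed as dist

def reduce_bucket_grads(pg_to_bucket_store: dict) -> None:
    for group, bucket_store in pg_to_bucket_store.items():
        bucket_store.build_grad_in_bucket()
        # No dtype argument any more: the flat tensor keeps the dtype the
        # gradient slices were stored with.
        flat_grads = bucket_store.get_flatten_grad()
        flat_grads /= bucket_store.world_size      # pre-scale so a sum-reduce yields the mean
        dist.all_reduce(flat_grads, group=group)   # illustrative collective only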