[hotfix] moe hybrid parallelism benchmark & follow-up fix (#6048)
* [example] pass use_fp8_comm flag to all plugins
* [example] add mixtral benchmark
* [moe] refine assertion and check
* [moe] fix mixtral & add more tests
* [moe] consider checking dp * sp group and moe_dp_group
* [mixtral] remove gate tp & add more tests
* [deepseek] fix tp & sp for deepseek
* [mixtral] minor fix
* [deepseek] add deepseek benchmark
@@ -308,7 +308,7 @@ class EPGradScalerIn(torch.autograd.Function):
         assert len(grad_outputs) == 1
         grad = grad_outputs[0]
         if ctx.ep_size != 1:
-            grad = grad * ctx.ep_size
+            grad.mul_(ctx.ep_size)
         return grad, None

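The hunk above replaces an out-of-place multiply on the incoming gradient with an in-place mul_, so backward no longer allocates a fresh tensor for the scaled gradient. A minimal sketch of the pattern against plain PyTorch; the class name is illustrative and the forward signature is inferred from the "return grad, None" in backward, not copied from the file:

import torch
from torch import Tensor

class GradScalerInSketch(torch.autograd.Function):
    """Identity in forward; scales the gradient by ep_size in backward (sketch only)."""

    @staticmethod
    def forward(ctx, input_: Tensor, ep_size: int) -> Tensor:
        ctx.ep_size = ep_size  # stash the expert-parallel world size for backward
        return input_

    @staticmethod
    def backward(ctx, *grad_outputs):
        assert len(grad_outputs) == 1
        grad = grad_outputs[0]
        if ctx.ep_size != 1:
            grad.mul_(ctx.ep_size)  # in-place, as in the "+" line above
        return grad, None  # one gradient per tensor input; None for the int ep_size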
@@ -328,7 +328,7 @@ class EPGradScalerOut(torch.autograd.Function):
         assert len(grad_outputs) == 1
         grad = grad_outputs[0]
         if ctx.ep_size != 1:
-            grad = grad / ctx.ep_size
+            grad.div_(ctx.ep_size)
         return grad, None

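EPGradScalerOut is the mirror image: its backward divides the gradient by ep_size, now in place via div_. A hedged sketch of how such a pair might be wrapped around the expert-parallel region; the call sites and the expert_forward helper are assumptions for illustration, not ColossalAI's actual code:

# hidden: tokens dispatched to this expert-parallel rank
hidden = EPGradScalerIn.apply(hidden, ep_size)    # backward: grad *= ep_size
hidden = expert_forward(hidden)                   # hypothetical per-rank expert computation
hidden = EPGradScalerOut.apply(hidden, ep_size)   # backward: grad /= ep_size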
@@ -449,7 +449,4 @@ def all_to_all_uneven(
     overlap: bool = False,
     fp8_communication: bool = False,
 ):
-    assert (
-        inputs.requires_grad
-    ), "Input must require grad to assure that backward is executed, otherwise it might hang the program."
     return AllToAllUneven.apply(inputs, input_split_sizes, output_split_sizes, group, overlap, fp8_communication)
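The last hunk removes the requires_grad assertion from the all_to_all_uneven wrapper, leaving the AllToAllUneven.apply call as the whole body. For orientation only, a minimal forward-only sketch of an uneven all-to-all built on the stock torch.distributed collective; this is an assumption about the underlying communication and ignores autograd, the overlap flag, and fp8_communication:

import torch
import torch.distributed as dist

def all_to_all_uneven_sketch(inputs, input_split_sizes, output_split_sizes, group=None):
    # Receive buffer sized from the split sizes announced by the peer ranks.
    output = inputs.new_empty((sum(output_split_sizes), *inputs.shape[1:]))
    dist.all_to_all_single(
        output,
        inputs,
        output_split_sizes=output_split_sizes,
        input_split_sizes=input_split_sizes,
        group=group,
    )
    return output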