[fix] fix typo s/muiti-node/multi-node/ etc. (#5448)

This commit is contained in:
digger yu
2024-04-07 18:42:15 +08:00
committed by GitHub
parent 15055f9a36
commit a799ca343b
10 changed files with 16 additions and 16 deletions

View File

@@ -96,7 +96,7 @@ def set_openmoe_args(
load_balance_beam_width (int, optional): Expert load balance search's beam width. Defaults to 8.
load_balance_group_swap_factor (float, optional): Expert load balance group swap factor. A larger value encourages fewer swaps. Defaults to 0.4.
enable_kernel (bool, optional): Use kernel optimization. Defaults to False.
enable_comm_overlap (bool, optional): Use communication overlap for MoE. Recommended to enable for muiti-node training. Defaults to False.
enable_comm_overlap (bool, optional): Use communication overlap for MoE. Recommended to enable for multi-node training. Defaults to False.
enable_hierarchical_alltoall (bool, optional): Use hierarchical alltoall for MoE. Defaults to False.
"""
moe_args = dict(

View File

@@ -190,13 +190,13 @@ def parse_args():
parser.add_argument(
"--comm_overlap",
action="store_true",
help="Use communication overlap for MoE. Recommended to enable for muiti-node training.",
help="Use communication overlap for MoE. Recommended to enable for multi-node training.",
)
# hierarchical all-to-all
parser.add_argument(
"--hierarchical_alltoall",
action="store_true",
help="Use hierarchical all-to-all for MoE. Recommended to enable for muiti-node training.",
help="Use hierarchical all-to-all for MoE. Recommended to enable for multi-node training.",
)
args = parser.parse_args()
@@ -366,7 +366,7 @@ def main():
):
coordinator.print_on_master(f"Apply load balance")
apply_load_balance(model, optimizer)
# save ckeckpoint
# save checkpoint
if (step + 1) % args.save_interval == 0:
coordinator.print_on_master(f"Saving model checkpoint to {args.output_path}")
booster.save_model(model, args.output_path, shard=True)