Mirror of https://github.com/hpcaitech/ColossalAI.git
[moe] merge moe into main (#4978)
* update moe module
* support openmoe
@@ -16,7 +16,6 @@ from torch.optim.optimizer import Optimizer
 from torch.utils.data import DataLoader
 
 from colossalai.context import Config, ConfigException
-from colossalai.context.moe_context import MOE_CONTEXT
 from colossalai.interface import OptimizerWrapper
 from colossalai.legacy.amp import AMP_TYPE, convert_to_amp
 from colossalai.legacy.amp.naive_amp import NaiveAMPModel
@@ -36,7 +35,6 @@ from colossalai.legacy.zero import ShardedOptimizerV2, convert_to_zero_v2
 from colossalai.legacy.zero.gemini.ophooks import BaseOpHook
 from colossalai.logging import get_dist_logger
 from colossalai.utils import get_current_device
-from colossalai.utils.moe import sync_moe_model_param
 
 
 def get_default_parser():
@@ -323,8 +321,6 @@ def initialize(
     if not use_zero:
         if is_using_sequence():
             sync_model_param(model, ParallelMode.SEQUENCE_DP)
-        elif MOE_CONTEXT.is_initialized:
-            sync_moe_model_param(model)
         elif is_using_ddp():
             sync_model_param(model, ParallelMode.DATA)
         else:
@@ -377,14 +373,6 @@ def initialize(
                     "added even though not specified in the configuration",
                     ranks=[0],
                 )
-        elif is_using_ddp() and MOE_CONTEXT.is_initialized:
-            gradient_handler_cfg = [dict(type="MoeGradientHandler")]
-            if verbose:
-                logger.info(
-                    "Data parallel training is detected with moe parallel, MoeGradientHandler is automatically "
-                    "added even though not specified in the configuration",
-                    ranks=[0],
-                )
         elif is_using_sequence():
             model = DDP(
                 model,