add moe context, moe utilities and refactor gradient handler (#455)

Authored by HELSON on 2022-03-18 16:38:32 +08:00, committed by GitHub
parent af185b5519
commit 84fd7c1d4d
11 changed files with 255 additions and 125 deletions


@@ -9,7 +9,6 @@ import torch
 import torch.distributed as dist
 from colossalai.constants import ALLOWED_MODES, INITIALIZER_MAPPING
 from colossalai.context.config import Config
-from colossalai.global_variables import moe_env
 from colossalai.global_variables import tensor_parallel_env as env
 from colossalai.logging import get_dist_logger
 from colossalai.registry import DIST_GROUP_INITIALIZER
@@ -407,13 +406,6 @@ class ParallelContext:
             # add this config to initialize later
             pg_init.append(dict(type=INITIALIZER_MAPPING[tensor_parallel_mode.lower()], **tensor_parallel_cfg))
 
-        # initialization for moe environment
-        if parallel_config is not None and 'moe' in parallel_config:
-            param = parallel_config['moe']
-            assert 'size' in param, "Moe model parallel size should be given"
-            moe_env.setup(param['size'])
-            pg_init.append(dict(type=INITIALIZER_MAPPING['moe']))
-
         # run initialization of different process groups
         for initializer_cfg in pg_init:
             cfg = initializer_cfg.copy()
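
For context, a minimal sketch of the user config that the deleted block used to consume. The `moe` key and its required `size` field come from the removed lines; the surrounding `parallel = dict(...)` layout and the `tensor` entry are assumptions about a typical Colossal-AI config file, not part of this diff.

# Hypothetical config sketch (not part of this commit): the removed block read
# parallel_config['moe'], required a 'size' entry, called moe_env.setup(size),
# and queued INITIALIZER_MAPPING['moe'] as a process-group initializer.
parallel = dict(
    tensor=dict(size=1, mode=None),  # assumed: unrelated tensor-parallel entry
    moe=dict(size=4),                # 'size' = MoE model parallel size (see the removed assert)
)

With this commit, ParallelContext no longer handles that key; MoE setup is moved out of the context initializer as part of the "add moe context" refactor named in the commit title.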