add interleaved pipeline, fix naive amp and update pipeline model initializer (#80)

This commit is contained in:
ver217
2021-12-20 23:26:19 +08:00
committed by GitHub
parent 91c327cb44
commit 8f02a88db2
17 changed files with 544 additions and 170 deletions

View File

@@ -5,7 +5,7 @@ import torch
import torch.multiprocessing as mp
from torch.utils.data import DataLoader
-from colossalai.builder.pipeline import PipelineModelInitializer
+from colossalai.builder.pipeline import build_pipeline_model_from_cfg
from colossalai.core import global_context
from colossalai.initialize import launch
from colossalai.logging import get_dist_logger
@@ -28,7 +28,7 @@ def run_partition(rank, world_size):
logger.info('finished initialization')
# build model
-model = PipelineModelInitializer(global_context.config.model, 1, verbose=True).initialize()
+model = build_pipeline_model_from_cfg(global_context.config.model, 1, verbose=True)
assert isinstance(model, torch.nn.Module)
logger.info('model is created')

View File

@@ -8,7 +8,7 @@ import torch
import torch.multiprocessing as mp
import model
-from colossalai.builder import PipelineModelInitializer
+from colossalai.builder import build_pipeline_model_from_cfg
from colossalai.communication import p2p as p2p_communication
from colossalai.communication.utils import send_tensor_meta, recv_tensor_meta
from colossalai.context.parallel_mode import ParallelMode
@@ -39,7 +39,7 @@ def run_schedule(rank, world_size):
backend='nccl')
# build model
-model = PipelineModelInitializer(gpc.config.model, 1).initialize()
+model = build_pipeline_model_from_cfg(gpc.config.model, 1)
print_rank_0('model is created')
train_dataset = CIFAR10(