add interleaved pipeline, fix naive amp and update pipeline model initializer (#80)
@@ -3,7 +3,7 @@ from .common import (print_rank_0, sync_model_param_in_dp, is_dp_rank_0,
                      is_tp_rank_0, is_no_pp_or_last_stage, is_using_ddp,
                      is_using_pp, conditional_context, is_model_parallel_parameter,
                      clip_grad_norm_fp32, count_zeros_fp32, copy_tensor_parallel_attributes,
-                     param_is_not_tensor_parallel_duplicate)
+                     param_is_not_tensor_parallel_duplicate, switch_virtual_pipeline_parallel_rank)
 from .cuda import get_current_device, synchronize, empty_cache, set_to_cuda
 from .memory import report_memory_usage
 from .timer import MultiTimer, Timer
@@ -22,5 +22,6 @@ __all__ = ['checkpoint',
            'Timer', 'MultiTimer',
            'multi_tensor_applier',
            'accumulate_gradient',
-           'DataParallelSampler', 'get_dataloader'
+           'DataParallelSampler', 'get_dataloader',
+           'switch_virtual_pipeline_parallel_rank'
            ]
@@ -249,3 +249,13 @@ def param_is_not_tensor_parallel_duplicate(param):
     return (hasattr(param, IS_TENSOR_PARALLEL) and
             getattr(param, IS_TENSOR_PARALLEL)) or (
                 gpc.get_local_rank(ParallelMode.TENSOR) == 0)
+
+
+@contextmanager
+def switch_virtual_pipeline_parallel_rank(rank):
+    prev_rank = gpc.virtual_pipeline_parallel_rank
+    try:
+        gpc.set_virtual_pipeline_parallel_rank(rank)
+        yield
+    finally:
+        gpc.set_virtual_pipeline_parallel_rank(prev_rank)
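The new switch_virtual_pipeline_parallel_rank helper temporarily overrides the virtual pipeline rank tracked by the global context and restores the previous value on exit, even if an exception is raised. A minimal usage sketch follows, assuming the first two hunks belong to the colossalai.utils package __init__ so the helper is importable from there; the interleaved-schedule loop, model_chunks list and run_forward callable are illustrative assumptions, not part of this commit:

from colossalai.utils import switch_virtual_pipeline_parallel_rank


def forward_all_chunks(model_chunks, run_forward):
    # Illustrative only: iterate over the model chunks owned by this pipeline
    # stage in an interleaved (virtual pipeline) schedule.
    outputs = []
    for chunk_id, chunk in enumerate(model_chunks):
        # Temporarily mark which virtual stage is active so that code consulting
        # gpc sees the correct virtual rank; the previous rank is restored when
        # the with-block exits, even on error.
        with switch_virtual_pipeline_parallel_rank(chunk_id):
            outputs.append(run_forward(chunk))
    return outputs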