From 15055f9a36d215e5a4fba8658e1b04c895881da6 Mon Sep 17 00:00:00 2001
From: Edenzzzz
Date: Sun, 7 Apr 2024 12:06:27 +0800
Subject: [PATCH] [hotfix] quick fixes to make legacy tutorials runnable (#5559)

Co-authored-by: Edenzzzz
---
 colossalai/_analyzer/fx/tracer/tracer.py                  | 2 +-
 colossalai/nn/layer/scaled_softmax.py                     | 8 ++++++++
 .../tutorial/auto_parallel/auto_parallel_with_resnet.py   | 2 +-
 examples/tutorial/hybrid_parallel/train.py                | 4 ++--
 examples/tutorial/large_batch_optimizer/train.py          | 6 +++---
 examples/tutorial/opt/opt/run_clm.py                      | 4 ++--
 .../tutorial/sequence_parallel/model/layers/bert_layer.py | 2 +-
 examples/tutorial/sequence_parallel/train.py              | 4 ++--
 8 files changed, 20 insertions(+), 12 deletions(-)
 mode change 100755 => 100644 examples/tutorial/opt/opt/run_clm.py

diff --git a/colossalai/_analyzer/fx/tracer/tracer.py b/colossalai/_analyzer/fx/tracer/tracer.py
index 17dce7672..36e8780af 100644
--- a/colossalai/_analyzer/fx/tracer/tracer.py
+++ b/colossalai/_analyzer/fx/tracer/tracer.py
@@ -237,7 +237,7 @@ class ColoTracer(Tracer):
         # override the tracer to support custom modules and checkpointing
         if self.trace_act_ckpt:
             orig_ckpt_func_apply = torch.utils.checkpoint.CheckpointFunction.apply
-            orig_ckpt_func_without_reentrant = torch.utils.checkpoint._checkpoint_without_reentrant
+            orig_ckpt_func_without_reentrant = torch.utils.checkpoint._checkpoint_without_reentrant_generator
 
             def checkpoint(run_function, preserve_rng_state=False, *args):
                 self.ckpt_regions.append(self.ckpt_idx)
diff --git a/colossalai/nn/layer/scaled_softmax.py b/colossalai/nn/layer/scaled_softmax.py
index a8d72ddd9..2e802db2d 100644
--- a/colossalai/nn/layer/scaled_softmax.py
+++ b/colossalai/nn/layer/scaled_softmax.py
@@ -8,6 +8,14 @@ import torch.nn as nn
 
 from colossalai.kernel.kernel_loader import ScaledMaskedSoftmaxLoader, ScaledUpperTriangleMaskedSoftmaxLoader
 
+# NOTE: These kernels are compiled on specific GPU arch and not widely applicable.
+# try:
+# from colossalai._C import scaled_masked_softmax as scaled_masked_softmax, scaled_upper_triangle_masked_softmax_cuda as scaled_upper_triang_masked_softmax
+# except ImportError:
+
+scaled_masked_softmax = None
+scaled_upper_triang_masked_softmax = None
+
 
 class AttnMaskType(enum.Enum):
     padding = 1
diff --git a/examples/tutorial/auto_parallel/auto_parallel_with_resnet.py b/examples/tutorial/auto_parallel/auto_parallel_with_resnet.py
index 3c5b786b5..42b43bbc0 100644
--- a/examples/tutorial/auto_parallel/auto_parallel_with_resnet.py
+++ b/examples/tutorial/auto_parallel/auto_parallel_with_resnet.py
@@ -17,7 +17,7 @@ def synthesize_data():
 
 
 def main():
-    colossalai.launch_from_torch(config="./config.py")
+    colossalai.legacy.launch_from_torch(config="./config.py")
 
     logger = get_dist_logger()
 
diff --git a/examples/tutorial/hybrid_parallel/train.py b/examples/tutorial/hybrid_parallel/train.py
index 95f1bf8ee..70445dff9 100644
--- a/examples/tutorial/hybrid_parallel/train.py
+++ b/examples/tutorial/hybrid_parallel/train.py
@@ -41,9 +41,9 @@ class DummyDataloader:
 
 def main():
     # launch from torch
-    parser = colossalai.get_default_parser()
+    parser = colossalai.legacy.get_default_parser()
     args = parser.parse_args()
-    colossalai.launch_from_torch(config=args.config)
+    colossalai.legacy.launch_from_torch(config=args.config)
 
     # get logger
     logger = get_dist_logger()
diff --git a/examples/tutorial/large_batch_optimizer/train.py b/examples/tutorial/large_batch_optimizer/train.py
index dd114b5af..c78292e0c 100644
--- a/examples/tutorial/large_batch_optimizer/train.py
+++ b/examples/tutorial/large_batch_optimizer/train.py
@@ -37,14 +37,14 @@ class DummyDataloader:
 
 def main():
     # initialize distributed setting
-    parser = colossalai.get_default_parser()
+    parser = colossalai.legacy.get_default_parser()
     parser.add_argument(
         "--optimizer", choices=["lars", "lamb"], help="Choose your large-batch optimizer", required=True
     )
     args = parser.parse_args()
 
     # launch from torch
-    colossalai.launch_from_torch(config=args.config)
+    colossalai.legacy.launch_from_torch(config=args.config)
 
     # get logger
     logger = get_dist_logger()
@@ -73,7 +73,7 @@ def main():
     )
 
     # initialize
-    engine, train_dataloader, test_dataloader, _ = colossalai.initialize(
+    engine, train_dataloader, test_dataloader, _ = colossalai.legacy.initialize(
         model=model,
         optimizer=optimizer,
         criterion=criterion,
diff --git a/examples/tutorial/opt/opt/run_clm.py b/examples/tutorial/opt/opt/run_clm.py
old mode 100755
new mode 100644
index 3f0d04879..ae8a0f4a0
--- a/examples/tutorial/opt/opt/run_clm.py
+++ b/examples/tutorial/opt/opt/run_clm.py
@@ -72,7 +72,7 @@ def get_time_stamp():
 
 
 def parse_args():
-    parser = colossalai.get_default_parser()
+    parser = colossalai.legacy.get_default_parser()
     parser.add_argument("-s", "--synthetic", action="store_true")
     parser.add_argument(
         "--dataset_name",
@@ -289,7 +289,7 @@ class DummyDataloader:
 def main():
     args = parse_args()
     disable_existing_loggers()
-    colossalai.launch_from_torch(config=dict())
+    colossalai.legacy.launch_from_torch(config=dict())
     logger = get_dist_logger()
     is_main_process = dist.get_rank() == 0
 
diff --git a/examples/tutorial/sequence_parallel/model/layers/bert_layer.py b/examples/tutorial/sequence_parallel/model/layers/bert_layer.py
index 1ef16ee6a..c7782bf04 100644
--- a/examples/tutorial/sequence_parallel/model/layers/bert_layer.py
+++ b/examples/tutorial/sequence_parallel/model/layers/bert_layer.py
@@ -1,9 +1,9 @@
 import torch
 import torch.nn as nn
 
-from colossalai.kernel.cuda_native import LayerNorm
 from colossalai.kernel.jit import bias_dropout_add_fused_inference, bias_dropout_add_fused_train
 from colossalai.legacy.nn.layer.parallel_sequence import TransformerSelfAttentionRing
+from colossalai.nn.layer.layernorm import MixedFusedLayerNorm as LayerNorm
 
 from .dropout import get_bias_dropout_add
 from .mlp import TransformerMLP
diff --git a/examples/tutorial/sequence_parallel/train.py b/examples/tutorial/sequence_parallel/train.py
index f25fc8189..431af3586 100644
--- a/examples/tutorial/sequence_parallel/train.py
+++ b/examples/tutorial/sequence_parallel/train.py
@@ -48,7 +48,7 @@ def pipeline_data_process_func(stage_output, micro_batch_data):
 def main():
     # initialize
     parse_args()
-    colossalai.launch_from_torch(config="./config.py", seed=1234, backend="nccl")
+    colossalai.legacy.launch_from_torch(config="./config.py", seed=1234, backend="nccl")
 
     logger = get_dist_logger()
 
@@ -136,7 +136,7 @@ def main():
     logger.info(f"LR Scheduler is built with {warmup_steps} warmup steps and {gpc.config.DECAY_ITERS} decay steps")
 
     # # init
-    engine, *dummy = colossalai.initialize(model, optimizer, criterion, verbose=True)
+    engine, *dummy = colossalai.legacy.initialize(model, optimizer, criterion, verbose=True)
 
     # build timer
     timer = MultiTimer()
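
The tutorial changes above all follow one pattern: the config-driven entry points (get_default_parser, launch_from_torch, initialize) are now reached through the colossalai.legacy namespace. The sketch below is illustrative only and not part of the patch. It assumes those colossalai.legacy entry points behave as they are used in the diff; the toy model, optimizer, dataset, and the ./config.py path are placeholders rather than tutorial code.

# Illustrative only: a minimal sketch of the legacy launch pattern used by the
# patched tutorials. Assumes a ColossalAI build that ships the colossalai.legacy
# namespace and that the script is started via torchrun with a config file,
# e.g. torchrun --nproc_per_node=1 legacy_launch_sketch.py --config ./config.py
import colossalai
import colossalai.legacy  # the entry points used below live in this namespace
import torch
import torch.nn as nn
from colossalai.logging import get_dist_logger
from torch.utils.data import DataLoader, TensorDataset


def main():
    # parse --config plus the distributed arguments injected by torchrun
    parser = colossalai.legacy.get_default_parser()
    args = parser.parse_args()

    # build the global distributed context from the legacy config file
    colossalai.legacy.launch_from_torch(config=args.config)
    logger = get_dist_logger()

    # toy stand-ins for the tutorial model, loss, optimizer and data
    model = nn.Linear(32, 10)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    dataset = TensorDataset(torch.randn(64, 32), torch.randint(0, 10, (64,)))
    train_dataloader = DataLoader(dataset, batch_size=8)

    # wrap everything into a legacy engine, mirroring the
    # colossalai.legacy.initialize calls in the diff above
    # (the train_dataloader keyword is assumed, matching the returned tuple)
    engine, train_dataloader, _, _ = colossalai.legacy.initialize(
        model=model,
        optimizer=optimizer,
        criterion=criterion,
        train_dataloader=train_dataloader,
    )
    logger.info("legacy engine initialized")
    return engine


if __name__ == "__main__":
    main()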