diff --git a/colossalai/legacy/__init__.py b/colossalai/legacy/__init__.py index 4d6ad357a..678a5def5 100644 --- a/colossalai/legacy/__init__.py +++ b/colossalai/legacy/__init__.py @@ -1,4 +1,11 @@ -from .initialize import initialize, launch, launch_from_openmpi, launch_from_slurm, launch_from_torch +from .initialize import ( + get_default_parser, + initialize, + launch, + launch_from_openmpi, + launch_from_slurm, + launch_from_torch, +) __all__ = [ "launch", @@ -6,4 +13,5 @@ __all__ = [ "launch_from_slurm", "launch_from_torch", "initialize", + "get_default_parser", ] diff --git a/examples/community/roberta/pretraining/arguments.py b/examples/community/roberta/pretraining/arguments.py index 35b809d80..3428db4cb 100644 --- a/examples/community/roberta/pretraining/arguments.py +++ b/examples/community/roberta/pretraining/arguments.py @@ -1,10 +1,10 @@ -import colossalai +import argparse __all__ = ["parse_args"] def parse_args(): - parser = colossalai.get_default_parser() + parser = argparse.ArgumentParser() parser.add_argument( "--distplan", diff --git a/examples/images/vit/args.py b/examples/images/vit/args.py index 7d54020f8..9de4743ef 100644 --- a/examples/images/vit/args.py +++ b/examples/images/vit/args.py @@ -1,8 +1,8 @@ -from colossalai import get_default_parser +import argparse def parse_demo_args(): - parser = get_default_parser() + parser = argparse.ArgumentParser() parser.add_argument( "--model_name_or_path", type=str, @@ -52,7 +52,7 @@ def parse_demo_args(): def parse_benchmark_args(): - parser = get_default_parser() + parser = argparse.ArgumentParser() parser.add_argument( "--model_name_or_path", diff --git a/examples/images/vit/run_benchmark.sh b/examples/images/vit/run_benchmark.sh index 41eab9c5a..ad41a2837 100644 --- a/examples/images/vit/run_benchmark.sh +++ b/examples/images/vit/run_benchmark.sh @@ -11,9 +11,9 @@ for PLUGIN in "torch_ddp" "torch_ddp_fp16" "low_level_zero" "gemini" "hybrid_par do MODEL_PATH="google/vit-base-patch16-224" -torchrun \ - --standalone \ - --nproc_per_node 4 \ +colossalai run \ + --nproc_per_node ${GPUNUM} \ + --master_port 29505 \ vit_benchmark.py \ --model_name_or_path ${MODEL_PATH} \ --mem_cap ${MEMCAP} \ diff --git a/examples/images/vit/run_demo.sh b/examples/images/vit/run_demo.sh index 9efe14759..8eead0661 100644 --- a/examples/images/vit/run_demo.sh +++ b/examples/images/vit/run_demo.sh @@ -35,9 +35,9 @@ WEIGHT_DECAY=0.05 WARMUP_RATIO=0.3 # run the script for demo -torchrun \ - --standalone \ +colossalai run \ --nproc_per_node ${GPUNUM} \ + --master_port 29505 \ vit_train_demo.py \ --model_name_or_path ${MODEL} \ --output_path ${OUTPUT_PATH} \ diff --git a/examples/images/vit/test_ci.sh b/examples/images/vit/test_ci.sh index 570147606..fc1f2b7a2 100644 --- a/examples/images/vit/test_ci.sh +++ b/examples/images/vit/test_ci.sh @@ -5,9 +5,9 @@ BS=8 for PLUGIN in "torch_ddp" "torch_ddp_fp16" "low_level_zero" "gemini" "hybrid_parallel" do -torchrun \ - --standalone \ +colossalai run \ --nproc_per_node 4 \ + --master_port 29505 \ vit_benchmark.py \ --model_name_or_path "google/vit-base-patch16-224" \ --plugin ${PLUGIN} \ diff --git a/examples/language/gpt/titans/train_gpt.py b/examples/language/gpt/titans/train_gpt.py index b9d802f01..565cf1e01 100644 --- a/examples/language/gpt/titans/train_gpt.py +++ b/examples/language/gpt/titans/train_gpt.py @@ -1,3 +1,4 @@ +import argparse import contextlib import os @@ -29,7 +30,7 @@ VOCAB_SIZE = 50257 def main(): - parser = colossalai.get_default_parser() + parser = argparse.ArgumentParser() parser.add_argument("--from_torch", default=False, action="store_true") parser.add_argument("--use_dummy_dataset", default=False, action="store_true") args = parser.parse_args() diff --git a/examples/language/opt/args.py b/examples/language/opt/args.py index 1ec19094e..fc3d42fae 100644 --- a/examples/language/opt/args.py +++ b/examples/language/opt/args.py @@ -1,8 +1,8 @@ -from colossalai import get_default_parser +import argparse def parse_demo_args(): - parser = get_default_parser() + parser = argparse.ArgumentParser() parser.add_argument( "--model_name_or_path", type=str, @@ -39,7 +39,7 @@ def parse_demo_args(): def parse_benchmark_args(): - parser = get_default_parser() + parser = argparse.ArgumentParser() parser.add_argument( "--model_name_or_path", type=str, diff --git a/examples/language/opt/run_benchmark.sh b/examples/language/opt/run_benchmark.sh index b94ee61f2..b79d6c134 100644 --- a/examples/language/opt/run_benchmark.sh +++ b/examples/language/opt/run_benchmark.sh @@ -16,9 +16,9 @@ for GPUNUM in 1 4 do MODLE_PATH="facebook/opt-${MODEL}" -torchrun \ - --standalone \ +colossalai run \ --nproc_per_node ${GPUNUM} \ + --master_port 29505 \ opt_benchmark.py \ --model_name_or_path ${MODLE_PATH} \ --mem_cap ${MEMCAP} \ diff --git a/examples/language/opt/run_demo.sh b/examples/language/opt/run_demo.sh index 07b429cec..fe49d794f 100644 --- a/examples/language/opt/run_demo.sh +++ b/examples/language/opt/run_demo.sh @@ -30,9 +30,9 @@ WEIGHT_DECAY=0.01 WARMUP_RATIO=0.1 # run the script for demo -torchrun \ - --standalone \ +colossalai run \ --nproc_per_node ${GPUNUM} \ + --master_port 29505 \ opt_train_demo.py \ --model_name_or_path ${MODEL} \ --output_path ${OUTPUT_PATH} \ diff --git a/examples/language/opt/test_ci.sh b/examples/language/opt/test_ci.sh index fa14f52b7..2e3a645ca 100644 --- a/examples/language/opt/test_ci.sh +++ b/examples/language/opt/test_ci.sh @@ -7,9 +7,9 @@ do for GPUNUM in 1 4 do -torchrun \ - --standalone \ +colossalai run \ --nproc_per_node ${GPUNUM} \ + --master_port 29505 \ opt_benchmark.py \ --model_name_or_path "facebook/opt-125m" \ --plugin ${PLUGIN} \ diff --git a/examples/language/palm/run.sh b/examples/language/palm/run.sh index 2a846e81a..0b9871c77 100644 --- a/examples/language/palm/run.sh +++ b/examples/language/palm/run.sh @@ -8,6 +8,6 @@ export PLACEMENT='cpu' export USE_SHARD_INIT=False export BATCH_SIZE=1 -env OMP_NUM_THREADS=12 torchrun --standalone --nproc_per_node=${GPUNUM} --master_port 29501 train.py \ +env OMP_NUM_THREADS=12 colossalai run --nproc_per_node ${GPUNUM} --master_port 29505 train.py \ --dummy_data=True --tp_degree=${TPDEGREE} --batch_size=${BATCH_SIZE} --plugin='gemini' \ --placement ${PLACEMENT} --shardinit ${USE_SHARD_INIT} --distplan ${DISTPAN} 2>&1 | tee run.log diff --git a/examples/language/palm/test_ci.sh b/examples/language/palm/test_ci.sh index 4de6a44e5..6bcd140fe 100644 --- a/examples/language/palm/test_ci.sh +++ b/examples/language/palm/test_ci.sh @@ -4,6 +4,6 @@ for BATCH_SIZE in 2 do for GPUNUM in 1 4 do -env OMP_NUM_THREADS=12 torchrun --standalone --nproc_per_node=${GPUNUM} --standalone train.py --dummy_data=True --batch_size=${BATCH_SIZE} --plugin='gemini' 2>&1 | tee run.log +env OMP_NUM_THREADS=12 colossalai run --nproc_per_node ${GPUNUM} --master_port 29505 train.py --dummy_data=True --batch_size=${BATCH_SIZE} --plugin='gemini' 2>&1 | tee run.log done done diff --git a/examples/language/palm/train.py b/examples/language/palm/train.py index e7af88c55..7af02e24e 100644 --- a/examples/language/palm/train.py +++ b/examples/language/palm/train.py @@ -1,3 +1,4 @@ +import argparse import gzip from contextlib import nullcontext from functools import partial @@ -33,7 +34,7 @@ SEQ_LEN = 1024 def parse_args(): - parser = colossalai.get_default_parser() + parser = argparse.ArgumentParser() parser.add_argument( "--distplan", type=str,