Mirror of https://github.com/hpcaitech/ColossalAI.git
[example] reorganize for community examples (#3557)
examples/community/roberta/pretraining/run_pretrain.sh — new file, 37 lines
@@ -0,0 +1,37 @@
#!/usr/bin/env sh

root_path=$PWD
PY_FILE_PATH="$root_path/run_pretraining.py"

tensorboard_path="$root_path/tensorboard"
log_path="$root_path/exp_log"
ckpt_path="$root_path/ckpt"


mkdir -p $tensorboard_path
mkdir -p $log_path
mkdir -p $ckpt_path

export PYTHONPATH=$PWD

env OMP_NUM_THREADS=40 colossalai run --hostfile ./hostfile \
    --include GPU002,GPU003,GPU004,GPU007 \
    --nproc_per_node=8 \
    $PY_FILE_PATH \
    --master_addr GPU007 \
    --master_port 20024 \
    --lr 2.0e-4 \
    --train_micro_batch_size_per_gpu 190 \
    --eval_micro_batch_size_per_gpu 20 \
    --epoch 15 \
    --data_path_prefix /h5 \
    --eval_data_path_prefix /eval_h5 \
    --tokenizer_path /roberta \
    --bert_config /roberta/config.json \
    --tensorboard_path $tensorboard_path \
    --log_path $log_path \
    --ckpt_path $ckpt_path \
    --log_interval 50 \
    --mlm bert \
    --wandb \
    --checkpoint_activations \