ColossalAI/applications/Chat/tests/test_train.sh
Wenhao Chen da4f7b855f
[chat] fix bugs and add unit tests (#4213)
* style: rename replay buffer

Experience replay is typically used in off-policy algorithms.
Using this name in PPO may be misleading.

* fix: fix wrong zero2 default arg

* test: update experience tests

* style: rename zero_pad fn

* fix: defer init in CycledDataLoader

* test: add benchmark test

* style: rename internal fn of generation

* style: rename internal fn of lora

* fix: remove unused loss fn

* fix: remove unused utils fn

* refactor: remove generate_with_actor fn

* fix: fix type annotation

* test: add models tests

* fix: skip llama due to long execution time

* style: modify dataset

* style: apply formatter

* perf: update reward dataset

* fix: fix wrong IGNORE_INDEX in sft dataset

* fix: remove DataCollatorForSupervisedDataset

* test: add dataset tests

* style: apply formatter

* style: rename test_ci to test_train

* feat: add llama in inference

* test: add inference tests

* test: change test scripts directory

* fix: update ci

* fix: fix typo

* fix: skip llama due to oom

* fix: fix file mod

* style: apply formatter

* refactor: remove duplicated llama_gptq

* style: apply formatter

* to: update rm test

* feat: add tokenizer arg

* feat: add download model script

* test: update train tests

* fix: modify gemini load and save pretrained

* test: update checkpoint io test

* to: modify nproc_per_node

* fix: do not remove existing dir

* fix: modify save path

* test: add random choice

* fix: fix sft path

* fix: enlarge nproc_per_node to avoid oom

* fix: add num_retry

* fix: make lora config of rm and critic consistent

* fix: add warning about lora weights

* fix: skip some gpt2 tests

* fix: remove grad ckpt in rm and critic due to errors

* refactor: directly use Actor in train_sft

* test: add more arguments

* fix: disable grad ckpt when using lora

* fix: fix save_pretrained and related tests

* test: enable zero2 tests

* revert: remove useless fn

* style: polish code

* test: modify test args

#!/usr/bin/env bash
# Pick the n GPUs with the least memory currently in use and expose only those.
set_n_least_used_CUDA_VISIBLE_DEVICES() {
    local n=${1:-"9999"}
    echo "GPU Memory Usage:"
    local FIRST_N_GPU_IDS=$(nvidia-smi --query-gpu=memory.used --format=csv |
        tail -n +2 |
        nl -v 0 |
        tee /dev/tty |
        sort -g -k 2 |
        awk '{print $1}' |
        head -n $n)
    export CUDA_VISIBLE_DEVICES=$(echo $FIRST_N_GPU_IDS | sed 's/ /,/g')
    echo "Now CUDA_VISIBLE_DEVICES is set to:"
    echo "CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
}
set_n_least_used_CUDA_VISIBLE_DEVICES 4
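# Example (hypothetical layout): on an 8-GPU node where GPUs 2, 5, 0 and 7
# report the least memory used, the call above exports
# CUDA_VISIBLE_DEVICES=2,5,0,7; the actual order depends on nvidia-smi output.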
set -xu

# Under `set -u` an unset variable aborts the script before the friendly
# message below can print, so expand with a ${VAR:-} default instead.
if [ -z "${SFT_DATASET:-}" ]; then
    echo "Please set \$SFT_DATASET to the path to the SFT dataset."
    exit 1
fi

if [ -z "${PROMPT_PATH:-}" ]; then
    echo "Please set \$PROMPT_PATH to the path to the prompts csv."
    exit 1
fi

if [ -z "${PRETRAIN_DATASET:-}" ]; then
    echo "Please set \$PRETRAIN_DATASET to the path to the alpaca data."
    exit 1
fi
NUM_RETRY=3
BASE_DIR=$(dirname $(dirname $(realpath "${BASH_SOURCE[0]}")))
EXAMPLES_DIR=$BASE_DIR/examples
MODELS_DIR=$BASE_DIR/examples/models_config
MODELS=('gpt2' 'bloom' 'opt' 'llama')
STRATEGIES=('ddp' 'colossalai_gemini' 'colossalai_zero2')
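# The strategies above: 'ddp' is plain PyTorch DistributedDataParallel, while
# 'colossalai_gemini' and 'colossalai_zero2' use ColossalAI's Gemini and
# ZeRO-2 memory-management backends, as named in the Coati examples.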
export OMP_NUM_THREADS=8
# install requirements
pip install -r $EXAMPLES_DIR/requirements.txt
python $EXAMPLES_DIR/download_model.py --model-dir $MODELS_DIR --config-only
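# --config-only is assumed to fetch just model configs/tokenizer files rather
# than full weights, keeping the CI environment light; see download_model.py
# (added in this PR) for the supported flags.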
get_pretrain() {
    local model=$1
    if [[ $model == "gpt2" ]]; then
        echo "gpt2"
    elif [[ $model == "bloom" ]]; then
        echo "bigscience/bloom-560m"
    elif [[ $model == "opt" ]]; then
        echo "facebook/opt-350m"
    else
        echo "Unknown model $model"
        exit 1
    fi
}
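# Caveat: callers invoke this as $(get_pretrain $model), so the `exit 1` above
# only terminates the command-substitution subshell, not the test script;
# llama never reaches this branch because all of its combinations are skipped.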
random_choice() {
    local arr=("$@")
    local len=${#arr[@]}
    local idx=$((RANDOM % len))
    echo "${arr[$idx]}"
}
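# Usage sketch: `random_choice "${GRAD_CKPTS[@]}"` prints one element chosen
# roughly uniformly at random ($RANDOM % len carries slight modulo bias, which
# is harmless for test selection).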
echo "[Test]: testing sft ..."
# FIXME: This is a hack to skip tests that are not working
# - gpt2-ddp: RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation
# - llama-*: These tests can be passed locally, skipped for long execution time
SKIPPED_TESTS=(
    "gpt2-ddp"
    "llama-ddp"
    "llama-colossalai_gemini"
    "llama-colossalai_zero2"
)
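# Entries are matched as whole words against " model-strategy-lora_rank " and
# " model-strategy ", so a skip may target one LoRA rank or a whole pair.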
GRAD_CKPTS=('' '--grad_checkpoint')
for lora_rank in '0' '4'; do
    for model in "${MODELS[@]}"; do
        # Shuffle so each run exercises the strategies in a different order.
        strategies=($(shuf -e "${STRATEGIES[@]}"))
        for strategy in "${strategies[@]}"; do
            if [[ " ${SKIPPED_TESTS[*]} " =~ " $model-$strategy-$lora_rank " ]]; then
                echo "[Test]: Skipped $model-$strategy-$lora_rank"
                continue
            elif [[ " ${SKIPPED_TESTS[*]} " =~ " $model-$strategy " ]]; then
                echo "[Test]: Skipped $model-$strategy"
                continue
            fi
            pretrain=$(get_pretrain $model)
            # --pretrain is only passed for LoRA runs (rank > 0); other runs
            # start from the config-initialized models downloaded above.
            pretrain_model=""
            if [[ $lora_rank -gt 0 ]]; then
                pretrain_model="--pretrain $pretrain"
            fi
            grad_ckpt=$(random_choice "${GRAD_CKPTS[@]}")
            # Distributed launches can fail transiently, so retry up to
            # NUM_RETRY times before declaring the combination failed.
            for i in $(seq $NUM_RETRY); do
                echo "[Test]: $model-$strategy-$lora_rank, attempt $i"
                torchrun --standalone --nproc_per_node=4 $EXAMPLES_DIR/train_sft.py \
                    $pretrain_model --tokenizer $MODELS_DIR/$model \
                    --model $model --strategy $strategy --lora_rank $lora_rank $grad_ckpt \
                    --dataset $SFT_DATASET --max_datasets_size 8 \
                    --max_epochs 1 --batch_size 1 --accumulation_steps 1 \
                    --save_path $EXAMPLES_DIR/rlhf_models/sft_ckpt_${model}_${lora_rank}
                passed=$?
                if [ $passed -eq 0 ]; then
                    break
                fi
            done
            if [ $passed -ne 0 ]; then
                echo "[Test]: Failed $model-$strategy-$lora_rank"
                exit 1
            fi
        done
    done
done
echo "[Test]: testing reward model ..."
# FIXME: This is a hack to skip tests that are not working
# - gpt2-ddp: RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation
# - llama-*: These tests can be passed locally, skipped for long execution time
SKIPPED_TESTS=(
    "gpt2-ddp"
    "llama-ddp"
    "llama-colossalai_gemini"
    "llama-colossalai_zero2"
)
LOSS_FNS=('log_sig' 'log_exp')
DATASETS=('Anthropic/hh-rlhf' 'Dahoas/rm-static')
for lora_rank in '0' '4'; do
    for model in "${MODELS[@]}"; do
        strategies=($(shuf -e "${STRATEGIES[@]}"))
        for strategy in "${strategies[@]}"; do
            if [[ " ${SKIPPED_TESTS[*]} " =~ " $model-$strategy-$lora_rank " ]]; then
                echo "[Test]: Skipped $model-$strategy-$lora_rank"
                continue
            elif [[ " ${SKIPPED_TESTS[*]} " =~ " $model-$strategy " ]]; then
                echo "[Test]: Skipped $model-$strategy"
                continue
            fi
            pretrain=$(get_pretrain $model)
            pretrain_model=""
            if [[ $lora_rank -gt 0 ]]; then
                pretrain_model="--pretrain $pretrain"
            fi
            loss_fn=$(random_choice "${LOSS_FNS[@]}")
            dataset=$(random_choice "${DATASETS[@]}")
            subset=$(if [[ $dataset == "Dahoas/rm-static" ]]; then echo "None"; else echo "harmless-base"; fi)
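            # Anthropic/hh-rlhf is organized into subsets (harmless-base,
            # helpful-base, ...), while Dahoas/rm-static has none, so the
            # literal string "None" is passed through for the latter.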
            for i in $(seq $NUM_RETRY); do
                echo "[Test]: $model-$strategy-$lora_rank, attempt $i"
                torchrun --standalone --nproc_per_node=4 $EXAMPLES_DIR/train_reward_model.py \
                    $pretrain_model --tokenizer $MODELS_DIR/$model \
                    --model $model --strategy $strategy --lora_rank $lora_rank --loss_fn $loss_fn \
                    --dataset $dataset --subset $subset --test True --batch_size 1 \
                    --save_path $EXAMPLES_DIR/rlhf_models/rm_ckpt_${model}_${lora_rank}.pt
                passed=$?
                if [ $passed -eq 0 ]; then
                    break
                fi
            done
            if [ $passed -ne 0 ]; then
                echo "[Test]: Failed to train reward model $model-$strategy-$lora_rank"
                exit 1
            fi
        done
    done
done
echo "[Test]: testing RLHF ..."
# FIXME: This is a hack to skip tests that are not working
# - gpt2-ddp: RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation
# - llama-*: These tests can be passed locally, skipped for long execution time
SKIPPED_TESTS=(
    "gpt2-ddp"
    "llama-ddp"
    "llama-colossalai_gemini"
    "llama-colossalai_zero2"
)
for model in "${MODELS[@]}"; do
    for lora_rank in '0' '4'; do
        strategies=($(shuf -e "${STRATEGIES[@]}"))
        for strategy in "${strategies[@]}"; do
            if [[ " ${SKIPPED_TESTS[*]} " =~ " $model-$strategy-$lora_rank " ]]; then
                echo "[Test]: Skipped $model-$strategy-$lora_rank"
                continue
            elif [[ " ${SKIPPED_TESTS[*]} " =~ " $model-$strategy " ]]; then
                echo "[Test]: Skipped $model-$strategy"
                continue
            fi
            rm_pretrain=$(get_pretrain $model)
            rm_pretrain_model=""
            if [[ $lora_rank -gt 0 ]]; then
                rm_pretrain_model="--rm_pretrain $rm_pretrain"
            fi
            for i in $(seq $NUM_RETRY); do
                echo "[Test]: $model-$strategy-$lora_rank, attempt $i"
                torchrun --standalone --nproc_per_node=4 $EXAMPLES_DIR/train_prompts.py \
                    --prompt_dataset $PROMPT_PATH --pretrain_dataset $PRETRAIN_DATASET \
                    --strategy $strategy --model $model --tokenizer $MODELS_DIR/$model \
                    --num_episodes 1 --num_collect_steps 1 --num_update_steps 1 \
                    --experience_batch_size 2 --train_batch_size 1 --lora_rank $lora_rank \
                    --pretrain $EXAMPLES_DIR/rlhf_models/sft_ckpt_${model}_${lora_rank} \
                    $rm_pretrain_model --rm_path $EXAMPLES_DIR/rlhf_models/rm_ckpt_${model}_${lora_rank}.pt \
                    --save_path $EXAMPLES_DIR/rlhf_models/actor_checkpoint_prompts.pt
                passed=$?
                if [ $passed -eq 0 ]; then
                    break
                fi
            done
            if [ $passed -ne 0 ]; then
                echo "[Test]: Failed to train RLHF $model-$strategy-$lora_rank"
                exit 1
            fi
        done
        # Clean up the SFT and RM checkpoints consumed above; use -f since
        # fully-skipped models (llama) never produce them.
        rm -rf $EXAMPLES_DIR/rlhf_models/sft_ckpt_${model}_${lora_rank}
        rm -f $EXAMPLES_DIR/rlhf_models/rm_ckpt_${model}_${lora_rank}.pt
    done
done

rm -f $EXAMPLES_DIR/rlhf_models/actor_checkpoint_prompts.pt