mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-07 03:52:01 +00:00
Add GRPO and Support RLVR for PPO (#6186)
* add grpo, support rlvr
* add grpo, support rlvr
* tested deepseek r1 pipeline
* add ci
* verify grpo r1
* verify grpo r1
* update readme, remove unused code
* [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci)
* remove path
* clean code
* fix circular import
* fix ci OOM
* fix ci OOM
* skip kto tp, fix qwen generation

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
@@ -24,7 +24,12 @@ if [ -z "$SFT_DATASET" ]; then
|
||||
fi
|
||||
|
||||
if [ -z "$PROMPT_DATASET" ]; then
|
||||
echo "Please set \$PROMPT_DATASET to the path to prompts."
|
||||
echo "Please set \$PROMPT_DATASET to the path to prompts dataset."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Abort early when PROMPT_RLVR_DATASET is empty or unset: the later steps use
# it as the location of the prompt dataset that carries gt_answer labels.
if [ -z "$PROMPT_RLVR_DATASET" ]; then
    echo "Please set \$PROMPT_RLVR_DATASET to the path to prompts dataset with gt_answer labels."
    exit 1
fi
|
||||
|
||||
@@ -69,6 +74,8 @@ get_data_input_dirs() {
|
||||
echo "$SFT_DATASET"
|
||||
elif [[ $data_type == "prompt" ]]; then
|
||||
echo "$PROMPT_DATASET"
|
||||
elif [[ $data_type == "prompt_rlvr" ]]; then
|
||||
echo "$PROMPT_RLVR_DATASET"
|
||||
elif [[ $data_type == "preference" ]]; then
|
||||
echo "$PREFERENCE_DATASET"
|
||||
elif [[ $data_type == "kto" ]]; then
|
||||
@@ -123,6 +130,10 @@ python $TEST_DIR/generate_dummy_datasets_for_testing.py \
|
||||
--data_dir $(get_data_input_dirs prompt) \
|
||||
--data_type "prompt"
|
||||
|
||||
# Generate the dummy "prompt_rlvr" dataset, passing the directory reported by
# get_data_input_dirs for that data type as --data_dir.
# Expansions are quoted so that paths containing spaces or glob characters
# reach the script as single arguments.
python "$TEST_DIR/generate_dummy_datasets_for_testing.py" \
    --data_dir "$(get_data_input_dirs prompt_rlvr)" \
    --data_type "prompt_rlvr"
|
||||
|
||||
# Generate the dummy "kto" dataset, passing the directory reported by
# get_data_input_dirs for that data type as --data_dir.
# Expansions are quoted so that paths containing spaces or glob characters
# reach the script as single arguments.
python "$TEST_DIR/generate_dummy_datasets_for_testing.py" \
    --data_dir "$(get_data_input_dirs kto)" \
    --data_type "kto"
|
||||
@@ -266,6 +277,52 @@ for model in ${MODELS[@]}; do
|
||||
done
|
||||
|
||||
|
||||
echo "[Test]: testing prepare_prompt_dataset.py (with verifiable reward)..."
|
||||
|
||||
# FIXME: This is a hack to skip tests that are not working
# Entries are matched as "<model>-<data_type>" by the loop that follows.
# The list is empty, so no prompt_rlvr case is skipped.
SKIPPED_TESTS=()
|
||||
|
||||
# test prepare_prompt_dataset (prompt_rlvr variant)
#
# For every model in MODELS, run prepare_dataset.py with --type prompt on the
# "prompt_rlvr" input directory, writing cache/jsonl/arrow output beneath
# $DATA_SAVE_PATH/tokenized_<model>_prompt_rlvr/.
#
# Globals read: MODELS, SKIPPED_TESTS, DATA_SAVE_PATH, NUM_RETRY, EXAMPLES_DIR
# Helpers used: get_data_input_dirs, get_tokenizer_dirs,
#               get_conversation_template_config
# Exit status:  the script exits 1 as soon as one model fails every attempt.
for model in "${MODELS[@]}"; do
    data_type="prompt_rlvr"

    # The right-hand side is quoted on purpose: it is matched as a literal
    # substring of the space-joined skip list, not as a regular expression.
    if [[ " ${SKIPPED_TESTS[*]} " =~ " $model-$data_type " ]]; then
        echo "[Test]: Skipped $model-$data_type"
        continue
    fi

    cache_dir="$DATA_SAVE_PATH/tokenized_${model}_${data_type}/cache"
    jsonl_dir="$DATA_SAVE_PATH/tokenized_${model}_${data_type}/jsonl"
    arrow_dir="$DATA_SAVE_PATH/tokenized_${model}_${data_type}/arrow"
    data_input_dirs=$(get_data_input_dirs "$data_type")
    tokenizer_dir=$(get_tokenizer_dirs "$model")
    conversation_template=$(get_conversation_template_config "$model")

    # Start from "failed": if the retry loop runs zero times (NUM_RETRY unset,
    # empty or 0) the check below must not reuse a stale status left behind by
    # an earlier test section, which would let this case pass silently.
    passed=1
    for i in $(seq "$NUM_RETRY"); do
        # Remove output of any previous attempt so each attempt starts clean.
        rm -rf -- "$cache_dir" "$jsonl_dir" "$arrow_dir"
        echo "[Test]: $model-$data_type, attempt $i"
        python "$EXAMPLES_DIR/data_preparation_scripts/prepare_dataset.py" \
            --type prompt \
            --data_input_dirs "$data_input_dirs" \
            --conversation_template_config "$conversation_template" \
            --tokenizer_dir "$tokenizer_dir" \
            --data_cache_dir "$cache_dir" \
            --data_jsonl_output_dir "$jsonl_dir" \
            --data_arrow_output_dir "$arrow_dir" \
            --max_length 400 \
            --num_samples_per_datafile 100 \
            --num_spliced_dataset_bins 1
        passed=$?
        if [ "$passed" -eq 0 ]; then
            break
        fi
    done

    if [ "$passed" -ne 0 ]; then
        echo "[Test]: Failed $model-$data_type"
        exit 1
    fi
done
|
||||
|
||||
echo "[Test]: testing prepare_kto_dataset.py ..."
|
||||
|
||||
# FIXME: This is a hack to skip tests that are not working
|
||||
|
Reference in New Issue
Block a user