From 31fe84237b22d183a7de9db153f3b6fb739732f6 Mon Sep 17 00:00:00 2001
From: HELSON
Date: Thu, 29 Dec 2022 23:00:14 +0800
Subject: [PATCH] [example] fix benchmark.sh for gpt example (#2229)

---
 examples/language/gpt/benchmark.sh |  8 ++++----
 examples/language/gpt/run.sh       | 12 ++++++------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/examples/language/gpt/benchmark.sh b/examples/language/gpt/benchmark.sh
index ad519bf2b..7ecc0c052 100644
--- a/examples/language/gpt/benchmark.sh
+++ b/examples/language/gpt/benchmark.sh
@@ -1,6 +1,6 @@
-for MODEL_NAME in "GPT2small"
+for MODEL_TYPE in "gpt2_medium"
 do
-for BATCH_SIZE in 8
+for BATCH_SIZE in 16
 do
 for GPUNUM in 1 2 4 8
 do
@@ -11,8 +11,8 @@ then
 continue
 fi
 echo "****************** Begin ***************************"
-echo "* benchmrking MODEL_NAME ${MODEL_NAME} BS ${BATCH_SIZE} BS ${BS} GPUNUM ${GPUNUM} TPDEGREE ${TPDEGREE}"
-bash ./run.sh
+echo "* benchmrking MODEL_TYPE ${MODEL_TYPE} BS ${BATCH_SIZE} BS ${BS} GPUNUM ${GPUNUM} TPDEGREE ${TPDEGREE}"
+MODEL_TYPE=${MODEL_TYPE} BATCH_SIZE=${BATCH_SIZE} GPUNUM=${GPUNUM} TPDEGREE=${TPDEGREE} bash ./run.sh
 echo "****************** Finished ***************************"
 echo ""
 echo ""
diff --git a/examples/language/gpt/run.sh b/examples/language/gpt/run.sh
index b0a1e35b6..c41574313 100644
--- a/examples/language/gpt/run.sh
+++ b/examples/language/gpt/run.sh
@@ -1,13 +1,13 @@
 # distplan in ["colossalai", "zero1", "zero2", "torch_ddp", "torch_zero"]
-export DISTPAN={$DISTPAN:-"colossalai"}
+export DISTPAN=${DISTPAN:-"colossalai"}
 
 # The following options only valid when DISTPAN="colossalai"
-export TPDEGREE=${TPDEGREE:-1}
 export GPUNUM=${GPUNUM:-1}
-export PLACEMENT=${PLACEMENT:'const'}
-export USE_SHARD_INIT=${USE_SHARD_INIT:False}
-export BATCH_SIZE=${BATCH_SIZE:-8}
-export MODEL_TYPE=${MODEL_TYPE:"gpt2_medium"}
+export TPDEGREE=${TPDEGREE:-1}
+export PLACEMENT=${PLACEMENT:-"const"}
+export USE_SHARD_INIT=${USE_SHARD_INIT:-False}
+export BATCH_SIZE=${BATCH_SIZE:-16}
+export MODEL_TYPE=${MODEL_TYPE:-"gpt2_medium"}
 
 mkdir -p logs
 torchrun --standalone --nproc_per_node=${GPUNUM} train_gpt_demo.py --tp_degree=${TPDEGREE} --model_type=${MODEL_TYPE} --batch_size=${BATCH_SIZE} --placement ${PLACEMENT} --shardinit ${USE_SHARD_INIT} --distplan ${DISTPAN} 2>&1 | tee ./logs/${MODEL_TYPE}_${DISTPAN}_gpu_${GPUNUM}_bs_${BATCH_SIZE}_tp_${TPDEGREE}.log