[example] update vit example for hybrid parallel plugin (#4641)

* update vit example for hybrid plugin

* reset tp/pp size

* fix dataloader iteration bug

* update optimizer passing in evaluation/add grad_accum

* change criterion

* wrap tqdm

* change grad_accum to grad_checkpoint

* fix pbar
This commit is contained in:
Baizhou Zhang
2023-09-07 17:38:45 +08:00
committed by GitHub
parent 660eed9124
commit 295b38fecf
10 changed files with 246 additions and 192 deletions

View File

@@ -2,18 +2,15 @@ set -xe
 pip install -r requirements.txt
 BS=8
-for PLUGIN in "torch_ddp" "torch_ddp_fp16" "low_level_zero" "gemini"
-do
-for GPUNUM in 1 4
+for PLUGIN in "torch_ddp" "torch_ddp_fp16" "low_level_zero" "gemini" "hybrid_parallel"
 do
 torchrun \
 --standalone \
---nproc_per_node ${GPUNUM} \
+--nproc_per_node 4 \
 vit_benchmark.py \
 --model_name_or_path "google/vit-base-patch16-224" \
 --plugin ${PLUGIN} \
 --batch_size ${BS}
 done
-done