mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-11 13:59:08 +00:00
[pipeline]: support arbitrary batch size in forward_only mode (#5201)
* fix: remove drop last in val & test dataloader
* feat: add run_forward_only, support arbitrary bs
* chore: modify ci script
This commit is contained in:
@@ -1,8 +1,17 @@
|
||||
#!/bin/bash
#
# CI driver: fine-tunes the "bert" model once per booster plugin and requires
# each run to succeed (finetune.py is invoked with --target_f1 0.86).
#
# Each plugin is attempted up to FAIL_LIMIT times; the script exits with
# status 1 as soon as one plugin has failed FAIL_LIMIT times in a row.
#
# Requires: requirements.txt and finetune.py in the current directory, and
# `pip` / `torchrun` on PATH.

# Trace every command. errexit (-e) is intentionally NOT enabled here, matching
# the post-commit script: failures of torchrun are handled by the retry loop
# below. NOTE(review): this also means a failing `pip install` does not abort
# the script -- confirm that is intended.
set -x

pip install -r requirements.txt

# Maximum number of attempts per plugin before the whole script fails.
readonly FAIL_LIMIT=3

for plugin in "torch_ddp" "torch_ddp_fp16" "gemini" "low_level_zero" "hybrid_parallel"; do
  for ((i = 1; i <= FAIL_LIMIT; i++)); do
    # A successful run ends the retry loop for this plugin.
    torchrun --standalone --nproc_per_node 4 finetune.py --target_f1 0.86 --plugin "$plugin" --model_type "bert" && break
    echo "Failed $i times"
    if ((i == FAIL_LIMIT)); then
      echo "Failed $FAIL_LIMIT times, exiting"
      exit 1
    fi
  done
done
|
||||
|
Reference in New Issue
Block a user