Mirror of https://github.com/hpcaitech/ColossalAI.git, synced 2025-05-06 15:38:26 +00:00
[hotfix] add DISTPAN argument for benchmark (#2412)
* change the benchmark config file
* change config
* revert config file
* rename distpan to distplan
This commit is contained in: parent 7d5640b9db, commit d84e747975
The first changed file (the benchmark sweep script that invokes run_gemini.sh; its path is not shown on this page) renames the misspelled loop variable and the per-invocation environment assignment:

@@ -1,5 +1,5 @@
 for MODEL_TYPE in "gpt2_medium"; do
-for DISPAN in "colossalai"; do
+for DISTPLAN in "colossalai"; do
 for BATCH_SIZE in 16; do
 for GPUNUM in 1 2 4 8; do
 for TPDEGREE in 1 2 4 8; do
@@ -8,8 +8,8 @@ for MODEL_TYPE in "gpt2_medium"; do
 fi
 for PLACEMENT in "cpu" "auto"; do
 echo "****************** Begin ***************************"
-echo "+ benchmrking MODEL ${MODEL_TYPE} DISPAN ${DISPAN} GPU ${GPUNUM} BS ${BATCH_SIZE} TP ${TPDEGREE} POLICY ${PLACEMENT}"
+echo "+ benchmrking MODEL ${MODEL_TYPE} DISTPLAN ${DISTPLAN} GPU ${GPUNUM} BS ${BATCH_SIZE} TP ${TPDEGREE} POLICY ${PLACEMENT}"
-MODEL_TYPE=${MODEL_TYPE} DISPAN=${DISPAN} BATCH_SIZE=${BATCH_SIZE} GPUNUM=${GPUNUM} TPDEGREE=${TPDEGREE} PLACEMENT=${PLACEMENT} \
+MODEL_TYPE=${MODEL_TYPE} DISTPLAN=${DISTPLAN} BATCH_SIZE=${BATCH_SIZE} GPUNUM=${GPUNUM} TPDEGREE=${TPDEGREE} PLACEMENT=${PLACEMENT} \
 bash ./run_gemini.sh
 echo "****************** Finished ***************************"
 echo ""
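The driver relies on Bash's per-invocation environment assignments: `VAR=value command` exports VAR to that one child process only, so each grid point reaches run_gemini.sh without polluting the parent shell. A minimal sketch of the pattern (child.sh is a hypothetical stand-in for run_gemini.sh):

#!/usr/bin/env bash
# child.sh (hypothetical) would read its settings from the environment,
# e.g.:  echo "plan=${DISTPLAN} gpus=${GPUNUM}"
for DISTPLAN in "colossalai"; do
for GPUNUM in 1 2; do
  # VAR=value cmd sets VAR in cmd's environment only
  DISTPLAN=${DISTPLAN} GPUNUM=${GPUNUM} bash ./child.sh
done
done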
In run_gemini.sh, the exported default and the comment that references it are renamed:

@@ -1,8 +1,8 @@
 set -x
 # distplan in ["colossalai", "zero1", "zero2", "torch_ddp", "torch_zero"]
-export DISTPAN=${DISTPAN:-"colossalai"}
+export DISTPLAN=${DISTPLAN:-"colossalai"}

-# The following options only valid when DISTPAN="colossalai"
+# The following options only valid when DISTPLAN="colossalai"
 export GPUNUM=${GPUNUM:-1}
 export TPDEGREE=${TPDEGREE:-1}
 export PLACEMENT=${PLACEMENT:-"cpu"}
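The `${VAR:-default}` expansion is what makes the rename load-bearing: it uses the caller's value when one is set and falls back to the default otherwise, so a caller still exporting the old DISTPAN name would be silently ignored and the plan would revert to "colossalai". A minimal sketch of the semantics:

unset DISTPLAN
echo ${DISTPLAN:-"colossalai"}   # -> colossalai (fallback used)
DISTPLAN=zero2
echo ${DISTPLAN:-"colossalai"}   # -> zero2 (caller's value wins)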
Further down in run_gemini.sh, the torchrun invocation and the log-file name pick up the corrected variable:

@@ -20,5 +20,5 @@ torchrun --standalone --nproc_per_node=${GPUNUM} ./train_gpt_demo.py \
 --batch_size=${BATCH_SIZE} \
 --placement=${PLACEMENT} \
 --shardinit=${USE_SHARD_INIT} \
---distplan=${DISTPAN} \
-2>&1 | tee ./gemini_logs/${MODEL_TYPE}_${DISTPAN}_gpu_${GPUNUM}_bs_${BATCH_SIZE}_tp_${TPDEGREE}_${PLACEMENT}.log
+--distplan=${DISTPLAN} \
+2>&1 | tee ./gemini_logs/${MODEL_TYPE}_${DISTPLAN}_gpu_${GPUNUM}_bs_${BATCH_SIZE}_tp_${TPDEGREE}_${PLACEMENT}.log
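The last line uses the common `2>&1 | tee` idiom: stderr is folded into stdout and the combined stream is both shown on the console and written to the per-configuration log file, whose name now interpolates the renamed variable instead of an empty string. A minimal sketch (do_work is a hypothetical command):

mkdir -p ./gemini_logs
# 2>&1 merges stderr into stdout; tee prints and saves the merged stream
do_work 2>&1 | tee ./gemini_logs/run.log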
In train_gpt_demo.py, the same commit also adjusts the "zero" branch of main(): the model is cast to fp16 before the optimizer is built, and LowLevelZeroOptimizer is given an explicit reduce-bucket size:

@@ -290,9 +290,11 @@ def main():
         from torch.distributed.optim import ZeroRedundancyOptimizer
         optimizer = ZeroRedundancyOptimizer(model.parameters(), optimizer_class=torch.optim.Adam, lr=0.01)
     elif args.distplan.startswith("zero"):
+        model = model.half()
         partition_flag = args.distplan == "zero2"
         optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
         optimizer = LowLevelZeroOptimizer(optimizer,
+                                          reduce_bucket_size=12 * 1024 * 1024,
                                           overlap_communication=True,
                                           partition_grad=partition_flag,
                                           verbose=True)
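Putting the three files together: the plan name chosen in the shell flows through --distplan into the branch above, so a zero run can be launched entirely from the environment. A usage sketch, assuming the defaults shown in run_gemini.sh:

# "zero2" starts with "zero", so the LowLevelZeroOptimizer branch runs,
# and partition_flag is True because the plan is exactly "zero2"
DISTPLAN=zero2 GPUNUM=4 BATCH_SIZE=16 bash ./run_gemini.sh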