[builder] raise Error when CUDA_HOME is not set (#2213)
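The title describes the fix: the extension builder should fail fast with a clear message when CUDA_HOME is missing, rather than erroring cryptically mid-compilation. A minimal sketch of such a guard, with a helper name of my own choosing (the actual patch in #2213 may differ):

```python
import os


def get_cuda_home() -> str:
    # Fail fast with an actionable message instead of crashing later
    # while compiling CUDA extensions. Hypothetical sketch, not the
    # verbatim ColossalAI builder code.
    cuda_home = os.environ.get("CUDA_HOME")
    if cuda_home is None:
        raise RuntimeError(
            "CUDA_HOME is not set. Export it to your CUDA installation, "
            "e.g. `export CUDA_HOME=/usr/local/cuda`, then rebuild."
        )
    return cuda_home
```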
@@ -106,3 +106,8 @@ Touch the bar of model scale and batch size.
 | gpt2_20b | 8 | auto | 2 | 16 | 99.871 |
 | gpt2_20b | 8 | cpu | 2 | 64 | 125.170 |
 | gpt2_20b | 8 | const | 2 | 32 | 105.415 |
+
+
+| model | #GPU | policy | TP | batch per DP | Tflops |
+| ---------- | --------- |--------- |--------- |--------- |--------- |
+| gpt2_20b | 8 | cpu | 2 | 8 | 46.895 |
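For reference, the Tflops column in GPT benchmarks like this is typically derived from model size, tokens per step, and measured step time. A sketch of the usual estimate, assuming activation checkpointing (which adds a recomputation forward pass, hence the factor 8 rather than 6):

```python
def get_tflops(model_numel: int, batch_size: int, seq_len: int, step_time: float) -> float:
    # ~6 * params FLOPs per token for forward + backward, plus ~2 * params
    # for gradient-checkpoint recomputation => ~8 * params per token.
    return model_numel * batch_size * seq_len * 8 / 1e12 / step_time
```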
@@ -2,12 +2,12 @@
 export DISTPAN="colossalai"
 
 # The following options only valid when DISTPAN="colossalai"
-export TPDEGREE=2
+export TPDEGREE=4
 export GPUNUM=8
 export PLACEMENT='cpu'
 export USE_SHARD_INIT=False
-export BATCH_SIZE=64
-export MODEL_TYPE="gpt2_20b"
+export BATCH_SIZE=32
+# export MODEL_TYPE="gpt2_24b"
 
 mkdir -p logs
 env OMP_NUM_THREADS=16 torchrun --standalone --nproc_per_node=${GPUNUM} train_gpt_demo.py --tp_degree=${TPDEGREE} --model_type=${MODEL_TYPE} --batch_size=${BATCH_SIZE} --placement ${PLACEMENT} --shardinit ${USE_SHARD_INIT} --distplan ${DISTPAN} 2>&1 | tee ./logs/${MODEL_TYPE}_${DISTPAN}_gpu_${GPUNUM}_bs_${BATCH_SIZE}_tp_${TPDEGREE}.log
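One subtlety in this launcher: USE_SHARD_INIT is passed to --shardinit as the literal string "False", and a naive bool() conversion would treat any non-empty string as true. A sketch of a safe converter on the Python side, assuming argparse (flag names are taken from the command above; defaults are my own):

```python
import argparse


def str2bool(value: str) -> bool:
    # Parse shell-style booleans such as "True", "false", "1".
    if value.lower() in ("yes", "true", "t", "1"):
        return True
    if value.lower() in ("no", "false", "f", "0"):
        return False
    raise argparse.ArgumentTypeError(f"boolean value expected, got {value!r}")


parser = argparse.ArgumentParser()
parser.add_argument("--distplan", type=str, default="colossalai")
parser.add_argument("--tp_degree", type=int, default=1)
parser.add_argument("--placement", type=str, default="cpu")
parser.add_argument("--shardinit", type=str2bool, default=False)
parser.add_argument("--batch_size", type=int, default=8)
parser.add_argument("--model_type", type=str, default="gpt2_medium")
```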
@@ -218,7 +218,7 @@ def main():
         model = gemini_zero_dpp(model, pg, args.placement)
 
         # build highly optimized cpu optimizer
-        optimizer = GeminiAdamOptimizer(model, lr=1e-3, initial_scale=2**5)
+        optimizer = GeminiAdamOptimizer(model, lr=1e-3, initial_scale=2**5, gpu_margin_mem_ratio=0.6)
         logger.info(get_mem_info(prefix='After init optim, '), ranks=[0])
     else:
         model = model_builder(args.model_type)(checkpoint=True).cuda()
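The one-line change above passes gpu_margin_mem_ratio=0.6 so the optimizer may keep part of its state, and thus part of the Adam update, on the GPU: the ratio scales the GPU memory left over after model data has been placed. A simplified illustration of what such a knob budgets (ColossalAI derives the margin from Gemini's own memory accounting, not from mem_get_info; this is a stand-in):

```python
import torch


def optimizer_gpu_budget(gpu_margin_mem_ratio: float) -> int:
    # Reserve a fraction of currently free GPU memory for optimizer
    # states; 0.0 keeps the update entirely on CPU. Illustrative only.
    free_bytes, _total_bytes = torch.cuda.mem_get_info()
    return int(free_bytes * gpu_margin_mem_ratio)
```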