[example] titans for gpt (#2484)
examples/language/gpt/titans/configs/gpt3_zero3_pp1d.py (new file, 31 lines)
@@ -0,0 +1,31 @@
from model import GPT3_pipeline_hybrid

from colossalai.nn.optimizer import HybridAdam
from colossalai.zero.shard_utils import TensorShardStrategy

BATCH_SIZE = 192
NUM_EPOCHS = 60
SEQ_LEN = 2048
NUM_MICRO_BATCHES = 192
HIDDEN_SIZE = 12288
TENSOR_SHAPE = (BATCH_SIZE // NUM_MICRO_BATCHES, SEQ_LEN, HIDDEN_SIZE)

# if you do not want ZeRO, just comment out this dictionary
zero = dict(model_config=dict(tensor_placement_policy='cuda', shard_strategy=TensorShardStrategy()),
            optimizer_config=dict(initial_scale=2**16))

optimizer = dict(
    type=HybridAdam,
    lr=0.00015,
    weight_decay=1e-2,
)

model = dict(type=GPT3_pipeline_hybrid, checkpoint=True, num_chunks=1)

# pipeline parallel: modify the integer value to set the number of pipeline stages
# tensor parallel: modify size to set the tensor parallel size, usually the number of GPUs per node
# for the current model implementation, mode can only be '1d' or None
parallel = dict(
    pipeline=1,
    tensor=dict(size=2, mode='1d'),  # for the current model implementation, mode can only be '1d' or None
)
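For context, this config file is not imported directly; the example's training script receives its path via --config. Below is a minimal sketch of how the legacy config-driven ColossalAI API of this era (~v0.2) picks such a file up; the script body and print-out are illustrative only and not part of this commit:

import colossalai
from colossalai.core import global_context as gpc

def main():
    # adds --config plus the distributed arguments injected by torchrun
    parser = colossalai.get_default_parser()
    args = parser.parse_args()
    # parses gpt3_zero3_pp1d.py; its module-level names become attributes of gpc.config
    colossalai.launch_from_torch(config=args.config)

    # with BATCH_SIZE = NUM_MICRO_BATCHES = 192, each micro-batch carries a single
    # sequence, so TENSOR_SHAPE resolves to (1, 2048, 12288)
    print(f"micro-batch size: {gpc.config.BATCH_SIZE // gpc.config.NUM_MICRO_BATCHES}, "
          f"tensor-parallel size: {gpc.config.parallel['tensor']['size']}")

if __name__ == '__main__':
    main()

With pipeline=1 and tensor=dict(size=2), the required world size is 1 x 2 = 2 GPUs, so a launch would look like: torchrun --nproc_per_node 2 train_gpt.py --config gpt3_zero3_pp1d.py (train_gpt.py standing in for the example's accompanying training script).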