mirror of
https://github.com/hpcaitech/ColossalAI.git
synced 2025-09-08 20:40:34 +00:00
@@ -12,11 +12,11 @@ TENSOR_SHAPE = (BATCH_SIZE // NUM_MICRO_BATCHES, SEQ_LEN, HIDDEN_SIZE)
|
||||
|
||||
# if you do not want zero, just comment out this dictionary
|
||||
zero = dict(model_config=dict(tensor_placement_policy='cuda', shard_strategy=TensorShardStrategy()),
|
||||
optimizer_config=dict(initial_scale=2**16))
|
||||
optimizer_config=dict(initial_scale=2**5))
|
||||
|
||||
optimizer = dict(
|
||||
type=HybridAdam,
|
||||
lr=0.00015,
|
||||
lr=0.000015,
|
||||
weight_decay=1e-2,
|
||||
)
|
||||
|
||||
|
Reference in New Issue
Block a user