[hotfix] gpt example titans bug #2493 (#2494)

Author: Jiarui Fang
Date: 2023-01-18 12:04:18 +08:00
Committed by: GitHub
Parent: d565a24849
Commit: e327e95144
4 changed files with 58 additions and 49 deletions

@@ -12,11 +12,11 @@ TENSOR_SHAPE = (BATCH_SIZE // NUM_MICRO_BATCHES, SEQ_LEN, HIDDEN_SIZE)
 # if you do no want zero, just comment out this dictionary
 zero = dict(model_config=dict(tensor_placement_policy='cuda', shard_strategy=TensorShardStrategy()),
-            optimizer_config=dict(initial_scale=2**16))
+            optimizer_config=dict(initial_scale=2**5))
 optimizer = dict(
     type=HybridAdam,
-    lr=0.00015,
+    lr=0.000015,
     weight_decay=1e-2,
 )
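
For context, the hunk lowers the ZeRO optimizer's initial loss scale (2**16 -> 2**5) and the learning rate (0.00015 -> 0.000015) in the Titans GPT example config. Below is a minimal sketch of what the config file could look like after this fix. The constants before TENSOR_SHAPE and their values are illustrative placeholders (they are not visible in the hunk), and the import paths assume the Colossal-AI layout used by the Titans GPT example at the time of this commit.

# Sketch of the patched config; placeholder values are marked as such.
from colossalai.nn.optimizer import HybridAdam
from colossalai.zero.shard_utils import TensorShardStrategy

BATCH_SIZE = 8            # placeholder, not taken from the diff
SEQ_LEN = 1024            # placeholder, not taken from the diff
NUM_MICRO_BATCHES = 4     # placeholder, not taken from the diff
HIDDEN_SIZE = 768         # placeholder, not taken from the diff
TENSOR_SHAPE = (BATCH_SIZE // NUM_MICRO_BATCHES, SEQ_LEN, HIDDEN_SIZE)

# if you do not want zero, just comment out this dictionary
zero = dict(model_config=dict(tensor_placement_policy='cuda',
                              shard_strategy=TensorShardStrategy()),
            optimizer_config=dict(initial_scale=2**5))   # lowered from 2**16 by this commit

optimizer = dict(
    type=HybridAdam,
    lr=0.000015,          # lowered from 0.00015 by this commit
    weight_decay=1e-2,
)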