diff --git a/configs/deepspeed/ds_config_gptj.json b/configs/deepspeed/ds_config_gptj.json index 3e933966..6f9b2961 100644 --- a/configs/deepspeed/ds_config_gptj.json +++ b/configs/deepspeed/ds_config_gptj.json @@ -24,5 +24,25 @@ "allgather_partitions": true, "allgather_bucket_size": 5e8, "contiguous_gradients": true - } - } \ No newline at end of file + }, + "optimizer": { + "type": "AdamW", + "params": { + "lr": "auto", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08 + } + }, + "scheduler": { + "type": "WarmupLR", + "params": { + "warmup_min_lr": 0, + "warmup_max_lr": "auto", + "warmup_num_steps": "auto", + "warmup_type": "linear" + } + } +} \ No newline at end of file