From df2d5f7e46d01a98e6b12c8d8a9e4764d324fa3e Mon Sep 17 00:00:00 2001
From: Zach Nussbaum
Date: Tue, 4 Apr 2023 20:58:08 +0000
Subject: [PATCH] feat: gpt-j config

---
 configs/deepspeed/ds_config_gptj.json | 28 +++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 configs/deepspeed/ds_config_gptj.json

diff --git a/configs/deepspeed/ds_config_gptj.json b/configs/deepspeed/ds_config_gptj.json
new file mode 100644
index 00000000..dc856e1b
--- /dev/null
+++ b/configs/deepspeed/ds_config_gptj.json
@@ -0,0 +1,28 @@
+{
+  "train_batch_size": "auto",
+  "gradient_accumulation_steps": 4,
+  "train_micro_batch_size_per_gpu": "auto",
+  "fp16": {
+    "enabled": "auto",
+    "min_loss_scale": 1,
+    "loss_scale_window": 1000,
+    "hysteresis": 2,
+    "initial_scale_power": 32
+  },
+  "bf16": {
+    "enabled": "auto"
+  },
+  "gradient_clipping": 1.0,
+  "zero_optimization": {
+    "stage": 2,
+    "offload_param": {
+      "device": "none"
+    },
+    "offload_optimizer": {
+      "device": "none"
+    },
+    "allgather_partitions": true,
+    "allgather_bucket_size": 5e8,
+    "contiguous_gradients": true
+  }
+}