diff --git a/ds_config_zero3.json b/ds_config_zero3.json index 4e34160..16bde0c 100644 --- a/ds_config_zero3.json +++ b/ds_config_zero3.json @@ -1,6 +1,6 @@ { "train_micro_batch_size_per_gpu": 1, - "gradient_accumulation_steps": 7, + "gradient_accumulation_steps": 1, "zero_optimization": { "stage": 3, "overlap_comm": true,