diff --git a/ds_config_zero3_lora.json b/ds_config_zero3_lora.json index da93499..2dd05ad 100644 --- a/ds_config_zero3_lora.json +++ b/ds_config_zero3_lora.json @@ -1,15 +1,14 @@ { "train_micro_batch_size_per_gpu": 1, - "gradient_accumulation_steps": 4, "bf16": { "enabled": true }, "fp16": { "enabled": false }, "zero_optimization": { "stage": 3, "overlap_comm": true, - "contiguous_gradients": true, + "contiguous_gradients": false, "allgather_partitions": true, "reduce_scatter": true, - "round_robin_gradients": true, + "round_robin_gradients": false, "reduce_bucket_size": 150000000, "stage3_prefetch_bucket_size": 100000000, "stage3_param_persistence_threshold": 1000000,