34 lines
890 B
JSON
34 lines
890 B
JSON
{
|
||
"train_micro_batch_size_per_gpu": 1,
|
||
"gradient_accumulation_steps": 64,
|
||
"steps_per_print": 0,
|
||
"gradient_clipping": 1.0,
|
||
|
||
"fp16": { "enabled": false },
|
||
"bf16": { "enabled": true },
|
||
|
||
"zero_optimization": {
|
||
"stage": 3,
|
||
"overlap_comm": true,
|
||
"contiguous_gradients": true,
|
||
"reduce_scatter": true,
|
||
"reduce_bucket_size": 50000000,
|
||
"stage3_prefetch_bucket_size": 50000000,
|
||
"stage3_param_persistence_threshold": 100000,
|
||
|
||
/* Important: gather the 16-bit weights when saving the final model so the checkpoint can be loaded directly with from_pretrained */
|
||
"stage3_gather_16bit_weights_on_model_save": true
|
||
},
|
||
|
||
"activation_checkpointing": {
|
||
"partition_activations": true,
|
||
"contiguous_memory_optimization": true,
|
||
"cpu_checkpointing": false,
|
||
"number_checkpoints": 36,
|
||
"profile": false,
|
||
"synchronize_checkpoint_boundary": true
|
||
},
|
||
|
||
"wall_clock_breakdown": false
|
||
}
|