diff --git a/mm-zero3.sh b/mm-zero3.sh index 487c963..c8e9578 100755 --- a/mm-zero3.sh +++ b/mm-zero3.sh @@ -10,6 +10,7 @@ deepspeed --hostfile hostfile \ --learning_rate 2e-5 --weight_decay 0.1 --warmup_ratio 0.02 \ --max_steps 62 \ --log_interval 1 \ + --gradient_checkpointing \ --bf16 \ --deepspeed /home/test/jd_train/ds_config_zero3.json \ --report_to none \