diff --git a/merge_zero3_safetensors.sh b/merge_zero3_safetensors.sh
index c55d23b..92ca08d 100755
--- a/merge_zero3_safetensors.sh
+++ b/merge_zero3_safetensors.sh
@@ -2,8 +2,8 @@
 set -euo pipefail
 
 # ===== 可调参数 =====
-CKPT_ROOT="/home/test/checkpoints/q3-32b-lora" # 若实际是 .../checkpoint-62/global_step62,请把 CKPT_ROOT 改成 .../checkpoint-62
-TAG="global_step110"
+CKPT_ROOT="/home/test/checkpoints/q3-32b-ds4/checkpoint-60" # 若实际是 .../checkpoint-62/global_step62,请把 CKPT_ROOT 改成 .../checkpoint-62
+TAG="global_step60"
 HOSTS=(tn01 tn02 tn03 tn04 tn05 tn06)
 AGGREGATOR_HOST="tn06" # 本脚本运行/汇总所在机器
 EXPECTED_SHARDS_PER_HOST=4 # 每机应写出分片数(按你的并行布局)