diff --git a/run_ds.sh b/run_ds.sh
index fbce445..5e045ef 100755
--- a/run_ds.sh
+++ b/run_ds.sh
@@ -23,10 +23,10 @@ EVAL_DATA_GLOB="${EVAL_DATA_GLOB:-$HOME/datasets/my_corpus/test.jsonl}"
 OUTDIR="${OUTDIR:-$HOME/checkpoints/run-qwen3-8b}"
 SEQ_LEN="${SEQ_LEN:-4096}"
 LR="${LR:-2e-5}"
-GAS="${GAS:-64}"
+GAS="${GAS:-1}"
 LOG_STEPS="${LOG_STEPS:-10}"
-SAVE_STEPS="${SAVE_STEPS:-500}"
-MAX_STEPS="${MAX_STEPS:-10000}"
+SAVE_STEPS="${SAVE_STEPS:-10}"
+MAX_STEPS="${MAX_STEPS:-62}"
 
 # 轻量校验(只在发起节点做;各 rank 在脚本里也会 mkdir)
 [ -d "$MODEL_NAME_OR_PATH" ] || { echo "ERR: model not found at $MODEL_NAME_OR_PATH"; exit 1; }