This commit is contained in:
hailin 2025-09-09 18:16:46 +08:00
parent a44b1549fa
commit e79703129a
4 changed files with 466 additions and 286 deletions

View File

@ -2,7 +2,7 @@ WANDB_BASE_URL=https://wandb.szaiai.com
WANDB_API_KEY=local-701636f51b4741d3862007df5cf7f12cca53d8d1
WANDB_PROJECT=ds-qwen3
WANDB_ENTITY=hailin
WANDB_GROUP=q3-32b-ds4-2025-09-05
WANDB_GROUP=q3-32b-ds4-2025-09-04
WANDB_NAME=q3-32b-lr2e-5-train2
WANDB_RESUME=allow
WANDB_INIT_TIMEOUT=300

View File

@ -1,4 +1,4 @@
deepspeed --hostfile hostfile \
FORCE_COLOR=1 deepspeed --hostfile hostfile \
--num_nodes 6 --num_gpus 4 \
train_sft_lora.py \
--model_name_or_path /home/test/Qwen3-32B \
@ -11,6 +11,5 @@ deepspeed --hostfile hostfile \
--learning_rate 1e-4 \
--warmup_ratio 0.03 \
--lora_r 16 --lora_alpha 32 --lora_dropout 0.05 \
--lora_target auto \
--deepspeed /home/test/jd_train/ds_config_zero3_lora.json \
--report_to wandb --wandb_project ds-qwen3-lora

File diff suppressed because it is too large Load Diff

View File

@ -3,7 +3,7 @@ export WANDB_BASE_URL=https://wandb.szaiai.com
# W&B settings exported for the inline Python script that follows.
# NOTE(review): the API key is hardcoded in this file; consider loading it from
# the environment or a secrets store instead of committing it.
export WANDB_API_KEY=local-701636f51b4741d3862007df5cf7f12cca53d8d1
export WANDB_PROJECT=ds-qwen3
export WANDB_GROUP=q3-32b-ds4-2025-09-04 # If training did not use WANDB_RUN_GROUP, this is only the "expected value"
# NOTE(review): MATCH_NAME_REGEX is assigned twice in a row; the later
# assignment (the '^'-anchored prefix pattern) is the one that takes effect.
export MATCH_NAME_REGEX='q3-32b-ds4($|/|-)' # Fallback: match runs by name
export MATCH_NAME_REGEX='^q3-32b-ds4' # Fallback: match runs by name
python3 - <<'PY'
import os, re, wandb