From f0c606c5d167d52056d4dfef7ed4c98e922db862 Mon Sep 17 00:00:00 2001 From: hailin Date: Tue, 9 Sep 2025 09:24:23 +0800 Subject: [PATCH] . --- .deepspeed_env | 2 +- mm-zero3.sh | 4 ++-- train_sft_lora.py | 3 --- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.deepspeed_env b/.deepspeed_env index a6ec8af..3a81de0 100644 --- a/.deepspeed_env +++ b/.deepspeed_env @@ -2,6 +2,6 @@ WANDB_BASE_URL=https://wandb.szaiai.com WANDB_API_KEY=local-701636f51b4741d3862007df5cf7f12cca53d8d1 WANDB_PROJECT=ds-qwen3 WANDB_GROUP=q3-1.7b-ds4-2025-09-05 -WANDB_RUN_ID=q3-31.7b-lr2e-5-train1 +WANDB_RUN_ID=q3-31.7b-lr2e-5-train2 WANDB_RESUME=allow export WANDB_DIR=/tmp/$USER/wandb diff --git a/mm-zero3.sh b/mm-zero3.sh index b58c261..054bcca 100755 --- a/mm-zero3.sh +++ b/mm-zero3.sh @@ -31,11 +31,11 @@ deepspeed --hostfile hostfile \ --model_name_or_path /home/test/Qwen3-32B \ --data_glob "/home/test/datasets/my_corpus/train.jsonl" \ --output_dir /home/test/checkpoints/q3-32b-ds4 \ - --seq_len 1024 \ + --seq_len 512 \ --per_device_train_batch_size 1 \ --gradient_accumulation_steps 1 \ --learning_rate 2e-5 --weight_decay 0.1 --warmup_ratio 0.02 \ - --max_steps 62 \ + --max_steps 20 \ --log_interval 1 \ --gradient_checkpointing \ --bf16 \ diff --git a/train_sft_lora.py b/train_sft_lora.py index c6fa521..107f9bc 100644 --- a/train_sft_lora.py +++ b/train_sft_lora.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - import os os.environ.pop("PYTHONNOUSERSITE", None) os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")