hailin 2025-08-08 17:43:49 +08:00
parent d0ad05f4fb
commit 5868300787
4 changed files with 26 additions and 34 deletions

View File

@ -1,8 +1,9 @@
mamba env create -f hf-train-cu118.yaml --strict-channel-priority
mamba env create -f env.yaml --strict-channel-priority
mamba activate hf-train-cu118
python -m pip install --no-deps -r requirements-hf.txt
pip install --upgrade pip
pip install --no-deps -r requirements-hf.txt --constraint constraints-cu118.txt
# Install deepspeed only when actually needed:
DS_BUILD_OPS=0 pip install "deepspeed==0.14.*" # skip building the CUDA kernels for now
# deepspeed: pick one of the two options below
DS_BUILD_OPS=0 python -m pip install "deepspeed==0.14.*"
# or
mamba install -y cuda-toolkit=11.8 cmake ninja
python -m pip install "deepspeed==0.14.*"
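Sanity check (not part of the commit): a minimal sketch to confirm the pinned Torch stack landed, assuming the hf-train-cu118 env name used above; the printed versions should match the pins in env.yaml below.
mamba activate hf-train-cu118
python - <<'PY'
import torch, torchvision, torchaudio
print("torch", torch.__version__)              # expect 2.1.2
print("torchvision", torchvision.__version__)  # expect 0.16.2
print("torchaudio", torchaudio.__version__)    # expect 2.1.2
print("cuda build:", torch.version.cuda, "| available:", torch.cuda.is_available())
PY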

View File

@ -1,3 +0,0 @@
torch==2.1.2
torchvision==0.16.2
torchaudio==2.1.2
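The removed pins presumably move into constraints-cu118.txt, which the new pip command above passes via --constraint so pip can never move the Torch stack; a hypothetical sketch of that file under that assumption:
cat > constraints-cu118.txt <<'EOF'
torch==2.1.2
torchvision==0.16.2
torchaudio==2.1.2
EOF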

View File

@ -1,32 +1,31 @@
name: hf-train-cu118
channels:
- pytorch
- nvidia
- conda-forge
channels: [pytorch, nvidia, conda-forge]
dependencies:
- python=3.10
- pip
# ---- Torch stack: pinned at 2.1.2 + cu118 ----
# ---- Torch stack: 2.1.2 + cu118 ----
- pytorch=2.1.2
- torchvision=0.16.2
- torchaudio=2.1.2
- pytorch-cuda=11.8
# ---- Gotcha: keep NumPy pinned at 1.26.* ----
# ---- Numerics stack ----
- numpy=1.26.*
# ---- Common scientific/system libraries ----
- pandas
- scipy
- pyarrow
- uvicorn
- git
# ---- HF main stack + its runtime deps (all via conda; keep pip away from dependencies) ----
- transformers>=4.40
# ---- HF main stack ----
- transformers>=4.40,<5
- accelerate>=0.30
- datasets>=2.18
- evaluate>=0.4
- safetensors>=0.4
- sentencepiece>=0.1.99
- tokenizers=0.19.*
# ---- Dependencies/tools ----
- protobuf<5
- huggingface_hub>=0.23
- tqdm>=4.66
- scikit-learn>=1.4
@ -41,4 +40,9 @@ dependencies:
- xxhash
- aiohttp
- psutil
# Optional (install as needed)
# - einops
# - ninja
# - cmake
# - xformers==0.0.22.post3 # only if actually needed
# - fastapi # if serving with uvicorn
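For an already-created env, the edits above can be applied in place instead of recreating it; a sketch assuming mamba mirrors conda's `env update` interface:
mamba env update -f env.yaml --prune   # --prune drops packages no longer listed in env.yaml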

View File

@ -1,15 +1,5 @@
# requirements-hf.txt (HF ecosystem)
transformers>=4.40,<5.0
accelerate>=0.30
datasets>=2.18
evaluate>=0.4
safetensors>=0.4
sentencepiece>=0.1.99
tokenizers>=0.19,<0.21
huggingface_hub>=0.23
tqdm>=4.66
peft>=0.11
bitsandbytes>=0.43
tensorboard>=2.16
scikit-learn>=1.4
# install deepspeed separately; do not add it here
# Optional:
# xformers==0.0.22.post3
# flash-attn
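Since this layer is installed with --no-deps, pip never runs dependency resolution; a cheap consistency check afterwards (package names here are the ones kept in this file, used as an import smoke test):
pip check   # reports missing or conflicting requirements, exits non-zero on problems
python -c "import peft, bitsandbytes, tensorboard; print('pip layer OK')"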