This commit is contained in:
hailin 2025-08-08 17:43:49 +08:00
parent d0ad05f4fb
commit 5868300787
4 changed files with 26 additions and 34 deletions

View File

@ -1,8 +1,9 @@
mamba env create -f hf-train-cu118.yaml --strict-channel-priority mamba env create -f env.yaml --strict-channel-priority
mamba activate hf-train-cu118 mamba activate hf-train-cu118
python -m pip install --no-deps -r requirements-hf.txt
pip install --upgrade pip # deepspeed选一
pip install --no-deps -r requirements-hf.txt --constraint constraints-cu118.txt DS_BUILD_OPS=0 python -m pip install "deepspeed==0.14.*"
# 或
# 需要 deepspeed 时再装: mamba install -y cuda-toolkit=11.8 cmake ninja
DS_BUILD_OPS=0 pip install "deepspeed==0.14.*" # 先不编译 CUDA 内核 python -m pip install "deepspeed==0.14.*"

View File

@ -1,3 +0,0 @@
torch==2.1.2
torchvision==0.16.2
torchaudio==2.1.2

View File

@ -1,32 +1,31 @@
name: hf-train-cu118 name: hf-train-cu118
channels: channels: [pytorch, nvidia, conda-forge]
- pytorch
- nvidia
- conda-forge
dependencies: dependencies:
- python=3.10 - python=3.10
- pip - pip
# ---- Torch 栈:固定 2.1.2 + cu118 ---- # ---- Torch 栈:2.1.2 + cu118 ----
- pytorch=2.1.2 - pytorch=2.1.2
- torchvision=0.16.2 - torchvision=0.16.2
- torchaudio=2.1.2 - torchaudio=2.1.2
- pytorch-cuda=11.8 - pytorch-cuda=11.8
# ---- 避坑:Numpy 钉在 1.26.* ----
# ---- 数值栈 ----
- numpy=1.26.* - numpy=1.26.*
# ---- 常用科学/系统库 ----
- pandas - pandas
- scipy - scipy
- pyarrow - pyarrow
- uvicorn
- git # ---- HF 主栈 ----
# ---- HF 主栈 + 其运行时依赖(全部走 conda,不让 pip 动依赖)---- - transformers>=4.40,<5
- transformers>=4.40
- accelerate>=0.30 - accelerate>=0.30
- datasets>=2.18 - datasets>=2.18
- evaluate>=0.4 - evaluate>=0.4
- safetensors>=0.4 - safetensors>=0.4
- sentencepiece>=0.1.99 - sentencepiece>=0.1.99
- tokenizers=0.19.* - tokenizers=0.19.*
# ---- 依赖/工具 ----
- protobuf<5
- huggingface_hub>=0.23 - huggingface_hub>=0.23
- tqdm>=4.66 - tqdm>=4.66
- scikit-learn>=1.4 - scikit-learn>=1.4
@ -41,4 +40,9 @@ dependencies:
- xxhash - xxhash
- aiohttp - aiohttp
- psutil - psutil
# 可选(按需)
# - einops
# - ninja
# - cmake
# - xformers==0.0.22.post3 # 若真需要
# - fastapi # 若要配合 uvicorn 跑服务

View File

@ -1,15 +1,5 @@
# requirements-hf.txt(HF 生态)
transformers>=4.40,<5.0
accelerate>=0.30
datasets>=2.18
evaluate>=0.4
safetensors>=0.4
sentencepiece>=0.1.99
tokenizers>=0.19,<0.21
huggingface_hub>=0.23
tqdm>=4.66
peft>=0.11 peft>=0.11
bitsandbytes>=0.43 bitsandbytes>=0.43
tensorboard>=2.16 # 可选:
scikit-learn>=1.4 # xformers==0.0.22.post3
# deepspeed 单独装,别放进来 # flash-attn