diff --git a/README.txt b/README.txt index cb4374a..22d69c5 100644 --- a/README.txt +++ b/README.txt @@ -1,8 +1,9 @@ -mamba env create -f hf-train-cu118.yaml --strict-channel-priority +mamba env create -f hf-train-cu118.yaml --strict-channel-priority mamba activate hf-train-cu118 +python -m pip install --no-deps -r requirements-hf.txt -pip install --upgrade pip -pip install --no-deps -r requirements-hf.txt --constraint constraints-cu118.txt - -# 需要 deepspeed 时再装: -DS_BUILD_OPS=0 pip install "deepspeed==0.14.*" # 先不编译 CUDA 内核 +# deepspeed(选一) +DS_BUILD_OPS=0 python -m pip install "deepspeed==0.14.*" +# 或 +mamba install -y cuda-toolkit=11.8 cmake ninja +python -m pip install "deepspeed==0.14.*" \ No newline at end of file diff --git a/constraints-cu118.txt b/constraints-cu118.txt deleted file mode 100644 index 53947a6..0000000 --- a/constraints-cu118.txt +++ /dev/null @@ -1,3 +0,0 @@ -torch==2.1.2 -torchvision==0.16.2 -torchaudio==2.1.2 diff --git a/hf-train-cu118.yaml b/hf-train-cu118.yaml index 107cabe..3debbd6 100644 --- a/hf-train-cu118.yaml +++ b/hf-train-cu118.yaml @@ -1,32 +1,31 @@ name: hf-train-cu118 -channels: - - pytorch - - nvidia - - conda-forge +channels: [pytorch, nvidia, conda-forge] dependencies: - python=3.10 - pip - # ---- Torch 栈:固定 2.1.2 + cu118 ---- + # ---- Torch 栈:2.1.2 + cu118 ---- - pytorch=2.1.2 - torchvision=0.16.2 - torchaudio=2.1.2 - pytorch-cuda=11.8 - # ---- 避坑:Numpy 钉在 1.26.* ---- + + # ---- 数值栈 ---- - numpy=1.26.* - # ---- 常用科学/系统库 ---- - pandas - scipy - pyarrow - - uvicorn - - git - # ---- HF 主栈 + 其运行时依赖(全部走 conda,不让 pip 动依赖)---- - - transformers>=4.40 + + # ---- HF 主栈 ---- + - transformers>=4.40,<5 - accelerate>=0.30 - datasets>=2.18 - evaluate>=0.4 - safetensors>=0.4 - sentencepiece>=0.1.99 - tokenizers=0.19.* + + # ---- 依赖/工具 ---- + - protobuf<5 - huggingface_hub>=0.23 - tqdm>=4.66 - scikit-learn>=1.4 @@ -41,4 +40,9 @@ dependencies: - xxhash - aiohttp - psutil - + # 可选(按需) + # - einops + # - ninja + # - cmake + # - xformers==0.0.22.post3 # 
若真需要 + # - fastapi # 若要配合 uvicorn 跑服务 diff --git a/requirements-hf.txt b/requirements-hf.txt index da833f0..ba5987e 100644 --- a/requirements-hf.txt +++ b/requirements-hf.txt @@ -1,15 +1,5 @@ -# requirements-hf.txt (HF 生态) -transformers>=4.40,<5.0 -accelerate>=0.30 -datasets>=2.18 -evaluate>=0.4 -safetensors>=0.4 -sentencepiece>=0.1.99 -tokenizers>=0.19,<0.21 -huggingface_hub>=0.23 -tqdm>=4.66 peft>=0.11 bitsandbytes>=0.43 -tensorboard>=2.16 -scikit-learn>=1.4 -# deepspeed 单独装,别放进来 +# 可选: +# xformers==0.0.22.post3 +# flash-attn \ No newline at end of file