This commit is contained in:
hailin 2025-07-02 18:58:05 +08:00
parent c4142cc566
commit 32f02ee68e
1 changed files with 11 additions and 4 deletions

View File

@ -15,7 +15,7 @@ ENV USE_CUDA=1 \
# Default build parallelism; override with `--build-arg MAX_JOBS=<n>`.
# The value is re-exported further down via `ENV MAX_JOBS=${MAX_JOBS}`.
ARG MAX_JOBS=90
# Environment for the build: non-interactive apt frontend, unbuffered Python
# output, UTF-8 locale, and the PyTorch build switches.
# NOTE(review): the last two lines below both set TORCH_CUDA_ARCH_LIST, and the
# first of them has no trailing backslash. They look like the pre-change and
# post-change versions of a single line (the later one no longer carries
# USE_CUDA / USE_DISTRIBUTED=0 / BUILD_TEST) — confirm which one is meant to remain.
ENV DEBIAN_FRONTEND=noninteractive \
PYTHONUNBUFFERED=1 LANG=C.UTF-8 LC_ALL=C.UTF-8 \
USE_CUDA=1 USE_DISTRIBUTED=0 BUILD_TEST=0 TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0"
TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0"
RUN apt-get update && apt-get install -y --no-install-recommends \
python3 python3-dev python3-pip python3-distutils git cmake ninja-build \
@ -30,7 +30,9 @@ RUN git clone --recursive -b v2.7.1 https://github.com/pytorch/pytorch.git
# Run the following steps from /opt/pytorch (presumably the PyTorch checkout
# cloned just above — confirm the clone target).
WORKDIR /opt/pytorch
# Promote the MAX_JOBS build argument to an environment variable so that the
# build commands below can see it.
ENV MAX_JOBS=${MAX_JOBS}
# Plain wheel build. The trailing shell comment is a duration estimate for a
# first build; its digits look garbled (likely "50-60 min" — confirm).
# NOTE(review): the RUN that follows repeats the same bdist_wheel command with
# extra logging and an import check; the two look like the old and new versions
# of one step — confirm that only one of them is kept.
RUN python3 setup.py bdist_wheel # ≈5060 min 首编
# Print the USE_DISTRIBUTED value in effect, build the wheel, then import
# torch.distributed.Backend and print Backend.GLOO. Because the commands are
# chained with &&, a failed import fails this RUN.
# NOTE(review): the check runs with /opt/pytorch as the working directory and
# nothing visible here installs the freshly built wheel first, so `import torch`
# may resolve to the source tree instead of the built package — verify.
RUN echo "Building PyTorch with USE_DISTRIBUTED=$USE_DISTRIBUTED" && \
python3 setup.py bdist_wheel && \
python3 -c "from torch.distributed import Backend; print('✅ Build success. GLOO =', Backend.GLOO)"
###############################################################################
# Stage 1 ─ builder-extras: install TV / flashinfer / sglang against the self-built Torch and collect the wheels
@ -105,7 +107,12 @@ COPY --from=builder-extras /usr/local/cuda/lib64/libcupti.so /usr/lib/x86_64-lin
# Rebuild the shared-library cache (presumably so the library copied just
# above is picked up by the dynamic linker).
RUN ldconfig
# Bring in the wheels collected by the builder-extras stage.
# NOTE(review): this COPY creates its own layer, so the `rm -rf /tmp/wheels`
# in a later RUN does not reduce the final image size.
COPY --from=builder-extras /wheels /tmp/wheels
# NOTE(review): this single-step install is followed by a commented-out copy of
# itself and by a newer multi-step RUN; it looks like the pre-change version of
# that step. If both ran, this one would delete /tmp/wheels before the next RUN
# needs it — confirm that only one of them is kept.
RUN python3 -m pip install --no-cache-dir /tmp/wheels/* && rm -rf /tmp/wheels
#RUN python3 -m pip install --no-cache-dir /tmp/wheels/* && rm -rf /tmp/wheels
# Install the self-built torch wheel first so that it is not overridden by the
# build published on PyPI, then install every collected wheel, verify that
# torch.distributed.Backend can be imported (printing Backend.GLOO), and
# finally remove the wheel directory.
RUN python3 -m pip install --no-cache-dir /tmp/wheels/torch*.whl && \
python3 -m pip install --no-cache-dir /tmp/wheels/* && \
python3 -c "from torch.distributed import Backend; print('✅ Runtime torch distributed OK, GLOO =', Backend.GLOO)" && \
rm -rf /tmp/wheels
# Install the dependencies that were found to be missing at runtime.
# One package per line keeps future diffs small; order and arguments are
# unchanged, and transformers stays pinned to 4.48.3.
RUN python3 -m pip install --no-cache-dir \
    pydantic \
    orjson \
    psutil \
    pyzmq \
    pynvml \
    transformers==4.48.3 \
    uvicorn \
    fastapi \
    IPython \
    aiohttp \
    setproctitle \
    uvloop \
    sentencepiece \
    triton