This commit is contained in:
hailin 2025-09-20 20:42:53 +08:00
parent cd9aa7b98b
commit c757ea02fc
1 changed files with 5 additions and 4 deletions

View File

@ -46,7 +46,7 @@ RUN echo "Building PyTorch with USE_DISTRIBUTED=$USE_DISTRIBUTED" && \
# CUDA version used to select the base image tag. It is declared before FROM so
# that it can be referenced there; override it with --build-arg CUDA_VERSION=...
ARG CUDA_VERSION=12.6.1
# Stage "builder-extras": starts from the NVIDIA CUDA "devel" image for Ubuntu 22.04.
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS builder-extras
# TORCH_CUDA_ARCH_LIST is assigned twice below; the later ENV is the value in
# effect for the instructions that follow.
ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9"
# NOTE(review): this value is comma-separated, whereas the assignment above uses
# semicolons — confirm that every consumer of this variable accepts commas.
ENV TORCH_CUDA_ARCH_LIST=8.0,8.6,8.9
# Non-interactive apt prompts, unbuffered Python output, and a UTF-8 locale.
ENV DEBIAN_FRONTEND=noninteractive PYTHONUNBUFFERED=1 LANG=C.UTF-8 LC_ALL=C.UTF-8
RUN apt-get update && apt-get install -y --no-install-recommends \
@ -95,10 +95,11 @@ ENV FLASHINFER_CUDA_ARCH_LIST=8.0,8.6,8.9
# Install the Python build prerequisites, run FlashInfer's AOT precompilation
# first, then build the wheel directly (no build isolation, so the same
# self-built torch is used), list dist/, and install the wheel without
# resolving its dependencies.
# One flashinfer.aot invocation below runs inside `bash -lc` with
# TORCH_CUDA_ARCH_LIST unset and FLASHINFER_CUDA_ARCH_LIST passed inline.
RUN python3 -m pip install --no-cache-dir numpy requests build "cuda-python>=12.0,<13" "nvidia-nvshmem-cu12" ninja pynvml && \
python3 -m flashinfer.aot && \
bash -lc 'unset TORCH_CUDA_ARCH_LIST; \
FLASHINFER_CUDA_ARCH_LIST=8.0,8.6,8.9 python3 -m flashinfer.aot' && \
python3 -m build --no-isolation --wheel && \
ls -lh dist/ \
&& python3 -m pip install --no-cache-dir --no-deps dist/*.whl
ls -lh dist/ && \
python3 -m pip install --no-cache-dir --no-deps dist/*.whl
# Copy the sglang directory from the build context to /sgl/sglang in the image.
COPY ./sglang /sgl/sglang