.

2025-09-20 20:42:53 +08:00 · 2025-09-20 20:42:53 +08:00 · c757ea02fc
parent cd9aa7b98b
commit c757ea02fc
1 changed file with 5 additions and 4 deletions
--- a/9
+++ b/9
@@ -46,7 +46,7 @@ RUN echo "Building PyTorch with USE_DISTRIBUTED=$USE_DISTRIBUTED" && \
 ARG CUDA_VERSION=12.6.1
 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS builder-extras

-ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9"
+ENV TORCH_CUDA_ARCH_LIST=8.0,8.6,8.9
 ENV DEBIAN_FRONTEND=noninteractive PYTHONUNBUFFERED=1 LANG=C.UTF-8 LC_ALL=C.UTF-8

 RUN apt-get update && apt-get install -y --no-install-recommends \
@@ -95,10 +95,11 @@ ENV FLASHINFER_CUDA_ARCH_LIST=8.0,8.6,8.9

 # 先做 AOT 预编译，再直接打 wheel（不隔离，使用同一份自编 torch）
 RUN python3 -m pip install --no-cache-dir numpy requests build "cuda-python>=12.0,<13" "nvidia-nvshmem-cu12" ninja pynvml && \
-    python3 -m flashinfer.aot && \
+    bash -lc 'unset TORCH_CUDA_ARCH_LIST; \
+              FLASHINFER_CUDA_ARCH_LIST=8.0,8.6,8.9 python3 -m flashinfer.aot' && \
    python3 -m build --no-isolation --wheel && \
-    ls -lh dist/ \
-    && python3 -m pip install --no-cache-dir --no-deps dist/*.whl
+    ls -lh dist/ && \
+    python3 -m pip install --no-cache-dir --no-deps dist/*.whl

 COPY ./sglang /sgl/sglang