From c757ea02fc7c378d9d1682d9d619bcb06f1500a8 Mon Sep 17 00:00:00 2001
From: hailin <hailin@gdzx.xyz>
Date: Sat, 20 Sep 2025 20:42:53 +0800
Subject: [PATCH] Dockerfile: run flashinfer AOT build with TORCH_CUDA_ARCH_LIST unset

---
 Dockerfile | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 8a3cee8e3..a98667cba 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -46,7 +46,7 @@ RUN echo "Building PyTorch with USE_DISTRIBUTED=$USE_DISTRIBUTED" && \
 ARG CUDA_VERSION=12.6.1
 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS builder-extras
 
-ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9"
+ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9"
 ENV DEBIAN_FRONTEND=noninteractive PYTHONUNBUFFERED=1 LANG=C.UTF-8 LC_ALL=C.UTF-8
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
@@ -95,10 +95,11 @@ ENV FLASHINFER_CUDA_ARCH_LIST=8.0,8.6,8.9
 
 # 先做 AOT 预编译，再直接打 wheel（不隔离，使用同一份自编 torch）
 RUN python3 -m pip install --no-cache-dir numpy requests build "cuda-python>=12.0,<13" "nvidia-nvshmem-cu12" ninja pynvml && \
-    python3 -m flashinfer.aot && \
+    bash -lc 'unset TORCH_CUDA_ARCH_LIST; \
+              FLASHINFER_CUDA_ARCH_LIST=8.0,8.6,8.9 python3 -m flashinfer.aot' && \
     python3 -m build --no-isolation --wheel && \
-    ls -lh dist/ \
-    && python3 -m pip install --no-cache-dir --no-deps dist/*.whl
+    ls -lh dist/ && \
+    python3 -m pip install --no-cache-dir --no-deps dist/*.whl
 
 COPY ./sglang /sgl/sglang