# Patch note (preamble text ahead of the diff header; not part of any hunk).
# Purpose: change the value that the Dockerfile assigns to FLASHINFER_CUDA_ARCH_LIST via ENV.
# The added line uses a quoted, space-separated list ("8.0 8.6 8.9"), which is the form
# shown in FlashInfer's build-from-source instructions, instead of a comma-separated one.
# NOTE(review): the remainder of the RUN step is not visible here - confirm that nothing
# later in that step expects a different delimiter for this variable.
# English rendering of the context comments, which stay verbatim below so the hunk
# context still matches the target file:
#   1) "Cover your target compute capabilities: 3090=8.6, 4090=8.9, H100=9.0a; add or remove as needed"
#   2) "Run the AOT precompilation first, then build the wheel directly (no build isolation, using the same self-built torch)"
diff --git a/Dockerfile b/Dockerfile index 8f1e00b09..8a3cee8e3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -91,7 +91,7 @@ WORKDIR /opt/flashinfer # 覆盖你的目标算力:3090=8.6,4090=8.9,H100=9.0a;可按需增/减 -ENV FLASHINFER_CUDA_ARCH_LIST="8.0;8.6;8.9" +ENV FLASHINFER_CUDA_ARCH_LIST="8.0 8.6 8.9" # 先做 AOT 预编译,再直接打 wheel(不隔离,使用同一份自编 torch) RUN python3 -m pip install --no-cache-dir numpy requests build "cuda-python>=12.0,<13" "nvidia-nvshmem-cu12" ninja pynvml && \