From 2cd903e89dd8768af3cdb66b4e1619107dbc2833 Mon Sep 17 00:00:00 2001
From: hailin <hailin@gdzx.xyz>
Date: Thu, 18 Sep 2025 12:26:40 +0800
Subject: [PATCH] Dockerfile: build sgl-kernel from source instead of downloading wheel

---
 Dockerfile | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 0ebb798ef..d814f5c8a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -59,20 +59,19 @@ RUN set -e && \
     find /tmp/torch_dist -name 'torch-*.whl' -print | xargs -r python3 -m pip install --no-cache-dir
 
 
-
 # ── 编译 torchvision 0.22.1 (依赖本地 torch) ────────────────────────────────
 WORKDIR /opt
 RUN git clone -b v0.22.1 https://github.com/pytorch/vision.git
 WORKDIR /opt/vision
 RUN python3 setup.py bdist_wheel
 
+
 # ── 编译 flashinfer (主分支支持 torch 2.7 / cu126) ─────────────────────────
 WORKDIR /opt
 RUN git clone  --recursive -b v0.3.1 https://github.com/flashinfer-ai/flashinfer.git
 WORKDIR /opt/flashinfer
 
 
-
 # 覆盖你的目标算力：3090=8.6，4090=8.9，H100=9.0a；可按需增/减
 ENV FLASHINFER_CUDA_ARCH_LIST="7.5 8.0 8.6 8.9 9.0a"
 
@@ -83,11 +82,7 @@ RUN python3 -m pip install --no-cache-dir numpy requests build "cuda-python>=12.
     ls -lh dist/
 
 
-
-# RUN pip install . && \
-#     python3 -m pip wheel . --no-deps -w dist/
-
 # ── 下载 vllm 预编译 wheel，避免编译 flash-attn ───────────────────────────────
 WORKDIR /opt
 RUN pip download --only-binary=:all: --no-deps vllm==0.9.1 -d /tmp/vllm_wheels
 
@@ -99,8 +94,15 @@ RUN python3 -m pip install ".[srt,openai]" --no-build-isolation && \
     python3 -m pip wheel ".[srt,openai]" --no-deps -w /tmp/sg_wheels
 
 
-# ── 🔄 下载 sgl-kernel（与 sglang 同步）───────────────────────────────────────
-RUN pip download --only-binary=:all: --no-deps sgl-kernel==0.3.9.post2 -d /tmp/sgl_kernel_wheels
+# ── Build sgl-kernel from the local source tree (intended to match the ABI of the locally built torch) ──
+# Replaces the former `pip download` of the prebuilt sgl-kernel==0.3.9.post2 wheel.
+# Build parallelism: MAX_JOBS when set, otherwise the build host's CPU count (nproc).
+# NOTE(review): SGL_KERNEL_COMPILE_THREADS may be a CMake option rather than an env var;
+# confirm the export below takes effect (otherwise pass it via CMAKE_ARGS=-D...).
+WORKDIR /sgl/sglang/sgl-kernel
+RUN bash -lc 'export CMAKE_PREFIX_PATH="$(python3 -c "import torch; print(torch.utils.cmake_prefix_path)")" \
+  && export TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;8.9;9.0" SGL_KERNEL_COMPILE_THREADS=1 CMAKE_BUILD_PARALLEL_LEVEL="${MAX_JOBS:-$(nproc)}" FORCE_CUDA=1 \
+  && python3 -m pip wheel . --no-deps --no-build-isolation -w /tmp/sgl_kernel_wheels'
 
 # ── 收集所有 wheel 到 /wheels ──────────────────────────────────────────────
 RUN mkdir -p /wheels && \
@@ -211,9 +213,6 @@ RUN ls -lh /tmp/wheels && \
     python3 -c "import gradio, sys; print('✅ Gradio version =', gradio.__version__)" && \
     rm -rf /tmp/wheels
 
-# RUN PIP_NO_INDEX= PIP_FIND_LINKS= python3 -m pip install --no-cache-dir --no-deps \
-#     openai-harmony==0.0.4 \
-#     flashinfer-python==0.3.1
 
 # ✅ 安装 Prometheus client
 RUN python3 -m pip install --no-cache-dir prometheus_client