.

2025-09-18 12:26:40 +08:00 · 2025-09-18 12:26:40 +08:00 · 2cd903e89d
parent 1464202d78
commit 2cd903e89d
1 changed files with 11 additions and 12 deletions
--- a/23
+++ b/23
@ -59,20 +59,19 @@ RUN set -e && \
    find /tmp/torch_dist -name 'torch-*.whl' -print | xargs -r python3 -m pip install --no-cache-dir


-
 # ── 编译 torchvision 0.22.1 (依赖本地 torch) ────────────────────────────────
 WORKDIR /opt
 RUN git clone -b v0.22.1 https://github.com/pytorch/vision.git
 WORKDIR /opt/vision
 RUN python3 setup.py bdist_wheel

+
 # ── 编译 flashinfer (主分支支持 torch 2.7 / cu126) ─────────────────────────
 WORKDIR /opt
 RUN git clone  --recursive -b v0.3.1 https://github.com/flashinfer-ai/flashinfer.git
 WORKDIR /opt/flashinfer


-
 # 覆盖你的目标算力：3090=8.6，4090=8.9，H100=9.0a；可按需增/减
 ENV FLASHINFER_CUDA_ARCH_LIST="7.5 8.0 8.6 8.9 9.0a"

@ -83,11 +82,7 @@ RUN python3 -m pip install --no-cache-dir numpy requests build "cuda-python>=12.
    ls -lh dist/


-
-# RUN pip install . && \
-#     python3 -m pip wheel . --no-deps -w dist/
-
-# ── 下载 vllm 预编译 wheel，避免编译 flash-attn ───────────────────────────────
+# ── Download the prebuilt vllm wheel to avoid compiling flash-attn ───────────
 WORKDIR /opt
 RUN pip download --only-binary=:all: --no-deps vllm==0.9.1 -d /tmp/vllm_wheels

@ -99,8 +94,15 @@ RUN python3 -m pip install ".[srt,openai]" --no-build-isolation && \
    python3 -m pip wheel ".[srt,openai]" --no-deps -w /tmp/sg_wheels


-# ── 🔄 下载 sgl-kernel（与 sglang 同步）───────────────────────────────────────
-RUN pip download --only-binary=:all: --no-deps sgl-kernel==0.3.9.post2 -d /tmp/sgl_kernel_wheels
+# # ── 🔄 下载 sgl-kernel（与 sglang 同步）───────────────────────────────────────
+# RUN pip download --only-binary=:all: --no-deps sgl-kernel==0.3.9.post2 -d /tmp/sgl_kernel_wheels
+
+# ── Build the sgl-kernel==0.3.9.post2 wheel from the local source tree (exact ABI match with the self-built torch) ──
+WORKDIR /sgl/sglang/sgl-kernel
+RUN bash -lc 'CMAKE_PREFIX_PATH="$(python3 -c "import torch; print(torch.utils.cmake_prefix_path)")" \
+  TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;8.9;9.0" SGL_KERNEL_COMPILE_THREADS=1 CMAKE_BUILD_PARALLEL_LEVEL=${MAX_JOBS:-96} FORCE_CUDA=1 \
+  python3 -m pip wheel . --no-deps --no-build-isolation -w /tmp/sgl_kernel_wheels'
+

 # ── 收集所有 wheel 到 /wheels ──────────────────────────────────────────────
 RUN mkdir -p /wheels && \
@ -211,9 +213,6 @@ RUN ls -lh /tmp/wheels && \
    python3 -c "import gradio, sys; print('✅ Gradio version =', gradio.__version__)" && \
    rm -rf /tmp/wheels

-# RUN PIP_NO_INDEX= PIP_FIND_LINKS= python3 -m pip install --no-cache-dir --no-deps \
-#     openai-harmony==0.0.4 \
-#     flashinfer-python==0.3.1

 # ✅ 安装 Prometheus client
 RUN python3 -m pip install --no-cache-dir prometheus_client