From 2cd903e89dd8768af3cdb66b4e1619107dbc2833 Mon Sep 17 00:00:00 2001 From: hailin Date: Thu, 18 Sep 2025 12:26:40 +0800 Subject: [PATCH] . --- Dockerfile | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0ebb798ef..d814f5c8a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -59,20 +59,19 @@ RUN set -e && \ find /tmp/torch_dist -name 'torch-*.whl' -print | xargs -r python3 -m pip install --no-cache-dir - # ── 编译 torchvision 0.22.1 (依赖本地 torch) ──────────────────────────────── WORKDIR /opt RUN git clone -b v0.22.1 https://github.com/pytorch/vision.git WORKDIR /opt/vision RUN python3 setup.py bdist_wheel + # ── 编译 flashinfer (主分支支持 torch 2.7 / cu126) ───────────────────────── WORKDIR /opt RUN git clone --recursive -b v0.3.1 https://github.com/flashinfer-ai/flashinfer.git WORKDIR /opt/flashinfer - # 覆盖你的目标算力:3090=8.6,4090=8.9,H100=9.0a;可按需增/减 ENV FLASHINFER_CUDA_ARCH_LIST="7.5 8.0 8.6 8.9 9.0a" @@ -83,11 +82,7 @@ RUN python3 -m pip install --no-cache-dir numpy requests build "cuda-python>=12. ls -lh dist/ - -# RUN pip install . && \ -# python3 -m pip wheel . --no-deps -w dist/ - -# ── 下载 vllm 预编译 wheel,避免编译 flash-attn ─────────────────────────────── + # ── 下载 vllm 预编译 wheel,避免编译 flash-attn ─────────────────────────────── WORKDIR /opt RUN pip download --only-binary=:all: --no-deps vllm==0.9.1 -d /tmp/vllm_wheels @@ -99,8 +94,15 @@ RUN python3 -m pip install ".[srt,openai]" --no-build-isolation && \ python3 -m pip wheel ".[srt,openai]" --no-deps -w /tmp/sg_wheels -# ── 🔄 下载 sgl-kernel(与 sglang 同步)─────────────────────────────────────── -RUN pip download --only-binary=:all: --no-deps sgl-kernel==0.3.9.post2 -d /tmp/sgl_kernel_wheels +# # ── 🔄 下载 sgl-kernel(与 sglang 同步)─────────────────────────────────────── +# RUN pip download --only-binary=:all: --no-deps sgl-kernel==0.3.9.post2 -d /tmp/sgl_kernel_wheels + +# ── 用你本地源码编 sgl-kernel==0.3.9.post2(与自编 torch 完全 ABI 对齐) ────── +WORKDIR /sgl/sglang/sgl-kernel +RUN bash -lc 'export CMAKE_PREFIX_PATH="$(python3 -c "import torch; print(torch.utils.cmake_prefix_path)")" \ + && export TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;8.9;9.0" SGL_KERNEL_COMPILE_THREADS=1 CMAKE_BUILD_PARALLEL_LEVEL=${MAX_JOBS:-96} FORCE_CUDA=1 \ + && python3 -m pip wheel . --no-deps --no-build-isolation -w /tmp/sgl_kernel_wheels' + # ── 收集所有 wheel 到 /wheels ────────────────────────────────────────────── RUN mkdir -p /wheels && \ @@ -211,9 +213,6 @@ RUN ls -lh /tmp/wheels && \ python3 -c "import gradio, sys; print('✅ Gradio version =', gradio.__version__)" && \ rm -rf /tmp/wheels -# RUN PIP_NO_INDEX= PIP_FIND_LINKS= python3 -m pip install --no-cache-dir --no-deps \ -# openai-harmony==0.0.4 \ -# flashinfer-python==0.3.1 # ✅ 安装 Prometheus client RUN python3 -m pip install --no-cache-dir prometheus_client