This commit is contained in:
parent
1464202d78
commit
2cd903e89d
23
Dockerfile
23
Dockerfile
|
|
@ -59,20 +59,19 @@ RUN set -e && \
|
|||
find /tmp/torch_dist -name 'torch-*.whl' -print | xargs -r python3 -m pip install --no-cache-dir
|
||||
|
||||
|
||||
|
||||
# ── Build torchvision 0.22.1 (depends on the locally built torch) ───────────
|
||||
WORKDIR /opt
|
||||
# Fetch only the v0.22.1 tag (shallow clone): the wheel build needs the
# checked-out sources, not the full repository history, so this keeps the
# layer small and the clone fast.
RUN git clone --depth 1 -b v0.22.1 https://github.com/pytorch/vision.git
|
||||
WORKDIR /opt/vision
|
||||
RUN python3 setup.py bdist_wheel
|
||||
|
||||
|
||||
# ── Build flashinfer (pinned to tag v0.3.1; supports torch 2.7 / cu126) ─────
|
||||
WORKDIR /opt
|
||||
RUN git clone --recursive -b v0.3.1 https://github.com/flashinfer-ai/flashinfer.git
|
||||
WORKDIR /opt/flashinfer
|
||||
|
||||
|
||||
|
||||
# Cover your target compute capabilities: 3090=8.6, 4090=8.9, H100=9.0a; add or remove entries as needed
|
||||
ENV FLASHINFER_CUDA_ARCH_LIST="7.5 8.0 8.6 8.9 9.0a"
|
||||
|
||||
|
|
@ -83,11 +82,7 @@ RUN python3 -m pip install --no-cache-dir numpy requests build "cuda-python>=12.
|
|||
ls -lh dist/
|
||||
|
||||
|
||||
|
||||
# RUN pip install . && \
|
||||
# python3 -m pip wheel . --no-deps -w dist/
|
||||
|
||||
# ── Download the prebuilt vllm wheel to avoid compiling flash-attn ──────────
|
||||
# ── Download the prebuilt vllm wheel to avoid compiling flash-attn ──────────
|
||||
WORKDIR /opt
|
||||
# Download the prebuilt vllm 0.9.1 wheel (binary only, no dependencies) into
# /tmp/vllm_wheels. Invoked as `python3 -m pip` so it targets the same
# interpreter as the other pip steps, and with --no-cache-dir so the wheel is
# not stored a second time in pip's cache inside this layer.
RUN python3 -m pip download --no-cache-dir --only-binary=:all: --no-deps vllm==0.9.1 -d /tmp/vllm_wheels
|
||||
|
||||
|
|
@ -99,8 +94,15 @@ RUN python3 -m pip install ".[srt,openai]" --no-build-isolation && \
|
|||
python3 -m pip wheel ".[srt,openai]" --no-deps -w /tmp/sg_wheels
|
||||
|
||||
|
||||
# ── 🔄 Download sgl-kernel (kept in sync with sglang) ───────────────────────
|
||||
RUN pip download --only-binary=:all: --no-deps sgl-kernel==0.3.9.post2 -d /tmp/sgl_kernel_wheels
|
||||
# # ── 🔄 Download sgl-kernel (kept in sync with sglang) ───────────────────────
|
||||
# RUN pip download --only-binary=:all: --no-deps sgl-kernel==0.3.9.post2 -d /tmp/sgl_kernel_wheels
|
||||
|
||||
# ── Build sgl-kernel==0.3.9.post2 from the local source tree (fully ABI-aligned with the self-built torch) ──────
|
||||
WORKDIR /sgl/sglang/sgl-kernel
|
||||
# Build the sgl-kernel wheel from the current WORKDIR into /tmp/sgl_kernel_wheels,
# pointing CMake at the torch package installed in this image.
# CMAKE_PREFIX_PATH is assigned first and exported in a separate step: with
# `export VAR="$(cmd)"` the exit status of `export` hides a failing command
# substitution, so a broken `import torch` would not stop the && chain.
# CMAKE_BUILD_PARALLEL_LEVEL follows MAX_JOBS when it is set, otherwise 96.
RUN bash -lc 'CMAKE_PREFIX_PATH="$(python3 -c "import torch; print(torch.utils.cmake_prefix_path)")" \
    && export CMAKE_PREFIX_PATH \
    && export TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;8.9;9.0" SGL_KERNEL_COMPILE_THREADS=1 CMAKE_BUILD_PARALLEL_LEVEL=${MAX_JOBS:-96} FORCE_CUDA=1 \
    && python3 -m pip wheel . --no-deps --no-build-isolation -w /tmp/sgl_kernel_wheels'
|
||||
|
||||
|
||||
# ── Collect all wheels into /wheels ─────────────────────────────────────────
|
||||
RUN mkdir -p /wheels && \
|
||||
|
|
@ -211,9 +213,6 @@ RUN ls -lh /tmp/wheels && \
|
|||
python3 -c "import gradio, sys; print('✅ Gradio version =', gradio.__version__)" && \
|
||||
rm -rf /tmp/wheels
|
||||
|
||||
# RUN PIP_NO_INDEX= PIP_FIND_LINKS= python3 -m pip install --no-cache-dir --no-deps \
|
||||
# openai-harmony==0.0.4 \
|
||||
# flashinfer-python==0.3.1
|
||||
|
||||
# ✅ Install the Prometheus client
|
||||
RUN python3 -m pip install --no-cache-dir prometheus_client
|
||||
|
|
|
|||
Loading…
Reference in New Issue