# syntax=docker/dockerfile:1
###############################################################################
# Three-stage build:
#   builder-torch   compiles PyTorch 2.7.1 (+cu126) from source
#   builder-extras  builds torchvision / flashinfer / sglang against that torch
#                   and gathers a complete wheelhouse in /wheels
#   runtime         slim image that installs from the wheelhouse only
#
# Build args:
#   CUDA_VERSION    tag prefix of the nvidia/cuda base images (default 12.6.1)
#   MAX_JOBS        parallel compile jobs (default 24)
#   FLASHINFER_REF  git ref of flashinfer to build (default main)
###############################################################################

# Declared once before the first FROM so that every FROM line can use it.
ARG CUDA_VERSION=12.6.1

###############################################################################
# Stage 0 - builder-torch: compile PyTorch 2.7.1 (+cu126)
###############################################################################
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS builder-torch

# Number of parallel compile jobs; tune to the core count of the build host.
ARG MAX_JOBS=24

# PYTORCH_BUILD_VERSION / PYTORCH_BUILD_NUMBER pin the wheel version. Without
# them setup.py derives a "+git<sha>" version and the wheel would not match
# the torch-2.7.1+cu126*.whl glob used by the next stage.
# NOTE(review): USE_DISTRIBUTED=0 disables torch.distributed; confirm this is
# acceptable for real sglang serving and not only for the --help self-check.
ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONUNBUFFERED=1 \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    USE_CUDA=1 \
    USE_DISTRIBUTED=0 \
    BUILD_TEST=0 \
    TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0" \
    PYTORCH_BUILD_VERSION=2.7.1+cu126 \
    PYTORCH_BUILD_NUMBER=1 \
    MAX_JOBS=${MAX_JOBS}

# python3-dev supplies the Python headers required to compile the extension.
# NOTE(review): the NCCL pins are kept as-is; verify they are installable on
# top of the NCCL packages already shipped in the devel base image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        git \
        libjpeg-dev \
        libnccl-dev=2.22.3-1+cuda12.6 \
        libnccl2=2.22.3-1+cuda12.6 \
        libopenblas-dev \
        libopenmpi-dev \
        libpng-dev \
        ninja-build \
        python3 \
        python3-dev \
        python3-distutils \
        python3-pip \
    && rm -rf /var/lib/apt/lists/* \
    && python3 -m pip install --no-cache-dir --upgrade pip wheel setuptools

WORKDIR /opt
RUN git clone --recursive -b v2.7.1 https://github.com/pytorch/pytorch.git

WORKDIR /opt/pytorch
# Python-side build prerequisites listed by the PyTorch source tree.
RUN python3 -m pip install --no-cache-dir -r requirements.txt

# Roughly 50-60 minutes on a first, uncached build.
RUN python3 setup.py bdist_wheel

###############################################################################
# Stage 1 - builder-extras: build torchvision / flashinfer / sglang against
# the self-built torch and collect every wheel in /wheels
###############################################################################
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS builder-extras

ARG MAX_JOBS=24
# Git ref of flashinfer to build. The default tracks main as before; pass a
# release tag with --build-arg for reproducible builds.
ARG FLASHINFER_REF=main

# FORCE_CUDA=1: no GPU is visible during `docker build`, so torchvision would
# otherwise silently build without its CUDA ops.
ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONUNBUFFERED=1 \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    FORCE_CUDA=1 \
    TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0" \
    MAX_JOBS=${MAX_JOBS}

RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        git \
        libjpeg-dev \
        libnccl-dev=2.22.3-1+cuda12.6 \
        libnccl2=2.22.3-1+cuda12.6 \
        libopenmpi-dev \
        libpng-dev \
        ninja-build \
        python3 \
        python3-dev \
        python3-distutils \
        python3-pip \
    && rm -rf /var/lib/apt/lists/* \
    && python3 -m pip install --no-cache-dir --upgrade pip wheel setuptools

# -- Install the self-built torch wheel --------------------------------------
# The wheel is copied straight into /wheels and kept there: /opt/pytorch only
# exists in builder-torch, so it cannot be picked up from that path later.
COPY --from=builder-torch /opt/pytorch/dist/torch-2.7.1+cu126*.whl /wheels/
RUN python3 -m pip install --no-cache-dir /wheels/torch-2.7.1+cu126*.whl

# -- Build torchvision 0.22.1 against the local torch ------------------------
WORKDIR /opt
RUN git clone -b v0.22.1 https://github.com/pytorch/vision.git

WORKDIR /opt/vision
# BUILD_VERSION pins the wheel version; without it setup.py appends the git
# sha and the +cu126 glob below would not match.
RUN BUILD_VERSION=0.22.1+cu126 python3 setup.py bdist_wheel \
    && cp dist/torchvision-0.22.1+cu126*.whl /wheels/

# -- Build flashinfer ---------------------------------------------------------
WORKDIR /opt
RUN git clone --recursive -b "${FLASHINFER_REF}" \
        https://github.com/flashinfer-ai/flashinfer.git

WORKDIR /opt/flashinfer
# Built from the repository root without build isolation so that the torch
# installed above is used.
# NOTE(review): assumes the package lives at the repo root (flashinfer 0.2+);
# some refs may need extra build requirements installed first - confirm for
# the ref you pin.
RUN python3 -m pip wheel --no-cache-dir --no-build-isolation --no-deps \
        --wheel-dir dist . \
    && cp dist/flashinfer_python-*.whl /wheels/

# -- Build the local sglang sources and complete the wheelhouse --------------
COPY ./sglang /sgl/sglang
WORKDIR /sgl/sglang/python
# Builds the sglang wheel and fetches/builds every dependency of the
# [srt,openai] extras into /wheels. --find-links makes the self-built wheels
# candidates during resolution, so the runtime stage can install offline.
RUN python3 -m pip wheel --no-cache-dir \
        --find-links /wheels \
        --wheel-dir /wheels \
        ".[srt,openai]"

###############################################################################
# Stage 2 - runtime: minimal image, installs from the wheelhouse only
###############################################################################
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04 AS runtime

# Build-time only; intentionally not baked into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

ENV PYTHONUNBUFFERED=1 \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8

# NOTE(review): libgomp1 / libjpeg-turbo8 / libopenblas0 / libpng16-16 are the
# presumed runtime counterparts of the -dev packages the wheels were linked
# against in the builder stages; confirm with `ldd` on the installed
# extension modules.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        libgomp1 \
        libjpeg-turbo8 \
        libopenblas0 \
        libpng16-16 \
        python3 \
        python3-distutils \
        python3-pip \
    && rm -rf /var/lib/apt/lists/* \
    && python3 -m pip install --no-cache-dir --upgrade pip

# The wheelhouse is bind-mounted for this step only, so the wheels never end
# up in an image layer (COPY followed by `rm` would still ship them).
# --no-index enforces a purely offline install.
RUN --mount=type=bind,from=builder-extras,source=/wheels,target=/tmp/wheels \
    python3 -m pip install --no-cache-dir --no-index \
        --find-links /tmp/wheels \
        "sglang[srt,openai]" torch torchvision flashinfer-python

# NOTE(review): the container still runs as root, as before; consider adding
# a dedicated USER once the expected cache/model paths are known.

# CI / CPU self-check only: print the server help text and exit 0.
CMD ["python3", "-m", "sglang.launch_server", "--help"]