# sglang.0.4.8.post1/Dockerfile

###############################################################################
# Stage 0 - builder-torch: compile PyTorch 2.7.1 (+cu126) from source
###############################################################################
# Declared before the first FROM so that FROM lines can interpolate it.
ARG CUDA_VERSION=12.6.1
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS builder-torch

# Number of parallel compile jobs; exported as an environment variable further
# down in this stage, right before the build runs.
ARG MAX_JOBS=90

# Non-interactive apt, UTF-8 locale, the CUDA architectures to compile for, and
# the feature switches picked up by the PyTorch build.
ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONUNBUFFERED=1 \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0" \
    USE_CUDA=1 \
    USE_DISTRIBUTED=1 \
    USE_MPI=1 \
    USE_GLOO=1 \
    USE_NCCL=1 \
    USE_SYSTEM_NCCL=1 \
    BUILD_TEST=0

# Toolchain, Python headers and the BLAS / MPI / NCCL / image libraries used by
# the PyTorch build. NCCL is pinned to the build that matches CUDA 12.6.
# The apt package index is removed in the same layer that downloaded it, so it
# is not carried along in the layer cache.
RUN apt-get update && apt-get install -y --no-install-recommends \
      ca-certificates \
      cmake \
      git \
      libjpeg-dev \
      libnccl-dev=2.22.3-1+cuda12.6 \
      libnccl2=2.22.3-1+cuda12.6 \
      libopenblas-dev \
      libopenmpi-dev \
      libpng-dev \
      ninja-build \
      python3 \
      python3-dev \
      python3-distutils \
      python3-pip && \
    rm -rf /var/lib/apt/lists/* && \
    python3 -m pip install --no-cache-dir --upgrade \
      pip wheel setuptools sympy pyyaml typing-extensions numpy

# PyTorch release to build; selects the git tag and the version stamped into
# the wheel. Override with --build-arg PYTORCH_VERSION=<x.y.z>.
ARG PYTORCH_VERSION=2.7.1

# Fetch the sources together with all submodules at the release tag.
WORKDIR /opt
RUN git clone --recursive -b "v${PYTORCH_VERSION}" https://github.com/pytorch/pytorch.git

WORKDIR /opt/pytorch
# MAX_JOBS caps the number of parallel compile jobs.
# PYTORCH_BUILD_VERSION / PYTORCH_BUILD_NUMBER make setup.py stamp the wheel
# with the plain release version. Without them a source build gets a
# development version (of the form X.Y.Za0+git<sha>), which does not satisfy
# exact pins such as `torch==2.7.1` and lets pip replace the self-built wheel
# with the PyPI one when dependent packages are installed.
ENV MAX_JOBS=${MAX_JOBS} \
    PYTORCH_BUILD_VERSION=${PYTORCH_VERSION} \
    PYTORCH_BUILD_NUMBER=1
# The wheel is written to /opt/pytorch/dist.
RUN echo "Building PyTorch with USE_DISTRIBUTED=$USE_DISTRIBUTED" && \
    python3 setup.py bdist_wheel

###############################################################################
# Stage 1 - builder-extras: install the self-built torch, build torchvision /
# flashinfer / sglang against it, and collect every wheel
###############################################################################
# Kept next to the FROM for readability; the FROM line itself resolves
# CUDA_VERSION from the ARG declared before the first FROM of the file.
ARG CUDA_VERSION=12.6.1
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS builder-extras

ENV DEBIAN_FRONTEND=noninteractive PYTHONUNBUFFERED=1 LANG=C.UTF-8 LC_ALL=C.UTF-8

# Compiler toolchain plus the same BLAS / MPI / NCCL / image libraries the torch
# wheel was built against. The apt package index is removed in the layer that
# created it.
RUN apt-get update && apt-get install -y --no-install-recommends \
      build-essential \
      ca-certificates \
      cmake \
      git \
      libjpeg-dev \
      libnccl-dev=2.22.3-1+cuda12.6 \
      libnccl2=2.22.3-1+cuda12.6 \
      libopenblas-dev \
      libopenmpi-dev \
      libpng-dev \
      ninja-build \
      python3 \
      python3-distutils \
      python3-pip \
      python3.10-dev && \
    rm -rf /var/lib/apt/lists/* && \
    python3 -m pip install --no-cache-dir --upgrade pip wheel setuptools

# -- Install the self-built torch wheel ---------------------------------------
COPY --from=builder-torch /opt/pytorch/dist /tmp/torch_dist
# The wheel glob is handed to pip directly. If no wheel was produced the glob
# stays unexpanded, pip rejects it and the build stops here. The previous
# `find | xargs -r` pipeline exited 0 in that case, so a missing wheel went
# unnoticed until a later step pulled torch from PyPI instead.
RUN echo "==> Files in /tmp/torch_dist:" && ls -lh /tmp/torch_dist && \
    python3 -m pip install --no-cache-dir /tmp/torch_dist/torch-*.whl


# -- Build torchvision 0.22.1 against the locally installed torch -------------
WORKDIR /opt
RUN git clone -b v0.22.1 https://github.com/pytorch/vision.git
WORKDIR /opt/vision
# CUDA architectures for the torchvision kernels (same list as the torch build).
ARG TORCHVISION_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0"
# `docker build` normally has no GPU attached, so torch.cuda.is_available() is
# False and torchvision would silently build a CPU-only wheel. FORCE_CUDA=1
# forces the CUDA extensions to be built; the architecture list must then be
# given explicitly because it cannot be probed from a device.
# The wheel is written to /opt/vision/dist.
RUN FORCE_CUDA=1 TORCH_CUDA_ARCH_LIST="${TORCHVISION_CUDA_ARCH_LIST}" \
    python3 setup.py bdist_wheel

# -- Build flashinfer (main branch supports torch 2.7 / cu126) ----------------
# Branch or tag to build. Defaults to main (the previous behaviour); pass
# --build-arg FLASHINFER_REF=<tag> for a reproducible build.
ARG FLASHINFER_REF=main
WORKDIR /opt
# --recursive is required: flashinfer vendors third-party sources as git
# submodules, and a plain clone leaves those directories empty.
RUN git clone --recursive -b "${FLASHINFER_REF}" https://github.com/flashinfer-ai/flashinfer.git
WORKDIR /opt/flashinfer

# Install into this stage, then build the wheel that is shipped to the runtime
# image (written to /opt/flashinfer/dist).
RUN python3 -m pip install --no-cache-dir . && \
    python3 -m pip wheel . --no-deps -w dist/


# # -- Install vllm (skip compilation, install directly) ----------------------
# WORKDIR /opt
# RUN pip install setuptools wheel setuptools_scm && \
#     pip install git+https://github.com/vllm-project/vllm.git@main --no-deps && \
#     python3 -m pip wheel vllm -w /tmp/vllm_wheels --no-deps

# -- Fetch the prebuilt vllm wheel so flash-attn never has to be compiled -----
WORKDIR /opt
RUN pip download vllm==0.9.1 \
      --no-deps \
      --only-binary=:all: \
      --dest /tmp/vllm_wheels

# -- Install the local sglang checkout and package it as a wheel --------------
COPY ./sglang /sgl/sglang
WORKDIR /sgl/sglang/python
# First install with the srt and openai extras (build isolation disabled), then
# emit the sglang wheel alone, without its dependencies, into /tmp/sg_wheels.
RUN python3 -m pip install --no-build-isolation ".[srt,openai]" \
 && python3 -m pip wheel --no-deps --wheel-dir /tmp/sg_wheels ".[srt,openai]"


# -- Gather every locally built / downloaded wheel in /wheels -----------------
# cp fails (and stops the build) if any of the expected wheels is missing.
RUN mkdir -p /wheels && \
    cp /tmp/torch_dist/torch*.whl \
       /opt/vision/dist/torchvision-*.whl \
       /opt/flashinfer/dist/flashinfer_python-*.whl \
       /tmp/vllm_wheels/vllm-*.whl \
       /tmp/sg_wheels/sglang-*.whl \
       /wheels/

# -- Add the third-party packages the runtime stage installs offline ----------
# All of them are resolved in ONE `pip wheel` run. Two independent runs can each
# select a different version of a shared transitive dependency, which would
# leave two wheels of the same project in /wheels and break the runtime's
# `pip install /tmp/wheels/*`.
RUN python3 -m pip wheel --no-cache-dir -w /wheels \
      filelock typing-extensions sympy fsspec jinja2 networkx \
      pydantic orjson psutil pyzmq pynvml \
      transformers==4.52.0 uvicorn fastapi IPython aiohttp \
      setproctitle uvloop sentencepiece triton pillow cachetools

###############################################################################
# Stage 2 - runtime: minimal runtime image, wheels are installed offline only
###############################################################################
# Kept next to the FROM for readability; the FROM line itself resolves
# CUDA_VERSION from the ARG declared before the first FROM of the file.
ARG CUDA_VERSION=12.6.1
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04

# Build-time only: keeps apt non-interactive during the build without baking
# DEBIAN_FRONTEND into the environment of every container started from the image.
ARG DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1 LANG=C.UTF-8 LC_ALL=C.UTF-8

# Python plus the shared libraries needed at runtime; the apt package index is
# removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        libcupti-dev \
        libgomp1 \
        libjpeg8 \
        libnuma-dev \
        libnuma1 \
        libopenblas-dev \
        libopenmpi-dev \
        libpng16-16 \
        openmpi-bin \
        python3 \
        python3-distutils \
        python3-pip && \
    rm -rf /var/lib/apt/lists/* && \
    python3 -m pip install --no-cache-dir --upgrade pip

# Copy the CUPTI shared libraries from the builder stage. Only the unversioned
# name and the major-version name are referenced, so no full version number is
# hard-coded here.
COPY --from=builder-extras \
     /usr/local/cuda/lib64/libcupti.so.12 \
     /usr/local/cuda/lib64/libcupti.so \
     /usr/lib/x86_64-linux-gnu/

# Refresh the dynamic linker cache after adding the libraries.
RUN ldconfig

# Install all collected wheels offline.
#
# The wheel directory is bind-mounted from the builder stage (requires BuildKit)
# instead of being COPY'd. A COPY creates an image layer that keeps every wheel
# forever; deleting /tmp/wheels in a later RUN does not shrink the image. With
# the mount the wheels are visible only while this RUN executes, so no cleanup
# step is needed.
#
# Order matters: the self-built torch goes in first, then vllm, then everything
# else. --no-deps is used throughout so pip never goes to PyPI and never
# replaces the self-built torch with the published one.
# Note: the `torch*.whl` glob also matches the torchvision wheel.
# The final python3 -c line is a smoke test that torch.distributed imports.
RUN --mount=type=bind,from=builder-extras,source=/wheels,target=/tmp/wheels \
    ls -lh /tmp/wheels && \
    python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/torch*.whl && \
    python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/vllm-*.whl && \
    python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/* && \
    python3 -c "from torch.distributed import Backend; print('✅ Runtime torch distributed OK, GLOO =', Backend.GLOO)"

# Add Tini as the init process (recommended).
# TINI_VERSION is needed only while building, so it is an ARG rather than an
# ENV and does not end up in the environment of running containers.
ARG TINI_VERSION=v0.19.0
# NOTE(review): this download is not checksum-verified; consider
# `ADD --checksum=sha256:<digest>` once the digest for this release is confirmed.
ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini
RUN chmod +x /tini
ENTRYPOINT ["/tini", "--"]

# Default command for CI / CPU-only self-checks: asks the server launcher for
# its help text. Override it to actually start the server.
CMD ["python3", "-m", "sglang.launch_server", "--help"]