sglang.0.4.8.post1/Dockerfile

56 lines
2.6 KiB
Docker
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

###############################################################################
# Stage 0 — builder: CUDA devel image (nvcc/gcc toolchain), used to
#   1) download the torch-2.7.1 / torchvision-0.22.1 cu124 wheels
#   2) download the flashinfer-python (torch2.7 / cu124) wheel
#   3) build a wheel from the local sglang source tree
# Everything ends up in /tmp/wheels for a later stage to pick up.
###############################################################################
# Global build args. Declared before the first FROM, they are usable in FROM
# lines only; a stage that needs one must re-declare it.
ARG CUDA_VERSION=12.6.1
# Use the official cu124 wheels. The comment lives on its own line because a
# '#' after an instruction's arguments is not treated as a comment.
ARG CUINDEX=124
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS builder
# Bring the global CUINDEX default into this stage. Without this line
# ${CUINDEX} is empty in the RUN steps below and the wheel URLs end in a
# bare "cu".
ARG CUINDEX
ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONUNBUFFERED=1 \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8
# Python + build toolchain, then refresh the packaging tools used below.
RUN apt-get update && apt-get install -y --no-install-recommends \
        python3 python3-pip python3-distutils build-essential git ca-certificates && \
    python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel
# ---- (1) Pre-download the PyTorch / TorchVision cu124 wheels ----------------
# NOTE(review): confirm that the cu${CUINDEX} index actually publishes
# torch 2.7.1 / torchvision 0.22.1 — TODO verify.
RUN python3 -m pip download --no-deps -d /tmp/wheels \
        --index-url "https://download.pytorch.org/whl/cu${CUINDEX}" \
        torch==2.7.1 torchvision==0.22.1
# ---- (2) Pre-download flashinfer (torch2.7 / cu124) -------------------------
RUN python3 -m pip download --no-deps -d /tmp/wheels \
        --find-links "https://flashinfer.ai/whl/cu${CUINDEX}/torch2.7/flashinfer-python" \
        flashinfer-python
# ---- (3) Install the downloaded wheels so they are present for the sglang build
RUN python3 -m pip install --no-cache-dir /tmp/wheels/*
# ---- (4) COPY the local sglang source and build its wheel -------------------
COPY ./sglang /sgl-workspace/sglang
WORKDIR /sgl-workspace/sglang/python
# --no-deps: only the sglang wheel itself is written next to the downloaded ones.
# NOTE(review): the [srt,openai] extras only matter when dependencies are
# resolved — confirm the install step that consumes this wheel pulls in what the
# server needs.
RUN python3 -m pip wheel '.[srt,openai]' --no-deps -w /tmp/wheels
###############################################################################
# Stage 1 — runtime: minimal image without a compiler toolchain; installs the
# wheels collected in the builder stage's /tmp/wheels
###############################################################################
# The tag is resolved from the global CUDA_VERSION declared before the first
# FROM; an ARG placed between stages would not feed this FROM line.
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04
# Build-time only: keeps apt non-interactive during the build without baking
# DEBIAN_FRONTEND into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1 \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8
# Python runtime only (no build-essential / git); apt lists are removed in the
# same layer so they do not end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        python3 python3-pip python3-distutils ca-certificates && \
    rm -rf /var/lib/apt/lists/* && \
    python3 -m pip install --no-cache-dir --upgrade pip
# ---- Install torch / torchvision / flashinfer / sglang in one step ----------
# The wheels are bind-mounted from the builder stage instead of COPY'd: a COPY
# layer would keep every wheel in the image even if a later RUN deleted them.
# Requires BuildKit (RUN --mount).
# NOTE(review): there is no --no-index here, so pip may still contact the
# package index for dependencies that are not present in /tmp/wheels.
RUN --mount=type=bind,from=builder,source=/tmp/wheels,target=/tmp/wheels \
    python3 -m pip install --no-cache-dir /tmp/wheels/*
# ---- Default command only prints the server help (per the original note, this
# ---- also runs on CPU-only machines) ----------------------------------------
CMD ["python3", "-m", "sglang.launch_server", "--help"]