# sglang.0.4.8.post1/Dockerfile
#
# 59 lines
# 2.0 KiB
# Docker

####################################
# Stage-0: build dependency wheels #
####################################
# Declared ahead of the first FROM so it can parameterize the base image tag.
ARG CUDA_VERSION=12.8.1
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04 AS builder

# ---- Python & pip ----
# Install the distro Python 3 and pip, expose the interpreter as `python`,
# then upgrade pip itself.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        python3 \
        python3-pip \
    && ln -sf /usr/bin/python3 /usr/bin/python \
    && python -m pip install --no-cache-dir --upgrade pip
# ---- PyTorch + SGLang ----
# Re-declare the global build argument inside this stage: an ARG placed before
# the first FROM is only visible to FROM lines, so without this the `case`
# below sees an empty value and always aborts with "Unsupported CUDA version".
# Declared without a value so it inherits the global default / --build-arg.
ARG CUDA_VERSION
ARG TORCH_VER=2.7.1
ARG TV_VER=0.22.1
# Steps performed by the RUN below, in order:
#   1. Map the CUDA version onto the PyTorch wheel index suffix (cuNNN);
#      unknown versions fail the build.
#   2. Install the torch / torchvision builds matching that suffix.
#   3. Install SGLang.
#   4. Force-reinstall a newer NCCL wheel (without touching its dependencies),
#      because CUDA 12.x needs a newer NCCL.
#   5. Install sgl-kernel from the prebuilt cp310 binary wheel on PyPI, which
#      avoids compiling it.
# NOTE(review): `sglang` is installed without extras and sgl-kernel is pinned
# separately -- confirm both match what `sglang.launch_server` needs at runtime.
RUN case "${CUDA_VERSION}" in \
        12.6.1) CUINDEX=126 ;; \
        12.8.1) CUINDEX=128 ;; \
        *) echo "Unsupported CUDA version: ${CUDA_VERSION}" >&2; exit 1 ;; \
    esac \
    && python -m pip install --no-cache-dir \
        "torch==${TORCH_VER}+cu${CUINDEX}" \
        "torchvision==${TV_VER}+cu${CUINDEX}" \
        --extra-index-url "https://download.pytorch.org/whl/cu${CUINDEX}" \
    && python -m pip install --no-cache-dir sglang==0.4.8.post1 \
    && python -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.3 --force-reinstall --no-deps \
    && python -m pip install --no-cache-dir sgl-kernel==0.0.2.post17
##########################
# Stage-1: runtime image #
##########################
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04
# Build-time only: keeps apt non-interactive during this stage's RUN steps
# without persisting the setting into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive
# Make Python write stdout/stderr unbuffered so server logs appear immediately.
ENV PYTHONUNBUFFERED=1
# Install the Python runtime only (no pip), expose it as `python`, and drop the
# apt package lists in the same layer so they do not end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        python3 \
        python3-distutils \
    && ln -sf /usr/bin/python3 /usr/bin/python \
    && rm -rf /var/lib/apt/lists/*
# Copy the Python packages and console scripts installed in the builder stage.
# COPY expands wildcards only in the source path; a destination containing
# `python3.*` is created as a literal directory that the interpreter never
# searches. The ubuntu22.04 base ships Python 3.10, so the directory is named
# explicitly on both sides.
COPY --from=builder /usr/local/lib/python3.10/dist-packages/ /usr/local/lib/python3.10/dist-packages/
COPY --from=builder /usr/local/bin /usr/local/bin
# Copy the model weights into the image (example).
COPY ./Alibaba/Qwen3-8B /root/.cradle/Alibaba/Qwen3-8B
# Port the SGLang server listens on (documentation only; publish with -p).
EXPOSE 30000
# Start the SGLang server on 0.0.0.0:30000 with the bundled model and tensor
# parallelism 1.
# The API key is taken from the SERVER_API_KEY environment variable at container
# start so a real key does not have to be baked into the image; when the
# variable is unset or empty the previous placeholder value is used, keeping
# existing `docker run` invocations working.
# Exec form performs no variable expansion, hence the explicit `sh -c`; `exec`
# replaces the shell so the Python process still receives stop signals directly.
CMD ["/bin/sh", "-c", "exec python3 -m sglang.launch_server --host 0.0.0.0 --port 30000 --model-path /root/.cradle/Alibaba/Qwen3-8B/ --tp 1 --api-key \"${SERVER_API_KEY:-token-abc123}\""]