63 lines
2.3 KiB
Docker
63 lines
2.3 KiB
Docker
############################
#  Stage-0: build image    #
############################
# Global build argument. It is declared before the first FROM, so on its own
# it is only usable in FROM lines; a stage that needs it must redeclare it.
# Only 12.6.1 and 12.8.1 have been tested.
# (Dockerfile comments must start the line: a trailing "# ..." after the
# value is parsed as further ARG arguments, not as a comment.)
ARG CUDA_VERSION=12.8.1
|
|
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04 AS builder

# ---- Python & pip ----
# Install the distro Python 3 interpreter and pip, make `python` resolve to
# python3, then upgrade pip itself. This stage is only a source for
# COPY --from, so apt metadata is not cleaned up here.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        python3 \
        python3-pip \
 && ln -sf /usr/bin/python3 /usr/bin/python \
 && python -m pip install --no-cache-dir --upgrade pip
|
|
|
|
# ---- Install PyTorch + SGLang ----
# An ARG declared before the first FROM is not in scope inside a stage.
# Redeclare it (without a value) so the RUN below sees the global default or
# the --build-arg override, instead of depending on whatever the base image
# happens to export under the same name.
ARG CUDA_VERSION
# cu128 wheels are published starting with torch 2.7.*
ARG TORCH_VER=2.7.1
# torchvision 0.22.1 is the release paired with torch 2.7.1
ARG TV_VER=0.22.1
# 1. Map the CUDA version to the PyTorch wheel index suffix (cu126 / cu128);
#    any other version aborts the build.
# 2. Install torch + torchvision from the matching PyTorch wheel index.
# 3. Install the pinned SGLang release.
# 4. For CUDA 12.8 only: force-reinstall the pinned NCCL and the cu128
#    sgl_kernel wheel. These are cu128-specific and must not be layered on
#    top of a cu126 torch stack.
RUN case "$CUDA_VERSION" in \
        12.6.1) CUINDEX=126 ;; \
        12.8.1) CUINDEX=128 ;; \
        *) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
    esac && \
    python -m pip install --no-cache-dir \
        "torch==${TORCH_VER}+cu${CUINDEX}" \
        "torchvision==${TV_VER}+cu${CUINDEX}" \
        --extra-index-url "https://download.pytorch.org/whl/cu${CUINDEX}" && \
    python -m pip install --no-cache-dir sglang==0.4.8.post1 && \
    if [ "$CUINDEX" = "128" ]; then \
        python -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.3 --force-reinstall --no-deps && \
        python -m pip install --no-cache-dir \
            https://github.com/sgl-project/whl/releases/download/v0.2.0/sgl_kernel-0.2.0+cu128-cp310-abi3-manylinux2014_x86_64.whl \
            --force-reinstall --no-deps; \
    fi
|
|
|
|
############################
#  Stage-1: runtime image  #
############################
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04

# Build-time only: keeps apt-get non-interactive during the RUN below without
# persisting DEBIAN_FRONTEND into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Unbuffered stdout/stderr so Python output reaches the container log immediately.
ENV PYTHONUNBUFFERED=1

# ---- Base Python ----
# Only the interpreter and distutils are installed here; pip is not installed
# in this stage. The apt lists are removed in the same layer to keep the
# image small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        python3 \
        python3-distutils && \
    ln -sf /usr/bin/python3 /usr/bin/python && \
    rm -rf /var/lib/apt/lists/*
|
|
|
|
# ---- Copy installed Python dependencies from the builder ----
# Wildcards are expanded only in a COPY source; the destination is a literal
# path. A destination of ".../python3.*/dist-packages" would create a
# directory literally named "python3.*", which is not on the interpreter's
# search path. Ubuntu 22.04 ships Python 3.10 (and the sgl_kernel wheel
# installed by the builder is tagged cp310), so the path is spelled out
# explicitly.
COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
# Console scripts installed by pip in the builder.
COPY --from=builder /usr/local/bin /usr/local/bin
|
|
|
|
# ---- Copy model weights ----
# Bundles the model directory from the build context into the image at the
# path the start command passes as --model-path.
COPY ["./Alibaba/Qwen3-8B", "/root/.cradle/Alibaba/Qwen3-8B"]

# ---- Expose port ----
# Documentation only; the port still has to be published at `docker run` time.
EXPOSE 30000/tcp
|
|
|
|
# ---- Start command ----
# Exec-form default command: runs the sglang.launch_server module, listening
# on all interfaces at port 30000, with the bundled model path and --tp 1.
# NOTE(review): the API key is a literal in the image and is visible to anyone
# who can inspect it. Consider overriding the command at run time with a real
# secret instead of shipping this value.
CMD ["python3", "-m", "sglang.launch_server", \
     "--host", "0.0.0.0", "--port", "30000", \
     "--model-path", "/root/.cradle/Alibaba/Qwen3-8B/", \
     "--tp", "1", "--api-key", "token-abc123"]
|