############################
# Stage-0: fetch runtime dependency wheels #
############################
# Only CUDA 12.6.1 / 12.8.1 have been tested
ARG CUDA_VERSION=12.8.1
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04 AS builder

# Re-declare so the build arg is visible inside this stage
ARG CUDA_VERSION

# ---- Python & pip ----
RUN apt-get update && \
    apt-get install -y --no-install-recommends python3 python3-pip && \
    ln -sf /usr/bin/python3 /usr/bin/python && \
    python -m pip install --no-cache-dir --upgrade pip

# ---- Install PyTorch + SGLang ----
# Adjust if needed; the chosen version must have +cu126 / +cu128 wheels on the PyTorch index
ARG TORCH_VER=2.7.1
RUN case "$CUDA_VERSION" in \
        12.6.1) CUINDEX=126 ;; \
        12.8.1) CUINDEX=128 ;; \
        *) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
    esac && \
    python -m pip install --no-cache-dir \
        torch==${TORCH_VER}+cu${CUINDEX} \
        torchvision==0.22.1+cu${CUINDEX} \
        --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} && \
    python -m pip install --no-cache-dir sglang==0.4.8.post1 && \
    # On CUDA 12.8, additionally install the official sgl_kernel wheel and NCCL
    if [ "$CUDA_VERSION" = "12.8.1" ]; then \
        python -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.3 --force-reinstall --no-deps && \
        python -m pip install --no-cache-dir \
            https://github.com/sgl-project/whl/releases/download/v0.2.0/sgl_kernel-0.2.0+cu128-cp39-abi3-manylinux2014_x86_64.whl \
            --force-reinstall --no-deps ; \
    fi

############################
# Stage-1: minimal runtime image #
############################
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04

ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONUNBUFFERED=1

# Base Python
RUN apt-get update && \
    apt-get install -y --no-install-recommends python3 python3-distutils && \
    ln -sf /usr/bin/python3 /usr/bin/python && \
    rm -rf /var/lib/apt/lists/*

# Copy the installed packages (site-packages) into the final image.
# Ubuntu 22.04 ships Python 3.10; the path is spelled out explicitly because
# COPY does not expand wildcards in the destination.
COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Copy the model files (example path, adjust as needed)
COPY ./Alibaba/Qwen3-8B /root/.cradle/Alibaba/Qwen3-8B

# Expose the inference port
EXPOSE 30000

# Default: launch the SGLang inference server
# CMD ["python3", "-m", "sglang.launch_server", \
#      "--cpu-offload-gb", "64", \
#      "--host", "0.0.0.0", \
#      "--port", "30000", \
#      "--model-path", "/root/.cradle/Alibaba/QwQ-32B/", \
#      "--tp", "2", \
#      "--api-key", "token-abc123", \
#      "--mem-fraction-static", "0.8"]
CMD ["python3", "-m", "sglang.launch_server", \
     "--host", "0.0.0.0", \
     "--port", "30000", \
     "--model-path", "/root/.cradle/Alibaba/Qwen3-8B/", \
     "--tp", "1", \
     "--api-key", "token-abc123"]
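
# ---- Example usage (illustrative sketch; the image tag shown here is a ----
# ---- placeholder, and the model path / API key must match the values  ----
# ---- baked in above)                                                   ----
#
#   docker build --build-arg CUDA_VERSION=12.8.1 -t sglang-qwen3-8b .
#   docker run --gpus all -p 30000:30000 sglang-qwen3-8b
#
# The server exposes an OpenAI-compatible HTTP API, e.g.:
#   curl http://localhost:30000/v1/models -H "Authorization: Bearer token-abc123"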