############################################################
# Stage 0: install the Python dependencies                 #
#          (PyTorch + SGLang + sgl_kernel)                 #
############################################################
# Declared before the first FROM so that it can select the base-image tag
# of both stages. Override with e.g. `--build-arg CUDA_VERSION=12.6.1`.
ARG CUDA_VERSION=12.8.1
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04 AS builder

# An ARG declared before FROM is only usable in FROM lines. Re-declare it
# (without a value) so the RUN instruction below can read it explicitly
# instead of depending on whatever the base image happens to export.
ARG CUDA_VERSION

# ---- Python environment ----
# DEBIAN_FRONTEND is set inline so package configuration can never block
# the build waiting for interactive input.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        python3 python3-pip python3-distutils && \
    ln -sf /usr/bin/python3 /usr/bin/python && \
    python -m pip install --no-cache-dir --upgrade pip setuptools wheel html5lib six

# ---- PyTorch / torchvision / SGLang / sgl_kernel ----
ARG TORCH_VER=2.7.1
ARG TV_VER=0.22.1

# Steps performed by the RUN below, in order:
#   1. Map the CUDA version to the PyTorch wheel-index suffix (cuNNN);
#      abort the build for any version that is not listed.
#   2. Install torch and torchvision from the matching PyTorch index.
#   3. Install the pinned sglang / sgl-kernel / NCCL wheels. Note that
#      --force-reinstall --no-deps applies to all three packages.
#   4. Install the runtime dependencies that step 3 skipped (required).
#   5. Import the key modules so that a broken install fails the build.
RUN case "${CUDA_VERSION}" in \
        12.6.1) CUINDEX=126 ;; \
        12.8.1) CUINDEX=128 ;; \
        *) echo "Unsupported CUDA version: ${CUDA_VERSION}" >&2; exit 1 ;; \
    esac && \
    python -m pip install --no-cache-dir \
        "torch==${TORCH_VER}+cu${CUINDEX}" \
        "torchvision==${TV_VER}+cu${CUINDEX}" \
        --extra-index-url "https://download.pytorch.org/whl/cu${CUINDEX}" && \
    python -m pip install --no-cache-dir \
        sglang==0.4.8.post1 \
        sgl-kernel==0.0.2.post17 \
        nvidia-nccl-cu12==2.27.3 --force-reinstall --no-deps && \
    python -m pip install --no-cache-dir \
        pydantic psutil pyzmq pynvml transformers==4.48.3 uvicorn fastapi IPython aiohttp setproctitle && \
    python -c "import sglang, torch, pydantic, transformers, sgl_kernel"

############################################################
# Stage 1: build the minimal runtime image                 #
############################################################
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04

ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONUNBUFFERED=1

# ---- Python runtime ----
# pip is intentionally not installed here; packages come from the builder.
RUN apt-get update && \
    apt-get install -y --no-install-recommends python3 python3-distutils && \
    ln -sf /usr/bin/python3 /usr/bin/python && \
    rm -rf /var/lib/apt/lists/*

# ---- Copy Python packages and entry points from the builder ----
# NOTE(review): the python3.10 path is tied to the ubuntu22.04 base image;
# it has to be updated if the base image (and its Python version) changes.
COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# ---- Copy the model (the path can be changed) ----
COPY ./Alibaba/Qwen3-8B /root/.cradle/Alibaba/Qwen3-8B

# ---- Start the server ----
EXPOSE 30000
# NOTE(review): the API key below is a hard-coded placeholder that ends up
# in the image metadata. Override the command at `docker run` time with a
# real key before exposing this service outside a trusted network.
CMD ["python3", "-m", "sglang.launch_server", \
     "--host", "0.0.0.0", \
     "--port", "30000", \
     "--model-path", "/root/.cradle/Alibaba/Qwen3-8B/", \
     "--tp", "1", \
     "--api-key", "token-abc123"]