hailin 2025-06-30 12:01:49 +08:00
parent 7cd437c41b
commit 67f8520177
1 changed file with 12 additions and 14 deletions


@@ -1,5 +1,5 @@
 ############################
-# Stage-0: fetch runtime dependency wheels #
+# Stage-0: build image                     #
 ############################
 ARG CUDA_VERSION=12.8.1            # only 12.6.1 / 12.8.1 have been tested to work
 FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04 AS builder
@@ -12,7 +12,7 @@ RUN apt-get update && \
 # ---- Install PyTorch + SGLang ----
 ARG TORCH_VER=2.7.1                # cu128 wheels are provided starting with 2.7.*
-ARG TV_VER=0.22.0
+ARG TV_VER=0.22.1                  # 0.22.1 is fully compatible with torch-2.7.1
 RUN case "$CUDA_VERSION" in \
         12.6.1) CUINDEX=126 ;; \
         12.8.1) CUINDEX=128 ;; \
@@ -24,38 +24,36 @@ RUN case "$CUDA_VERSION" in \
         --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} && \
     python -m pip install --no-cache-dir sglang==0.4.8.post1 && \
     # For 12.8, additionally install the official sgl_kernel & NCCL
-    if [ "$CUDA_VERSION" = "12.8.1" ]; then \
-        python -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.3 --force-reinstall --no-deps && \
-        python -m pip install --no-cache-dir \
-            https://github.com/sgl-project/whl/releases/download/v0.2.0/sgl_kernel-0.2.0+cu128-cp310-abi3-manylinux2014_x86_64.whl \
-            --force-reinstall --no-deps ; \
-    fi
+    python -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.3 --force-reinstall --no-deps && \
+    python -m pip install --no-cache-dir \
+        https://github.com/sgl-project/whl/releases/download/v0.2.0/sgl_kernel-0.2.0+cu128-cp310-abi3-manylinux2014_x86_64.whl \
+        --force-reinstall --no-deps
 ############################
-# Stage-1: minimal runtime image #
+# Stage-1: runtime image         #
 ############################
 FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04
 ENV DEBIAN_FRONTEND=noninteractive \
     PYTHONUNBUFFERED=1
-# Base Python
+# ---- Base Python ----
 RUN apt-get update && \
     apt-get install -y --no-install-recommends python3 python3-distutils && \
     ln -sf /usr/bin/python3 /usr/bin/python && \
     rm -rf /var/lib/apt/lists/*
-# Copy the virtual environment (i.e. site-packages) into the final image
+# ---- Copy dependency wheels ----
 COPY --from=builder /usr/local/lib/python3.*/dist-packages /usr/local/lib/python3.*/dist-packages
 COPY --from=builder /usr/local/bin /usr/local/bin
-# Copy model files (example path, adjust as needed)
+# ---- Copy model weights ----
 COPY ./Alibaba/Qwen3-8B /root/.cradle/Alibaba/Qwen3-8B
-# Expose the inference port
+# ---- Expose port ----
 EXPOSE 30000
-# Start the SGLang inference server by default
+# ---- Startup command ----
 CMD ["python3", "-m", "sglang.launch_server", \
     "--host", "0.0.0.0", \
     "--port", "30000", \