# syntax=docker/dockerfile:1
# sglang 0.4.8.post1 — multi-stage Dockerfile (CUDA runtime base)

############################
# Stage-0: fetch runtime dependency wheels #
############################
# Only 12.6.1 / 12.8.1 have been tested.
# NOTE: Dockerfile has no inline comments — a "#" on the same line as ARG
# becomes part of the value — so the note lives on its own line above.
ARG CUDA_VERSION=12.8.1
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04 AS builder
# ---- Python & pip ----
# Install + cleanup in one layer so the apt lists never persist in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends python3 python3-pip && \
    ln -sf /usr/bin/python3 /usr/bin/python && \
    python -m pip install --no-cache-dir --upgrade pip && \
    rm -rf /var/lib/apt/lists/*
# ---- 安装 PyTorch + SGLang ----
ARG TORCH_VER=2.3.0 # 如需别的版本自行改
RUN case "$CUDA_VERSION" in \
12.6.1) CUINDEX=126 ;; \
12.8.1) CUINDEX=128 ;; \
*) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
esac && \
python -m pip install --no-cache-dir \
torch==${TORCH_VER}+cu${CUINDEX} \
torchvision==0.18.0+cu${CUINDEX} \
--extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} && \
python -m pip install --no-cache-dir sglang==0.4.8.post1 && \
# 12.8 额外装官方 sgl_kernel & NCCL
if [ "$CUDA_VERSION" = "12.8.1" ]; then \
python -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.3 --force-reinstall --no-deps && \
python -m pip install --no-cache-dir \
https://github.com/sgl-project/whl/releases/download/v0.2.0/sgl_kernel-0.2.0+cu128-cp39-abi3-manylinux2014_x86_64.whl \
--force-reinstall --no-deps ; \
fi
############################
# Stage-1: minimal runtime image #
############################
# CUDA_VERSION declared before the first FROM is in scope for all FROM lines.
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04
ENV PYTHONUNBUFFERED=1
# Base Python. DEBIAN_FRONTEND is set inline for the install only, so it does
# not leak into the runtime environment of the final image.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3 python3-distutils && \
    ln -sf /usr/bin/python3 /usr/bin/python && \
    rm -rf /var/lib/apt/lists/*
# Copy the installed packages (dist-packages) into the final image.
# Ubuntu 22.04 ships Python 3.10; the path must be spelled out because COPY
# only globs the *source* — a wildcard in the destination is taken literally
# and would create a directory named "python3.*".
COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
COPY --from=builder /usr/local/bin /usr/local/bin
# Copy model files (example path — adjust as needed)
COPY ./Alibaba/Qwen3-8B /root/.cradle/Alibaba/Qwen3-8B
# Expose the inference port (documentation only; publish with -p/--publish at run time)
EXPOSE 30000
# Default: launch the SGLang inference server (exec form, so it runs as PID 1).
# NOTE(review): the hard-coded --api-key is baked into the image metadata and
# visible via `docker history`/`docker inspect` — confirm this is acceptable,
# or inject it at run time instead.
# Alternative configuration (larger model with CPU offload), kept for reference:
# CMD ["python3", "-m", "sglang.launch_server", \
# "--cpu-offload-gb", "64", \
# "--host", "0.0.0.0", \
# "--port", "30000", \
# "--model-path", "/root/.cradle/Alibaba/QwQ-32B/", \
# "--tp", "2", \
# "--api-key", "token-abc123", \
# "--mem-fraction-static", "0.8"]
CMD ["python3", "-m", "sglang.launch_server", \
"--host", "0.0.0.0", \
"--port", "30000", \
"--model-path", "/root/.cradle/Alibaba/Qwen3-8B/", \
"--tp", "1", \
"--api-key", "token-abc123"]