###############################################################################
# Stage 0 ─ builder-torch: build PyTorch 2.7.1 (+cu126)
###############################################################################
ARG CUDA_VERSION=12.6.1
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS builder-torch

ENV USE_CUDA=1 \
    USE_DISTRIBUTED=1 \
    USE_MPI=1 \
    USE_GLOO=1 \
    USE_NCCL=1 \
    USE_SYSTEM_NCCL=1 \
    BUILD_TEST=0

ARG MAX_JOBS=90
ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONUNBUFFERED=1 LANG=C.UTF-8 LC_ALL=C.UTF-8 \
    TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;8.9;9.0"

RUN apt-get update && apt-get install -y --no-install-recommends \
        python3 python3-dev python3-pip python3-distutils git cmake ninja-build \
        libopenblas-dev libopenmpi-dev \
        libnccl2=2.22.3-1+cuda12.6 \
        libnccl-dev=2.22.3-1+cuda12.6 \
        libjpeg-dev libpng-dev ca-certificates && \
    python3 -m pip install --no-cache-dir --upgrade pip wheel setuptools sympy pyyaml typing-extensions numpy

RUN python3 -m pip install --no-cache-dir numpy requests packaging build

WORKDIR /opt
RUN git clone --recursive -b v2.7.1 https://github.com/pytorch/pytorch.git

WORKDIR /opt/pytorch
ENV MAX_JOBS=${MAX_JOBS}
RUN echo "Building PyTorch with USE_DISTRIBUTED=$USE_DISTRIBUTED" && \
    python3 setup.py bdist_wheel
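
# (Usage sketch, not part of the build) To iterate on this stage alone, the
# builder-torch target can be built by itself and its output inspected; the
# image tag below is an illustrative assumption:
#   docker build --target builder-torch --build-arg MAX_JOBS=32 -t torch-builder .
#   docker run --rm -it torch-builder ls -lh /opt/pytorch/dist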

###############################################################################
# Stage 1 ─ builder-extras: build TorchVision / flashinfer / sglang against the
#           self-built torch, and collect every wheel
###############################################################################
ARG CUDA_VERSION=12.6.1
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS builder-extras

ENV TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;8.9;9.0"
ENV DEBIAN_FRONTEND=noninteractive PYTHONUNBUFFERED=1 LANG=C.UTF-8 LC_ALL=C.UTF-8

RUN apt-get update && apt-get install -y --no-install-recommends \
        python3 python3-pip python3-distutils python3.10-dev git build-essential \
        cmake ninja-build libjpeg-dev libpng-dev ca-certificates \
        libopenmpi-dev libopenblas-dev \
        libnccl2=2.22.3-1+cuda12.6 \
        libnccl-dev=2.22.3-1+cuda12.6 && \
    python3 -m pip install --no-cache-dir --upgrade pip wheel setuptools

# ── Install the self-built torch wheel ──────────────────────────────────────
COPY --from=builder-torch /opt/pytorch/dist /tmp/torch_dist
RUN set -e && \
    echo "==> Files in /tmp/torch_dist:" && ls -lh /tmp/torch_dist && \
    find /tmp/torch_dist -name 'torch-*.whl' -print | xargs -r python3 -m pip install --no-cache-dir

# ── Build torchvision 0.22.1 (against the local torch) ──────────────────────
WORKDIR /opt
RUN git clone -b v0.22.1 https://github.com/pytorch/vision.git
WORKDIR /opt/vision
RUN python3 setup.py bdist_wheel

# ── Build flashinfer v0.3.1 (supports torch 2.7 / cu126) ────────────────────
WORKDIR /opt
RUN git clone --recursive -b v0.3.1 https://github.com/flashinfer-ai/flashinfer.git
WORKDIR /opt/flashinfer

# Cover your target compute capabilities: 3090=8.6, 4090=8.9, H100=9.0a; add or remove as needed
ENV FLASHINFER_CUDA_ARCH_LIST="7.5 8.0 8.6 8.9 9.0a"

# AOT pre-compile first, then build the wheel directly (no build isolation, so the same self-built torch is used)
RUN python3 -m pip install --no-cache-dir numpy requests build && \
    python3 -m flashinfer.aot && \
    python3 -m build --no-isolation --wheel && \
    ls -lh dist/
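
# (Optional sanity check, sketch only) One could confirm at this point that the
# build environment still resolves the self-built torch after the flashinfer
# build; left commented out so it does not affect the image:
# RUN python3 -c "import torch; print(torch.__version__, torch.version.cuda)"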

# RUN pip install . && \
#     python3 -m pip wheel . --no-deps -w dist/

# ── Download a prebuilt vllm wheel to avoid compiling flash-attn ────────────
WORKDIR /opt
RUN pip download --only-binary=:all: --no-deps vllm==0.9.1 -d /tmp/vllm_wheels

# ── Build the local sglang sources and produce a wheel ──────────────────────
COPY ./sglang /sgl/sglang
WORKDIR /sgl/sglang/python
RUN python3 -m pip install ".[srt,openai]" --no-build-isolation && \
    python3 -m pip wheel ".[srt,openai]" --no-deps -w /tmp/sg_wheels

# ── 🔄 Download sgl-kernel (kept in sync with sglang) ────────────────────────
RUN pip download --only-binary=:all: --no-deps sgl-kernel==0.3.9.post2 -d /tmp/sgl_kernel_wheels

# ── Collect every wheel into /wheels ─────────────────────────────────────────
RUN mkdir -p /wheels && \
    cp /tmp/torch_dist/torch*.whl /wheels/ && \
    cp /opt/vision/dist/torchvision-*.whl /wheels/ && \
    cp /opt/flashinfer/dist/flashinfer_python-*.whl /wheels/ && \
    cp /tmp/vllm_wheels/vllm-*.whl /wheels/ && \
    cp /tmp/sg_wheels/sglang-*.whl /wheels/ && \
    cp /tmp/sgl_kernel_wheels/sgl_kernel-*.whl /wheels/ && \
    pip wheel filelock typing-extensions sympy fsspec jinja2 networkx -w /wheels

# ── ✅ Also wheel up the dependencies the runtime stage needs ─────────────────
RUN pip wheel \
        pydantic orjson psutil pyzmq pynvml \
        transformers==4.56.0 uvicorn fastapi IPython aiohttp \
        setproctitle uvloop sentencepiece triton pillow cachetools msgspec blake3 cloudpickle compressed-tensors einops openai py-cpuinfo dill partial_json_parser python-multipart torchao \
        -w /wheels

# Produce an offline wheel for openai-harmony
RUN pip wheel --no-deps openai-harmony==0.0.4 -w /wheels

# ── ✅ Wheel up the dependencies needed by the gradio UI ──────────────────────
RUN pip wheel "gradio==5.38.2" requests -w /wheels

# Wheel up the remaining runtime dependency as well ──────────────────────────
RUN pip wheel pybase64==1.3.2 -w /wheels

# Standalone stage that exports the wheels
FROM scratch AS wheelhouse
COPY --from=builder-extras /wheels /
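
# (Usage sketch, not part of the build) The runtime-prebuilt stage below expects
# the wheels to be present in a _wheelhouse/ directory inside the build context.
# Assuming BuildKit is available, this stage can be exported there directly:
#   docker buildx build --target wheelhouse --output type=local,dest=_wheelhouse .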

# runtime variant that installs the wheels from the host directory _wheelhouse/
ARG CUDA_VERSION=12.6.1
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04 AS runtime-prebuilt

ENV DEBIAN_FRONTEND=noninteractive PYTHONUNBUFFERED=1 LANG=C.UTF-8 LC_ALL=C.UTF-8

RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc g++ build-essential ninja-build cuda-compiler-12-6 \
        python3 python3-dev python3-pip python3-distutils curl ca-certificates \
        libopenblas-dev libgomp1 libcupti-dev libnuma1 libopenmpi-dev openmpi-bin libnuma-dev libpng16-16 libjpeg8 \
        libnccl2=2.22.3-1+cuda12.6 && \
    rm -rf /var/lib/apt/lists/* && \
    python3 -m pip install --no-cache-dir --upgrade pip

RUN ldconfig -p | grep -i cupti || (echo "no cupti"; exit 1)
RUN ldconfig

# ★ Copy the local wheels from the host build context (directory name is fixed: _wheelhouse/)
COPY _wheelhouse/ /tmp/wheels/

# Install order is identical to runtime-autobuild (torch first, everything else after)
RUN ls -lh /tmp/wheels || true && \
    # rm -f /tmp/wheels/torch-2.7.1a0+*.whl && \
    rm -f /tmp/wheels/huggingface_hub-0.34.4*.whl || true && \
    python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/torch*.whl && \
    python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/vllm-*.whl || true && \
    python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/sgl_kernel-*.whl || true && \
    python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/gradio-5.38.2*.whl || true && \
    python3 -m pip install --no-cache-dir --no-deps $(find /tmp/wheels -maxdepth 1 -type f -name '*.whl' ! -name 'gradio-*' -printf "/tmp/wheels/%f ") && \
    python3 -c "import gradio, sys; print('✅ Gradio version =', gradio.__version__)" && \
    rm -rf /tmp/wheels

RUN python3 -m pip install --no-deps xgrammar==0.1.24
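
# (Optional smoke test, sketch only) A build-time import check could be added
# here; it is limited to torch because build containers have no GPU attached:
# RUN python3 -c "import torch; print(torch.__version__, torch.version.cuda)"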

###############################################################################
# Stage 2 ─ runtime: minimal runtime image, wheels installed offline only
###############################################################################
ARG CUDA_VERSION=12.6.1
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04 AS runtime-autobuild

ENV DEBIAN_FRONTEND=noninteractive PYTHONUNBUFFERED=1 LANG=C.UTF-8 LC_ALL=C.UTF-8

RUN apt-get update && apt-get install -y --no-install-recommends gcc g++ build-essential ninja-build cuda-compiler-12-6 \
        python3 python3-dev python3-pip python3-distutils curl ca-certificates \
        libopenblas-dev libgomp1 libcupti-dev libnuma1 libopenmpi-dev openmpi-bin libnuma-dev libpng16-16 libjpeg8 libnccl2=2.22.3-1+cuda12.6 && \
    rm -rf /var/lib/apt/lists/* && \
    python3 -m pip install --no-cache-dir --upgrade pip

# Check that the CUPTI shared library is present
RUN ldconfig -p | grep -i cupti || (echo "no cupti"; exit 1)

# 👇 Refresh the linker cache afterwards (recommended)
RUN ldconfig

COPY _wheelhouse/ /tmp/wheels/

# ✅ Install the self-built torch first so it is not replaced by a PyPI build
RUN ls -lh /tmp/wheels && \
    # rm -f /tmp/wheels/torch-2.7.1a0+*.whl && \
    rm -f /tmp/wheels/huggingface_hub-0.34.4*.whl && \
    python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/torch*.whl && \
    python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/vllm-*.whl && \
    python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/sgl_kernel-*.whl && \
    python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/gradio-5.38.2*.whl && \
    # python3 -m pip install --no-cache-dir --no-deps $(ls /tmp/wheels | grep -v '^gradio-' | sed 's|^|/tmp/wheels/|') && \
    python3 -m pip install --no-cache-dir --no-deps $(find /tmp/wheels -maxdepth 1 -type f -name '*.whl' ! -name 'gradio-*') && \
    python3 -c "import gradio, sys; print('✅ Gradio version =', gradio.__version__)" && \
    rm -rf /tmp/wheels

# RUN PIP_NO_INDEX= PIP_FIND_LINKS= python3 -m pip install --no-cache-dir --no-deps \
#     openai-harmony==0.0.4 \
#     flashinfer-python==0.3.1

# ✅ Install the Prometheus client
RUN python3 -m pip install --no-cache-dir prometheus_client
RUN python3 -m pip install --no-deps xgrammar==0.1.24

# ✅ Directory for multi-process metrics collection (used by MultiProcessCollector)
ENV PROMETHEUS_MULTIPROC_DIR=/tmp/prometheus

# ✅ Make sure the directory exists
RUN mkdir -p /tmp/prometheus

# ---- Copy pre-tuned MoE Triton kernel configs -------------------------------
COPY moe_kernels /usr/local/lib/python3.10/dist-packages/sglang/srt/layers/moe/fused_moe_triton/configs

# ✅ Add Tini (recommended)
ENV TINI_VERSION=v0.19.0
ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini
RUN chmod +x /tini
ENTRYPOINT ["/tini", "--"]

# ---- Copy the model weights (path is interchangeable) ----
# COPY ./Alibaba/Qwen3-30B-A3B /root/.cradle/Alibaba/Qwen3-30B-A3B

HEALTHCHECK --interval=30s --timeout=2s --start-period=300s --retries=5 CMD curl -fs http://localhost:30000/health || exit 1

# ---- Exposed ports ----
EXPOSE 30000 30001

# Install supervisor
RUN apt-get update && apt-get install -y supervisor && \
    mkdir -p /etc/supervisor/conf.d

# Copy the supervisord config file and the UI script
COPY ./meta_ui.py /app/meta_ui.py
COPY ./supervisord.conf /etc/supervisor/supervisord.conf

# Run supervisord as the container's main process
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"]
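
# (Run sketch, not part of the image) Assuming the runtime image is tagged
# sglang-runtime and the host has the NVIDIA container toolkit installed, a
# typical invocation might look like this; the shm-size value is illustrative:
#   docker run --gpus all --shm-size 16g -p 30000:30000 -p 30001:30001 sglang-runtime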