From 9ca3ebe4bb4b41b0d35f885612f6e002dba2e957 Mon Sep 17 00:00:00 2001
From: hailin <hailin@gdzx.xyz>
Date: Mon, 7 Jul 2025 14:01:39 +0800
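Subject: [PATCH] Switch Dockerfile to Qwen3-14B, keep the Qwen3-8B build as Dockerfile.qwen3-8b, remove Dockerfile.bad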

---
 Dockerfile          |   4 +-
 Dockerfile.bad      |  63 ----------------
 Dockerfile.qwen3-8b | 177 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 179 insertions(+), 65 deletions(-)
 delete mode 100644 Dockerfile.bad
 create mode 100644 Dockerfile.qwen3-8b

diff --git a/Dockerfile b/Dockerfile
index 102b165..c1861ad 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -160,7 +160,7 @@ RUN chmod +x /tini
 ENTRYPOINT ["/tini", "--"]
 
 # ---- 拷贝模型（路径可换） ----
-COPY ./Alibaba/Qwen3-8B /root/.cradle/Alibaba/Qwen3-8B
+COPY ./Alibaba/Qwen3-14B /root/.cradle/Alibaba/Qwen3-14B
 
 HEALTHCHECK --interval=30s --timeout=2s --start-period=300s --retries=5 CMD curl -fs http://localhost:30000/health || exit 1
 
@@ -171,7 +171,7 @@ EXPOSE 30000
 CMD ["python3", "-m", "sglang.launch_server", \
      "--host", "0.0.0.0", \
      "--port", "30000", \
-     "--model-path", "/root/.cradle/Alibaba/Qwen3-8B/", \
+     "--model-path", "/root/.cradle/Alibaba/Qwen3-14B/", \
      "--tp", "1", \
      "--api-key", "token-abc123", \
      "--enable-metrics"]
\ No newline at end of file
diff --git a/Dockerfile.bad b/Dockerfile.bad
deleted file mode 100644
index d72a7f4..0000000
--- a/Dockerfile.bad
+++ /dev/null
@@ -1,63 +0,0 @@
-############################################################
-#  Stage-0: 构建依赖轮子（PyTorch + SGLang + sgl_kernel）   #
-############################################################
-ARG CUDA_VERSION=12.8.1
-FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04 AS builder
-
-# ---- Python 环境 ----
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends python3 python3-pip python3-distutils && \
-    ln -sf /usr/bin/python3 /usr/bin/python && \
-    python -m pip install --no-cache-dir --upgrade pip setuptools wheel html5lib six
-
-# ---- PyTorch / torchvision / SGLang / sgl_kernel ----
-ARG TORCH_VER=2.7.1
-ARG TV_VER=0.22.1
-RUN case "$CUDA_VERSION" in \
-        12.6.1) CUINDEX=126 ;; \
-        12.8.1) CUINDEX=128 ;; \
-        *) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
-    esac && \
-    python -m pip install --no-cache-dir \
-        torch==${TORCH_VER}+cu${CUINDEX} \
-        torchvision==${TV_VER}+cu${CUINDEX} \
-        --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} && \
-    python -m pip install --no-cache-dir \
-        sglang==0.4.8.post1 \
-        sgl-kernel==0.0.2.post17 \
-        nvidia-nccl-cu12==2.27.3 --force-reinstall --no-deps && \
-    # ✅ 补全依赖（必须）
-    python -m pip install --no-cache-dir \
-        pydantic psutil pyzmq pynvml transformers==4.48.3 uvicorn fastapi IPython aiohttp setproctitle orjson uvloop sentencepiece
-    # ✅ 测试模块完整性
-    #python -c "import sglang, torch, pydantic, transformers, sgl_kernel"
-
-############################################################
-#  Stage-1: 生成最小运行镜像                                #
-############################################################
-FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04
-
-ENV DEBIAN_FRONTEND=noninteractive \
-    PYTHONUNBUFFERED=1
-
-# ---- Python runtime ----
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends python3 python3-distutils && \
-    ln -sf /usr/bin/python3 /usr/bin/python && \
-    rm -rf /var/lib/apt/lists/*
-
-# ---- 拷贝 Python 包和入口 ----
-COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
-COPY --from=builder /usr/local/bin /usr/local/bin
-
-# ---- 拷贝模型（路径可换） ----
-COPY ./Alibaba/Qwen3-8B /root/.cradle/Alibaba/Qwen3-8B
-
-# ---- 启动服务 ----
-EXPOSE 30000
-CMD ["python3", "-m", "sglang.launch_server", \
-     "--host", "0.0.0.0", \
-     "--port", "30000", \
-     "--model-path", "/root/.cradle/Alibaba/Qwen3-8B/", \
-     "--tp", "1", \
-     "--api-key", "token-abc123"]
diff --git a/Dockerfile.qwen3-8b b/Dockerfile.qwen3-8b
new file mode 100644
index 0000000..102b165
--- /dev/null
+++ b/Dockerfile.qwen3-8b
@@ -0,0 +1,177 @@
+###############################################################################
+# Stage 0 - builder-torch: compile PyTorch 2.7.1 (+cu126) from source into a wheel
+###############################################################################
+ARG CUDA_VERSION=12.6.1
+FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS builder-torch
+
+ENV BUILD_TEST=0 \
+    USE_CUDA=1 \
+    USE_DISTRIBUTED=1 \
+    USE_GLOO=1 \
+    USE_MPI=1 \
+    USE_NCCL=1 \
+    USE_SYSTEM_NCCL=1
+# MAX_JOBS defaults to 90 (override with --build-arg); it is exported to the environment right before the compile step.
+ARG MAX_JOBS=90
+ENV DEBIAN_FRONTEND=noninteractive \
+    LANG=C.UTF-8 LC_ALL=C.UTF-8 PYTHONUNBUFFERED=1 \
+    TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0"
+# Build toolchain, BLAS/MPI/image headers, NCCL pinned to 2.22.3-1+cuda12.6, then the Python build helpers.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+      ca-certificates cmake git ninja-build \
+      libjpeg-dev libopenblas-dev libopenmpi-dev libpng-dev \
+      libnccl-dev=2.22.3-1+cuda12.6 libnccl2=2.22.3-1+cuda12.6 \
+      python3 python3-dev python3-distutils python3-pip && \
+    python3 -m pip install --no-cache-dir --upgrade \
+      numpy pip pyyaml setuptools sympy typing-extensions wheel
+
+WORKDIR /opt
+RUN git clone --recursive -b v2.7.1 https://github.com/pytorch/pytorch.git
+# Build the wheel inside the checkout; bdist_wheel leaves it under /opt/pytorch/dist.
+WORKDIR /opt/pytorch
+ENV MAX_JOBS=${MAX_JOBS}
+RUN echo "Building PyTorch with USE_DISTRIBUTED=$USE_DISTRIBUTED" && \
+    python3 setup.py bdist_wheel
+
+###############################################################################
+# Stage 1 - builder-extras: install the self-built torch, build torchvision / flashinfer / sglang, and collect wheels
+###############################################################################
+ARG CUDA_VERSION=12.6.1
+FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS builder-extras
+
+ENV DEBIAN_FRONTEND=noninteractive PYTHONUNBUFFERED=1 LANG=C.UTF-8 LC_ALL=C.UTF-8
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+      python3 python3-pip python3-distutils python3.10-dev git build-essential \
+      cmake ninja-build libjpeg-dev libpng-dev ca-certificates \
+      libopenmpi-dev libopenblas-dev \
+      libnccl2=2.22.3-1+cuda12.6 \
+      libnccl-dev=2.22.3-1+cuda12.6 && \
+    python3 -m pip install --no-cache-dir --upgrade pip wheel setuptools
+
+# -- Install the torch wheel produced by the builder-torch stage --
+COPY --from=builder-torch /opt/pytorch/dist /tmp/torch_dist
+# The glob is handed straight to pip so the build fails here if no torch wheel was produced.
+RUN echo "==> Files in /tmp/torch_dist:" && ls -lh /tmp/torch_dist && \
+    python3 -m pip install --no-cache-dir /tmp/torch_dist/torch-*.whl
+
+
+
+# -- Build torchvision 0.22.1 (against the locally installed torch) --
+WORKDIR /opt
+RUN git clone -b v0.22.1 https://github.com/pytorch/vision.git
+WORKDIR /opt/vision
+RUN python3 setup.py bdist_wheel
+
+# -- Build flashinfer from main (supports torch 2.7 / cu126); --recursive fetches its 3rdparty submodules --
+WORKDIR /opt
+RUN git clone --recursive https://github.com/flashinfer-ai/flashinfer.git
+WORKDIR /opt/flashinfer
+
+RUN pip install . && \
+    python3 -m pip wheel . --no-deps -w dist/
+
+
+# # -- Install vllm (skip compilation, install directly) --
+# WORKDIR /opt
+# RUN pip install setuptools wheel setuptools_scm && \
+#     pip install git+https://github.com/vllm-project/vllm.git@main --no-deps && \
+#     python3 -m pip wheel vllm -w /tmp/vllm_wheels --no-deps
+
+# -- Download the prebuilt vllm wheel to avoid compiling flash-attn --
+WORKDIR /opt
+RUN pip download --only-binary=:all: --no-deps vllm==0.9.1 -d /tmp/vllm_wheels
+
+
+# -- Build the local sglang source tree and package it as a wheel --
+COPY ./sglang /sgl/sglang
+WORKDIR /sgl/sglang/python
+RUN python3 -m pip install ".[srt,openai]" --no-build-isolation && \
+    python3 -m pip wheel ".[srt,openai]" --no-deps -w /tmp/sg_wheels
+
+# -- sgl-kernel: download the prebuilt wheel (binary only, no dependencies) --
+RUN pip download --only-binary=:all: --no-deps sgl-kernel -d /tmp/sgl_kernel_wheel
+
+# -- Collect every wheel into /wheels --
+RUN mkdir -p /wheels && \
+    cp /tmp/torch_dist/torch*.whl /wheels/ && \
+    cp /opt/vision/dist/torchvision-*.whl /wheels/ && \
+    cp /opt/flashinfer/dist/flashinfer_python-*.whl /wheels/ && \
+    cp /tmp/vllm_wheels/vllm-*.whl /wheels/ && \
+    cp /tmp/sg_wheels/sglang-*.whl /wheels/ && \
+    pip wheel filelock typing-extensions sympy fsspec jinja2 networkx -w /wheels
+
+# -- Also build wheels for the dependencies the runtime stage needs --
+RUN pip wheel \
+    pydantic orjson psutil pyzmq pynvml \
+    transformers==4.52.0 uvicorn fastapi IPython aiohttp \
+    setproctitle uvloop sentencepiece triton pillow cachetools msgspec blake3 cloudpickle compressed-tensors einops openai py-cpuinfo dill partial_json_parser python-multipart torchao \
+    -w /wheels
+
+###############################################################################
+# Stage 2 - runtime: slim runtime image that installs the wheels collected by builder-extras
+###############################################################################
+ARG CUDA_VERSION=12.6.1
+FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04
+
+ENV DEBIAN_FRONTEND=noninteractive PYTHONUNBUFFERED=1 LANG=C.UTF-8 LC_ALL=C.UTF-8
+
+RUN apt-get update && apt-get install -y --no-install-recommends gcc g++ build-essential ninja-build cuda-compiler-12-6 \
+        python3 python3-dev python3-pip python3-distutils curl ca-certificates \
+        libopenblas-dev libgomp1 libcupti-dev libnuma1 libopenmpi-dev openmpi-bin libnuma-dev libpng16-16 libjpeg8 && \
+    rm -rf /var/lib/apt/lists/* && \
+    python3 -m pip install --no-cache-dir --upgrade pip && \
+    python3 -m pip install --no-cache-dir xgrammar
+
+# Copy the CUPTI shared libraries from the builder (the .so.12 / .so names avoid hard-coding a full version number)
+COPY --from=builder-extras /usr/local/cuda/lib64/libcupti.so.12 /usr/lib/x86_64-linux-gnu/
+COPY --from=builder-extras /usr/local/cuda/lib64/libcupti.so /usr/lib/x86_64-linux-gnu/
+
+# Refresh the dynamic linker cache after adding the libraries above
+RUN ldconfig
+
+COPY --from=builder-extras /wheels /tmp/wheels
+COPY --from=builder-extras /tmp/sgl_kernel_wheel /tmp/sgl_kernel_wheel
+
+# Install torch first (stated intent: keep the self-built torch from being overridden by a PyPI build), then vllm, sgl-kernel and the rest, all with --no-deps.
+# NOTE(review): the rm below deletes any torch-2.7.1a0+* wheel before torch*.whl is installed; if the self-built wheel has that version it is the one removed - confirm the intent.
+RUN ls -lh /tmp/wheels && \
+    rm -f /tmp/wheels/torch-2.7.1a0+*.whl && \
+    python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/torch*.whl && \
+    python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/vllm-*.whl && \
+    python3 -m pip install --no-cache-dir --no-deps /tmp/sgl_kernel_wheel/*.whl && \
+    python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/* && \
+    python3 -c "from torch.distributed import Backend; print('✅ Runtime torch distributed OK, GLOO =', Backend.GLOO)" && \
+    rm -rf /tmp/wheels /tmp/sgl_kernel_wheel
+
+# Install the Prometheus client
+RUN python3 -m pip install --no-cache-dir prometheus_client
+
+# Directory used for multi-process metrics collection (MultiProcessCollector)
+ENV PROMETHEUS_MULTIPROC_DIR=/tmp/prometheus
+
+# Make sure that directory exists
+RUN mkdir -p /tmp/prometheus
+
+# Add tini as the init process (recommended)
+ENV TINI_VERSION=v0.19.0
+ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini
+RUN chmod +x /tini
+ENTRYPOINT ["/tini", "--"]
+
+# ---- Copy the model (the path can be changed) ----
+COPY ./Alibaba/Qwen3-8B /root/.cradle/Alibaba/Qwen3-8B
+
+HEALTHCHECK --interval=30s --timeout=2s --start-period=300s --retries=5 CMD curl -fs http://localhost:30000/health || exit 1
+
+# ---- Expose the service port ----
+EXPOSE 30000
+
+# ---- Start the SGLang inference server (NOTE(review): the API key is hard-coded here; consider supplying it at run time) ----
+CMD ["python3", "-m", "sglang.launch_server", \
+     "--host", "0.0.0.0", \
+     "--port", "30000", \
+     "--model-path", "/root/.cradle/Alibaba/Qwen3-8B/", \
+     "--tp", "1", \
+     "--api-key", "token-abc123", \
+     "--enable-metrics"]
\ No newline at end of file