From 9ca3ebe4bb4b41b0d35f885612f6e002dba2e957 Mon Sep 17 00:00:00 2001 From: hailin Date: Mon, 7 Jul 2025 14:01:39 +0800 Subject: [PATCH] . --- Dockerfile | 4 +- Dockerfile.bad | 63 ---------------- Dockerfile.qwen3-8b | 177 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 179 insertions(+), 65 deletions(-) delete mode 100644 Dockerfile.bad create mode 100644 Dockerfile.qwen3-8b diff --git a/Dockerfile b/Dockerfile index 102b165..c1861ad 100644 --- a/Dockerfile +++ b/Dockerfile @@ -160,7 +160,7 @@ RUN chmod +x /tini ENTRYPOINT ["/tini", "--"] # ---- 拷贝模型(路径可换) ---- -COPY ./Alibaba/Qwen3-8B /root/.cradle/Alibaba/Qwen3-8B +COPY ./Alibaba/Qwen3-14B /root/.cradle/Alibaba/Qwen3-14B HEALTHCHECK --interval=30s --timeout=2s --start-period=300s --retries=5 CMD curl -fs http://localhost:30000/health || exit 1 @@ -171,7 +171,7 @@ EXPOSE 30000 CMD ["python3", "-m", "sglang.launch_server", \ "--host", "0.0.0.0", \ "--port", "30000", \ - "--model-path", "/root/.cradle/Alibaba/Qwen3-8B/", \ + "--model-path", "/root/.cradle/Alibaba/Qwen3-14B/", \ "--tp", "1", \ "--api-key", "token-abc123", \ "--enable-metrics"] \ No newline at end of file diff --git a/Dockerfile.bad b/Dockerfile.bad deleted file mode 100644 index d72a7f4..0000000 --- a/Dockerfile.bad +++ /dev/null @@ -1,63 +0,0 @@ -############################################################ -# Stage-0: 构建依赖轮子(PyTorch + SGLang + sgl_kernel) # -############################################################ -ARG CUDA_VERSION=12.8.1 -FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04 AS builder - -# ---- Python 环境 ---- -RUN apt-get update && \ - apt-get install -y --no-install-recommends python3 python3-pip python3-distutils && \ - ln -sf /usr/bin/python3 /usr/bin/python && \ - python -m pip install --no-cache-dir --upgrade pip setuptools wheel html5lib six - -# ---- PyTorch / torchvision / SGLang / sgl_kernel ---- -ARG TORCH_VER=2.7.1 -ARG TV_VER=0.22.1 -RUN case "$CUDA_VERSION" in \ - 12.6.1) CUINDEX=126 ;; \ - 12.8.1) CUINDEX=128 ;; \ - *) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \ - esac && \ - python -m pip install --no-cache-dir \ - torch==${TORCH_VER}+cu${CUINDEX} \ - torchvision==${TV_VER}+cu${CUINDEX} \ - --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} && \ - python -m pip install --no-cache-dir \ - sglang==0.4.8.post1 \ - sgl-kernel==0.0.2.post17 \ - nvidia-nccl-cu12==2.27.3 --force-reinstall --no-deps && \ - # ✅ 补全依赖(必须) - python -m pip install --no-cache-dir \ - pydantic psutil pyzmq pynvml transformers==4.48.3 uvicorn fastapi IPython aiohttp setproctitle orjson uvloop sentencepiece - # ✅ 测试模块完整性 - #python -c "import sglang, torch, pydantic, transformers, sgl_kernel" - -############################################################ -# Stage-1: 生成最小运行镜像 # -############################################################ -FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04 - -ENV DEBIAN_FRONTEND=noninteractive \ - PYTHONUNBUFFERED=1 - -# ---- Python runtime ---- -RUN apt-get update && \ - apt-get install -y --no-install-recommends python3 python3-distutils && \ - ln -sf /usr/bin/python3 /usr/bin/python && \ - rm -rf /var/lib/apt/lists/* - -# ---- 拷贝 Python 包和入口 ---- -COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages -COPY --from=builder /usr/local/bin /usr/local/bin - -# ---- 拷贝模型(路径可换) ---- -COPY ./Alibaba/Qwen3-8B /root/.cradle/Alibaba/Qwen3-8B - -# ---- 启动服务 ---- -EXPOSE 30000 -CMD ["python3", "-m", "sglang.launch_server", \ - "--host", "0.0.0.0", \ - "--port", "30000", \ - "--model-path", "/root/.cradle/Alibaba/Qwen3-8B/", \ - "--tp", "1", \ - "--api-key", "token-abc123"] diff --git a/Dockerfile.qwen3-8b b/Dockerfile.qwen3-8b new file mode 100644 index 0000000..102b165 --- /dev/null +++ b/Dockerfile.qwen3-8b @@ -0,0 +1,177 @@ +############################################################################### +# Stage 0 ─ builder-torch:编译 PyTorch 2.7.1 (+cu126) +############################################################################### +ARG CUDA_VERSION=12.6.1 +FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS builder-torch + +ENV USE_CUDA=1 \ + USE_DISTRIBUTED=1 \ + USE_MPI=1 \ + USE_GLOO=1 \ + USE_NCCL=1 \ + USE_SYSTEM_NCCL=1 \ + BUILD_TEST=0 + +ARG MAX_JOBS=90 +ENV DEBIAN_FRONTEND=noninteractive \ + PYTHONUNBUFFERED=1 LANG=C.UTF-8 LC_ALL=C.UTF-8 \ + TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0" + +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3 python3-dev python3-pip python3-distutils git cmake ninja-build \ + libopenblas-dev libopenmpi-dev \ + libnccl2=2.22.3-1+cuda12.6 \ + libnccl-dev=2.22.3-1+cuda12.6 \ + libjpeg-dev libpng-dev ca-certificates && \ + python3 -m pip install --no-cache-dir --upgrade pip wheel setuptools sympy pyyaml typing-extensions numpy + +WORKDIR /opt +RUN git clone --recursive -b v2.7.1 https://github.com/pytorch/pytorch.git + +WORKDIR /opt/pytorch +ENV MAX_JOBS=${MAX_JOBS} +RUN echo "Building PyTorch with USE_DISTRIBUTED=$USE_DISTRIBUTED" && \ + python3 setup.py bdist_wheel + +############################################################################### +# Stage 1 ─ builder-extras:用自编 Torch 装 TV / flashinfer / sglang,并收集轮子 +############################################################################### +ARG CUDA_VERSION=12.6.1 +FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS builder-extras + +ENV DEBIAN_FRONTEND=noninteractive PYTHONUNBUFFERED=1 LANG=C.UTF-8 LC_ALL=C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3 python3-pip python3-distutils python3.10-dev git build-essential \ + cmake ninja-build libjpeg-dev libpng-dev ca-certificates \ + libopenmpi-dev libopenblas-dev\ + libnccl2=2.22.3-1+cuda12.6 \ + libnccl-dev=2.22.3-1+cuda12.6 && \ + python3 -m pip install --no-cache-dir --upgrade pip wheel setuptools + +# ── 安装自编 torch 轮子 ────────────────────────────────────────────────────── +COPY --from=builder-torch /opt/pytorch/dist /tmp/torch_dist +RUN set -e && \ + echo "==> Files in /tmp/torch_dist:" && ls -lh /tmp/torch_dist && \ + find /tmp/torch_dist -name 'torch-*.whl' -print | xargs -r python3 -m pip install --no-cache-dir + + + +# ── 编译 torchvision 0.22.1 (依赖本地 torch) ──────────────────────────────── +WORKDIR /opt +RUN git clone -b v0.22.1 https://github.com/pytorch/vision.git +WORKDIR /opt/vision +RUN python3 setup.py bdist_wheel + +# ── 编译 flashinfer (主分支支持 torch 2.7 / cu126) ───────────────────────── +WORKDIR /opt +RUN git clone https://github.com/flashinfer-ai/flashinfer.git +WORKDIR /opt/flashinfer + +RUN pip install . && \ + python3 -m pip wheel . --no-deps -w dist/ + + +# # ── 安装 vllm(跳过编译,直接装) ───────────────────────────────────────────── +# WORKDIR /opt +# RUN pip install setuptools wheel setuptools_scm && \ +# pip install git+https://github.com/vllm-project/vllm.git@main --no-deps && \ +# python3 -m pip wheel vllm -w /tmp/vllm_wheels --no-deps + +# ── 下载 vllm 预编译 wheel,避免编译 flash-attn ─────────────────────────────── +WORKDIR /opt +RUN pip download --only-binary=:all: --no-deps vllm==0.9.1 -d /tmp/vllm_wheels + + +# ── 编译你本地 sglang 源码并打 wheel ─────────────────────────────────────── +COPY ./sglang /sgl/sglang +WORKDIR /sgl/sglang/python +RUN python3 -m pip install ".[srt,openai]" --no-build-isolation && \ + python3 -m pip wheel ".[srt,openai]" --no-deps -w /tmp/sg_wheels + +# ── sgl-kernel 的 Python 模块 ─────────────────────────────── +RUN pip download --only-binary=:all: --no-deps sgl-kernel -d /tmp/sgl_kernel_wheel + +# ── 收集所有 wheel 到 /wheels ────────────────────────────────────────────── +RUN mkdir -p /wheels && \ + cp /tmp/torch_dist/torch*.whl /wheels/ && \ + cp /opt/vision/dist/torchvision-*.whl /wheels/ && \ + cp /opt/flashinfer/dist/flashinfer_python-*.whl /wheels/ && \ + cp /tmp/vllm_wheels/vllm-*.whl /wheels/ && \ + cp /tmp/sg_wheels/sglang-*.whl /wheels/ && \ + pip wheel filelock typing-extensions sympy fsspec jinja2 networkx -w /wheels + +# ── ✅ 再打包 runtime 阶段必需依赖 ──────────────────────────────────────────── +RUN pip wheel \ + pydantic orjson psutil pyzmq pynvml \ + transformers==4.52.0 uvicorn fastapi IPython aiohttp \ + setproctitle uvloop sentencepiece triton pillow cachetools msgspec blake3 cloudpickle compressed-tensors einops openai py-cpuinfo dill partial_json_parser python-multipart torchao \ + -w /wheels + +############################################################################### +# Stage 2 ─ runtime:极简运行镜像,仅离线安装 wheel +############################################################################### +ARG CUDA_VERSION=12.6.1 +FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04 + +ENV DEBIAN_FRONTEND=noninteractive PYTHONUNBUFFERED=1 LANG=C.UTF-8 LC_ALL=C.UTF-8 + +RUN apt-get update && apt-get install -y --no-install-recommends gcc g++ build-essential ninja-build cuda-compiler-12-6\ + python3 python3-dev python3-pip python3-distutils curl ca-certificates \ + libopenblas-dev libgomp1 libcupti-dev libnuma1 libopenmpi-dev openmpi-bin libnuma-dev libpng16-16 libjpeg8 && \ + rm -rf /var/lib/apt/lists/* && \ + python3 -m pip install --no-cache-dir --upgrade pip \ + && python3 -m pip install --no-cache-dir xgrammar + +# 👉 拷贝 cupti 动态库(避免写死版本号) +COPY --from=builder-extras /usr/local/cuda/lib64/libcupti.so.12 /usr/lib/x86_64-linux-gnu/ +COPY --from=builder-extras /usr/local/cuda/lib64/libcupti.so /usr/lib/x86_64-linux-gnu/ + +# 👇建议在后面补上 +RUN ldconfig + +COPY --from=builder-extras /wheels /tmp/wheels +COPY --from=builder-extras /tmp/sgl_kernel_wheel /tmp/sgl_kernel_wheel + +#RUN python3 -m pip install --no-cache-dir /tmp/wheels/* && rm -rf /tmp/wheels +# ✅ 优先装你自编的 torch,避免被 PyPI 上的覆盖 +RUN ls -lh /tmp/wheels && \ + rm -f /tmp/wheels/torch-2.7.1a0+*.whl && \ + python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/torch*.whl && \ + python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/vllm-*.whl && \ + python3 -m pip install --no-cache-dir --no-deps /tmp/sgl_kernel_wheel/*.whl && \ + python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/* && \ + python3 -c "from torch.distributed import Backend; print('✅ Runtime torch distributed OK, GLOO =', Backend.GLOO)" && \ + rm -rf /tmp/wheels + +# ✅ 安装 Prometheus client +RUN python3 -m pip install --no-cache-dir prometheus_client + +# ✅ 设置多进程 metrics 收集目录(用于 MultiProcessCollector) +ENV PROMETHEUS_MULTIPROC_DIR=/tmp/prometheus + +# ✅ 确保目录存在 +RUN mkdir -p /tmp/prometheus + +# ✅ 添加 Tini(推荐) +ENV TINI_VERSION=v0.19.0 +ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini +RUN chmod +x /tini +ENTRYPOINT ["/tini", "--"] + +# ---- 拷贝模型(路径可换) ---- +COPY ./Alibaba/Qwen3-8B /root/.cradle/Alibaba/Qwen3-8B + +HEALTHCHECK --interval=30s --timeout=2s --start-period=300s --retries=5 CMD curl -fs http://localhost:30000/health || exit 1 + +# ---- 暴露端口 ---- +EXPOSE 30000 + +# ---- 启动 SGLang 推理服务 ---- +CMD ["python3", "-m", "sglang.launch_server", \ + "--host", "0.0.0.0", \ + "--port", "30000", \ + "--model-path", "/root/.cradle/Alibaba/Qwen3-8B/", \ + "--tp", "1", \ + "--api-key", "token-abc123", \ + "--enable-metrics"] \ No newline at end of file