diff --git a/Dockerfile b/Dockerfile
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,63 +1,80 @@
-############################################################
-# Stage-0: 构建依赖轮子(PyTorch + SGLang + sgl_kernel) #
-############################################################
-ARG CUDA_VERSION=12.8.1
-FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04 AS builder
+###############################################################################
+# Stage 0 - builder: CUDA 12.6.1 devel image (nvcc/gcc) that compiles the local
+# sglang source tree into a wheel.
+###############################################################################
+# ARGs declared before the first FROM are visible to FROM lines only.
+ARG CUDA_VERSION=12.6.1
+# CUDA 12.6.x => cu126 wheel index. Kept on its own line: a trailing "# ..."
+# after an instruction is not treated as a comment in a Dockerfile.
+ARG CUINDEX=126
+FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS builder
 
-# ---- Python 环境 ----
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends python3 python3-pip python3-distutils && \
-    ln -sf /usr/bin/python3 /usr/bin/python && \
-    python -m pip install --no-cache-dir --upgrade pip setuptools wheel html5lib six
+# Re-declare CUINDEX inside the stage so RUN steps see the global default.
+ARG CUINDEX
+# Build-time only: not persisted into the image environment.
+ARG DEBIAN_FRONTEND=noninteractive
+ENV PYTHONUNBUFFERED=1 LANG=C.UTF-8 LC_ALL=C.UTF-8
 
-# ---- PyTorch / torchvision / SGLang / sgl_kernel ----
-ARG TORCH_VER=2.7.1
-ARG TV_VER=0.22.1
-RUN case "$CUDA_VERSION" in \
-        12.6.1) CUINDEX=126 ;; \
-        12.8.1) CUINDEX=128 ;; \
-        *) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
-    esac && \
-    python -m pip install --no-cache-dir \
-        torch==${TORCH_VER}+cu${CUINDEX} \
-        torchvision==${TV_VER}+cu${CUINDEX} \
-        --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} && \
-    python -m pip install --no-cache-dir \
-        sglang==0.4.8.post1 \
-        sgl-kernel==0.0.2.post17 \
-        nvidia-nccl-cu12==2.27.3 --force-reinstall --no-deps && \
-    # ✅ 补全依赖(必须)
-    python -m pip install --no-cache-dir \
-        pydantic psutil pyzmq pynvml transformers==4.48.3 uvicorn fastapi IPython aiohttp setproctitle orjson uvloop sentencepiece
-    # ✅ 测试模块完整性
-    #python -c "import sglang, torch, pydantic, transformers, sgl_kernel"
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        build-essential \
+        ca-certificates \
+        git \
+        python3 \
+        python3-distutils \
+        python3-pip \
+    && rm -rf /var/lib/apt/lists/* \
+    && python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel
 
-############################################################
-# Stage-1: 生成最小运行镜像 #
-############################################################
+# (1) Download the official PyTorch / TorchVision wheels.
+RUN python3 -m pip download --no-deps \
+        --dest /tmp/wheels \
+        --index-url https://download.pytorch.org/whl/cu${CUINDEX} \
+        torch==2.7.1 torchvision==0.22.1
+
+# (2) Download the flashinfer wheel for this CUDA / torch 2.7 pair (download only).
+RUN python3 -m pip download --no-deps \
+        --dest /tmp/wheels \
+        --find-links https://flashinfer.ai/whl/cu${CUINDEX}/torch2.7/flashinfer-python \
+        flashinfer-python
+
+# (3) Install the downloaded wheels; they are build dependencies for sglang.
+# The flashinfer glob matches on the project name only, so it does not depend
+# on how the CUDA/torch local-version tag is spelled in the file name.
+RUN python3 -m pip install --no-cache-dir \
+        /tmp/wheels/torch-2.7.1+cu${CUINDEX}*.whl \
+        /tmp/wheels/torchvision-0.22.1+cu${CUINDEX}*.whl \
+        /tmp/wheels/flashinfer_python-*.whl
+
+# (4) Copy the local sglang source and build it into a wheel.
+COPY ./sglang /sgl-workspace/sglang
+WORKDIR /sgl-workspace/sglang/python
+RUN python3 -m pip wheel '.[srt,openai]' --no-deps -w /tmp/wheels
+
+###############################################################################
+# Stage 1 - runtime: minimal image that installs the wheels built above.
+###############################################################################
+# ${CUDA_VERSION} below resolves from the global ARG at the top of the file.
 FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04
 
-ENV DEBIAN_FRONTEND=noninteractive \
-    PYTHONUNBUFFERED=1
+# Build-time only: not persisted into the image environment.
+ARG DEBIAN_FRONTEND=noninteractive
+ENV PYTHONUNBUFFERED=1 LANG=C.UTF-8 LC_ALL=C.UTF-8
 
-# ---- Python runtime ----
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends python3 python3-distutils && \
-    ln -sf /usr/bin/python3 /usr/bin/python && \
-    rm -rf /var/lib/apt/lists/*
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        ca-certificates \
+        python3 \
+        python3-distutils \
+        python3-pip \
+    && rm -rf /var/lib/apt/lists/* \
+    && python3 -m pip install --no-cache-dir --upgrade pip
 
-# ---- 拷贝 Python 包和入口 ----
-COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
-COPY --from=builder /usr/local/bin /usr/local/bin
+# Copy every wheel produced by the builder stage (4-5 in total) and install them.
+# NOTE(review): the COPY layer keeps the wheels in the image even after the rm
+# below; a BuildKit bind mount from the builder stage would avoid that.
+# NOTE(review): installing wheel files by path does not request the
+# [srt,openai] extras used at build time - confirm runtime deps are satisfied.
+COPY --from=builder /tmp/wheels /tmp/wheels
+RUN python3 -m pip install --no-cache-dir /tmp/wheels/* && rm -rf /tmp/wheels
 
-# ---- 拷贝模型(路径可换) ----
-COPY ./Alibaba/Qwen3-8B /root/.cradle/Alibaba/Qwen3-8B
-
-# ---- 启动服务 ----
-EXPOSE 30000
-CMD ["python3", "-m", "sglang.launch_server", \
-    "--host", "0.0.0.0", \
-    "--port", "30000", \
-    "--model-path", "/root/.cradle/Alibaba/Qwen3-8B/", \
-    "--tp", "1", \
-    "--api-key", "token-abc123"]
+# Default command only prints the help text, so it also runs on CPU-only hosts.
+CMD ["python3", "-m", "sglang.launch_server", "--help"]
diff --git a/Dockerfile.bad b/Dockerfile.bad
new file mode 100644
index 0000000..d72a7f4
--- /dev/null
+++ b/Dockerfile.bad
@@ -0,0 +1,63 @@
+############################################################
+# Stage-0: 构建依赖轮子(PyTorch + SGLang + sgl_kernel) #
+############################################################
+ARG CUDA_VERSION=12.8.1
+FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04 AS builder
+
+# ---- Python 环境 ----
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends python3 python3-pip python3-distutils && \
+    ln -sf /usr/bin/python3 /usr/bin/python && \
+    python -m pip install --no-cache-dir --upgrade pip setuptools wheel html5lib six
+
+# ---- PyTorch / torchvision / SGLang / sgl_kernel ----
+ARG TORCH_VER=2.7.1
+ARG TV_VER=0.22.1
+RUN case "$CUDA_VERSION" in \
+        12.6.1) CUINDEX=126 ;; \
+        12.8.1) CUINDEX=128 ;; \
+        *) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
+    esac && \
+    python -m pip install --no-cache-dir \
+        torch==${TORCH_VER}+cu${CUINDEX} \
+        torchvision==${TV_VER}+cu${CUINDEX} \
+        --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} && \
+    python -m pip install --no-cache-dir \
+        sglang==0.4.8.post1 \
+        sgl-kernel==0.0.2.post17 \
+        nvidia-nccl-cu12==2.27.3 --force-reinstall --no-deps && \
+    # ✅ 补全依赖(必须)
+    python -m pip install --no-cache-dir \
+        pydantic psutil pyzmq pynvml transformers==4.48.3 uvicorn fastapi IPython aiohttp setproctitle orjson uvloop sentencepiece
+    # ✅ 测试模块完整性
+    #python -c "import sglang, torch, pydantic, transformers, sgl_kernel"
+
+############################################################
+# Stage-1: 生成最小运行镜像 #
+############################################################
+FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04
+
+ENV DEBIAN_FRONTEND=noninteractive \
+    PYTHONUNBUFFERED=1
+
+# ---- Python runtime ----
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends python3 python3-distutils && \
+    ln -sf /usr/bin/python3 /usr/bin/python && \
+    rm -rf /var/lib/apt/lists/*
+
+# ---- 拷贝 Python 包和入口 ----
+COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
+COPY --from=builder /usr/local/bin /usr/local/bin
+
+# ---- 拷贝模型(路径可换) ----
+COPY ./Alibaba/Qwen3-8B /root/.cradle/Alibaba/Qwen3-8B
+
+# ---- 启动服务 ----
+EXPOSE 30000
+CMD ["python3", "-m", "sglang.launch_server", \
+    "--host", "0.0.0.0", \
+    "--port", "30000", \
+    "--model-path", "/root/.cradle/Alibaba/Qwen3-8B/", \
+    "--tp", "1", \
+    "--api-key", "token-abc123"]