Dockerfile: build flashinfer v0.3.1 with AOT kernels; add an offline openai-harmony wheel

* Clone flashinfer at tag v0.3.1 (with submodules) instead of the default branch.
* Run `python3 -m flashinfer.aot` with FLASHINFER_CUDA_ARCH_LIST set, then build
  the wheel with `python3 -m build --no-isolation --wheel`.
* Add an openai-harmony==0.0.4 wheel to /wheels.
* Remove the step that pip-installed openai-harmony and flashinfer-python with
  PIP_NO_INDEX / PIP_FIND_LINKS cleared.

Base blob: 9a4765478 (mode 100644).

NOTE(review): blank context lines and continuation-line indentation were
reconstructed (assumed 4 spaces); verify against the base file, or apply with
`git apply --ignore-space-change` if only whitespace differs.

diff --git a/Dockerfile b/Dockerfile
--- a/Dockerfile
+++ b/Dockerfile
@@ -65,11 +65,21 @@ RUN python3 setup.py bdist_wheel
 
 # ── 编译 flashinfer (主分支支持 torch 2.7 / cu126) ─────────────────────────
 WORKDIR /opt
-RUN git clone https://github.com/flashinfer-ai/flashinfer.git
+RUN git clone --recursive -b v0.3.1 https://github.com/flashinfer-ai/flashinfer.git
 WORKDIR /opt/flashinfer
 
-RUN pip install . && \
-    python3 -m pip wheel . --no-deps -w dist/
+# Compute capabilities to cover (3090 = 8.6, 4090 = 8.9, H100 = 9.0a);
+# add or remove entries as needed.
+ENV FLASHINFER_CUDA_ARCH_LIST="7.5 8.0 8.6 8.9 9.0a"
+
+# Run the AOT precompilation first, then build the wheel directly (no build
+# isolation, so the same self-built torch is used).
+# NOTE(review): the wheel is written to ./dist (listed by the last command);
+# confirm a later step picks it up from there.
+RUN python3 -m flashinfer.aot && \
+    python3 -m pip install --no-cache-dir build && \
+    python3 -m build --no-isolation --wheel && \
+    ls -lh dist/
 
 # ── 下载 vllm 预编译 wheel,避免编译 flash-attn ───────────────────────────────
 WORKDIR /opt
@@ -103,6 +113,9 @@ RUN pip wheel \
     setproctitle uvloop sentencepiece triton pillow cachetools msgspec blake3 cloudpickle compressed-tensors einops openai py-cpuinfo dill partial_json_parser python-multipart torchao \
     -w /wheels
 
+# Build an offline wheel for openai-harmony.
+RUN pip wheel --no-deps openai-harmony==0.0.4 -w /wheels
+
 # ── ✅ 打包 gradio UI 所需依赖 ────────────────────────────────────────────────
 RUN pip wheel "gradio==5.38.2" requests -w /wheels
 
@@ -185,9 +198,5 @@ RUN ls -lh /tmp/wheels && \
     python3 -c "import gradio, sys; print('✅ Gradio version =', gradio.__version__)" && \
     rm -rf /tmp/wheels
 
-RUN PIP_NO_INDEX= PIP_FIND_LINKS= python3 -m pip install --no-cache-dir --no-deps \
-    openai-harmony==0.0.4 \
-    flashinfer-python==0.3.1
-
 # ✅ 安装 Prometheus client
 RUN python3 -m pip install --no-cache-dir prometheus_client