This commit is contained in:
hailin 2025-09-16 17:48:47 +08:00
parent 395511fab2
commit 4e38d1f0d2
1 changed files with 23 additions and 6 deletions

View File

@ -65,11 +65,24 @@ RUN python3 setup.py bdist_wheel
# ── Build flashinfer (main branch supports torch 2.7 / cu126) ─────────────────
WORKDIR /opt
RUN git clone https://github.com/flashinfer-ai/flashinfer.git
RUN git clone --recursive -b v0.3.1 https://github.com/flashinfer-ai/flashinfer.git
WORKDIR /opt/flashinfer
RUN pip install . && \
python3 -m pip wheel . --no-deps -w dist/
# Cover your target compute capabilities: 3090=8.6, 4090=8.9, H100=9.0a;
# add or remove entries as needed.
ENV FLASHINFER_CUDA_ARCH_LIST="7.5 8.0 8.6 8.9 9.0a"
# Run the AOT pre-compilation first, then build the wheel directly
# (no build isolation, so the same self-built torch is used).
# The trailing `ls -lh dist/` lists the contents of dist/ in the build log.
RUN python3 -m flashinfer.aot && \
python3 -m pip install --no-cache-dir build && \
python3 -m build --no-isolation --wheel && \
ls -lh dist/
# RUN pip install . && \
# python3 -m pip wheel . --no-deps -w dist/
# ── Download the prebuilt vllm wheel (avoids compiling flash-attn) ─────────────
WORKDIR /opt
@ -103,6 +116,10 @@ RUN pip wheel \
setproctitle uvloop sentencepiece triton pillow cachetools msgspec blake3 cloudpickle compressed-tensors einops openai py-cpuinfo dill partial_json_parser python-multipart torchao \
-w /wheels
# Produce an offline wheel for openai-harmony (pinned to 0.0.4, dependencies
# excluded via --no-deps) and place it in /wheels.
RUN pip wheel --no-deps openai-harmony==0.0.4 -w /wheels
# ── ✅ Package the dependencies required by the gradio UI ──────────────────────
RUN pip wheel "gradio==5.38.2" requests -w /wheels
@ -185,9 +202,9 @@ RUN ls -lh /tmp/wheels && \
python3 -c "import gradio, sys; print('✅ Gradio version =', gradio.__version__)" && \
rm -rf /tmp/wheels
# Install pinned openai-harmony and flashinfer-python without their dependencies.
# PIP_NO_INDEX and PIP_FIND_LINKS are set to empty values for this command only.
# NOTE(review): presumably this overrides offline-install settings made earlier
# in the file so pip can reach the package index — confirm.
RUN PIP_NO_INDEX= PIP_FIND_LINKS= python3 -m pip install --no-cache-dir --no-deps \
openai-harmony==0.0.4 \
flashinfer-python==0.3.1
# RUN PIP_NO_INDEX= PIP_FIND_LINKS= python3 -m pip install --no-cache-dir --no-deps \
# openai-harmony==0.0.4 \
# flashinfer-python==0.3.1
# ✅ Install the Prometheus client
RUN python3 -m pip install --no-cache-dir prometheus_client