This commit is contained in:
hailin 2025-07-26 22:19:16 +08:00
parent 49b8cae1bb
commit d1a2b815b3
2 changed files with 15 additions and 4 deletions

View File

@ -135,17 +135,18 @@ COPY --from=builder-extras /usr/local/cuda/lib64/libcupti.so /usr/lib/x86_64-lin
# 👇 Recommended follow-up: refresh the dynamic linker cache after the shared-library COPY above.
RUN ldconfig
# Bring the wheels produced in the builder-extras stage into /tmp/wheels.
COPY --from=builder-extras /wheels /tmp/wheels
#COPY --from=builder-extras /tmp/sgl_kernel_wheel /tmp/sgl_kernel_wheel
# ---- Copy the pre-tuned MoE Triton kernel configs ----------------------------
# The destination is hard-coded to the python3.10 dist-packages path of sglang.
COPY moe_kernels /usr/local/lib/python3.10/dist-packages/sglang/srt/layers/moe/fused_moe_triton/configs
# NOTE(review): this repeats the identical wheels COPY a few lines up and looks redundant — confirm and drop one.
COPY --from=builder-extras /wheels /tmp/wheels
#RUN python3 -m pip install --no-cache-dir /tmp/wheels/* && rm -rf /tmp/wheels
# ✅ Install the self-built torch wheel first so it is not overwritten by the PyPI build.
RUN ls -lh /tmp/wheels && \
rm -f /tmp/wheels/torch-2.7.1a0+*.whl && \
rm -f /tmp/wheels/huggingface_hub-0.33.4*.whl && \
python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/torch*.whl && \
python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/vllm-*.whl && \
#python3 -m pip install --no-cache-dir --no-deps /tmp/sgl_kernel_wheel/*.whl && \
python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/sgl_kernel-*.whl && \
python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/* && \
python3 -c "from torch.distributed import Backend; print('✅ Runtime torch distributed OK, GLOO =', Backend.GLOO)" && \

View File

@ -0,0 +1,10 @@
{
"64": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 32,
"num_warps": 8,
"num_stages": 2
}
}