This commit is contained in:
parent
49b8cae1bb
commit
d1a2b815b3
|
|
@ -135,17 +135,18 @@ COPY --from=builder-extras /usr/local/cuda/lib64/libcupti.so /usr/lib/x86_64-lin
|
|||
# 👇 Recommended addition after the shared-library COPY steps above:
# run ldconfig so the dynamic linker cache is rebuilt.
|
||||
RUN ldconfig
|
||||
|
||||
# Copy the wheels from the builder-extras stage into /tmp/wheels.
COPY --from=builder-extras /wheels /tmp/wheels
|
||||
#COPY --from=builder-extras /tmp/sgl_kernel_wheel /tmp/sgl_kernel_wheel
|
||||
# ---- Copy the pre-tuned MoE Triton kernel configs ----------------------------
# The moe_kernels directory from the build context is copied into sglang's
# fused_moe_triton/configs directory under the Python 3.10 dist-packages tree.
|
||||
COPY moe_kernels /usr/local/lib/python3.10/dist-packages/sglang/srt/layers/moe/fused_moe_triton/configs
|
||||
|
||||
|
||||
# Copy the wheels from the builder-extras stage into /tmp/wheels.
# NOTE(review): an identical COPY appears a few lines earlier; confirm whether
# both are intended or whether one is the pre-change position of a moved line.
COPY --from=builder-extras /wheels /tmp/wheels
|
||||
|
||||
#RUN python3 -m pip install --no-cache-dir /tmp/wheels/* && rm -rf /tmp/wheels
|
||||
# ✅ Install your self-built torch first so that it is not overwritten by the build from PyPI
|
||||
RUN ls -lh /tmp/wheels && \
|
||||
rm -f /tmp/wheels/torch-2.7.1a0+*.whl && \
|
||||
rm -f /tmp/wheels/huggingface_hub-0.33.4*.whl && \
|
||||
python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/torch*.whl && \
|
||||
python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/vllm-*.whl && \
|
||||
#python3 -m pip install --no-cache-dir --no-deps /tmp/sgl_kernel_wheel/*.whl && \
|
||||
python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/sgl_kernel-*.whl && \
|
||||
python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/* && \
|
||||
python3 -c "from torch.distributed import Backend; print('✅ Runtime torch distributed OK, GLOO =', Backend.GLOO)" && \
|
||||
|
|
|
|||
|
|
@ -0,0 +1,10 @@
|
|||
{
|
||||
"64": {
|
||||
"BLOCK_SIZE_M": 16,
|
||||
"BLOCK_SIZE_N": 128,
|
||||
"BLOCK_SIZE_K": 128,
|
||||
"GROUP_SIZE_M": 32,
|
||||
"num_warps": 8,
|
||||
"num_stages": 2
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue