diff --git a/Dockerfile b/Dockerfile
index 91ff789..9ffb35f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -135,17 +135,18 @@ COPY --from=builder-extras /usr/local/cuda/lib64/libcupti.so /usr/lib/x86_64-lin
 # 👇建议在后面补上
 RUN ldconfig
 
-COPY --from=builder-extras /wheels /tmp/wheels
-#COPY --from=builder-extras /tmp/sgl_kernel_wheel /tmp/sgl_kernel_wheel
+# ---- 拷贝预调优的 MoE Triton kernel config ----------------------------
+COPY moe_kernels /usr/local/lib/python3.10/dist-packages/sglang/srt/layers/moe/fused_moe_triton/configs
+
+
+COPY --from=builder-extras /wheels /tmp/wheels
 
-#RUN python3 -m pip install --no-cache-dir /tmp/wheels/* && rm -rf /tmp/wheels
 # ✅ 优先装你自编的 torch,避免被 PyPI 上的覆盖
 RUN ls -lh /tmp/wheels && \
     rm -f /tmp/wheels/torch-2.7.1a0+*.whl && \
     rm -f /tmp/wheels/huggingface_hub-0.33.4*.whl && \
     python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/torch*.whl && \
     python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/vllm-*.whl && \
-    #python3 -m pip install --no-cache-dir --no-deps /tmp/sgl_kernel_wheel/*.whl && \
     python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/sgl_kernel-*.whl && \
     python3 -m pip install --no-cache-dir --no-deps /tmp/wheels/* && \
     python3 -c "from torch.distributed import Backend; print('✅ Runtime torch distributed OK, GLOO =', Backend.GLOO)" && \
diff --git a/moe_kernels/triton_3_3_1/E=128,N=384,device_name=NVIDIA_GeForce_RTX_3090.json b/moe_kernels/triton_3_3_1/E=128,N=384,device_name=NVIDIA_GeForce_RTX_3090.json
new file mode 100644
index 0000000..661af2d
--- /dev/null
+++ b/moe_kernels/triton_3_3_1/E=128,N=384,device_name=NVIDIA_GeForce_RTX_3090.json
@@ -0,0 +1,10 @@
+{
+  "64": {
+    "BLOCK_SIZE_M": 16,
+    "BLOCK_SIZE_N": 128,
+    "BLOCK_SIZE_K": 128,
+    "GROUP_SIZE_M": 32,
+    "num_warps": 8,
+    "num_stages": 2
+  }
+}