diff --git a/Dockerfile b/Dockerfile
index ad0dec3da..4c807ff1a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -77,7 +77,7 @@ WORKDIR /opt/flashinfer
 ENV FLASHINFER_CUDA_ARCH_LIST="7.5 8.0 8.6 8.9 9.0a"
 
 # Run the AOT pre-compilation first, then build the wheel directly (no build isolation, reusing the same self-built torch)
-RUN python3 -m pip install --no-cache-dir numpy requests build "cuda-python>=12.0,<13" "nvidia-nvshmem-cu12" pynvml && \
+RUN python3 -m pip install --no-cache-dir numpy requests build "cuda-python>=12.0,<13" "nvidia-nvshmem-cu12" ninja pynvml && \
     python3 -m flashinfer.aot && \
     python3 -m build --no-isolation --wheel && \
     ls -lh dist/
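
Rationale (inferred from the diff, not stated in it): `python3 -m build --no-isolation` skips resolving the project's build requirements, so a build tool like ninja must already be installed in the same environment before `flashinfer.aot` compiles its extensions. A minimal optional sanity check one could place right before the AOT step, assuming the PyPI `ninja` package puts a `ninja` binary on PATH:

    # Optional: fail the image build early if ninja is missing from PATH
    RUN command -v ninja && ninja --version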