diff --git a/Dockerfile b/Dockerfile
index a3cd410..b0c3562 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,7 +5,6 @@
 ARG PYTHON_VERSION=3.12
 ARG BUILD_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04
 ARG FINAL_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
-
 # ARG DEADSNAKES_MIRROR_URL
 # ARG DEADSNAKES_GPGKEY_URL
 
@@ -108,15 +107,29 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system -r requirements/cuda.txt \
         --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
 
-ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0 12.0'
+# ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0 12.0'
+ARG torch_cuda_arch_list='8.6 8.9'
 ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
 
 # Override the arch list for flash-attn to reduce the binary size
-ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
+# ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
+ARG vllm_fa_cmake_gpu_arches='86-real;89-real'
 ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches}
 #################### BASE BUILD IMAGE ####################
 
 #################### WHEEL BUILD IMAGE ####################
 FROM base AS build
+
+RUN curl -L -o sccache.tar.gz https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz && \
+    tar -xzf sccache.tar.gz && mv sccache-*/sccache /usr/local/bin/sccache && rm -rf sccache*
+
+
+ENV SCCACHE_DIR=/home/jzy/.cache/sccache
+ENV SCCACHE_IDLE_TIMEOUT=0
+ENV RUSTC_WRAPPER=sccache
+ENV CMAKE_C_COMPILER_LAUNCHER=sccache
+ENV CMAKE_CXX_COMPILER_LAUNCHER=sccache
+ENV CMAKE_CUDA_COMPILER_LAUNCHER=sccache
+
 ARG TARGETPLATFORM
 ARG PIP_INDEX_URL UV_INDEX_URL
 
@@ -165,24 +178,31 @@ RUN if [ "${VLLM_USE_PRECOMPILED}" = "1" ]; then \
     fi
 
 # if USE_SCCACHE is set, use sccache to speed up compilation
-RUN --mount=type=cache,target=/root/.cache/uv \
-    --mount=type=bind,source=.git,target=.git \
-    if [ "$USE_SCCACHE" = "1" ]; then \
-        echo "Installing sccache..." \
-        && curl -L -o sccache.tar.gz ${SCCACHE_DOWNLOAD_URL} \
-        && tar -xzf sccache.tar.gz \
-        && sudo mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \
-        && rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \
-        && if [ ! -z ${SCCACHE_ENDPOINT} ] ; then export SCCACHE_ENDPOINT=${SCCACHE_ENDPOINT} ; fi \
-        && export SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \
-        && export SCCACHE_REGION=${SCCACHE_REGION_NAME} \
-        && export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \
-        && export SCCACHE_IDLE_TIMEOUT=0 \
-        && export CMAKE_BUILD_TYPE=Release \
-        && sccache --show-stats \
-        && python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
-        && sccache --show-stats; \
-    fi
+# RUN --mount=type=cache,target=/root/.cache/uv \
+#     --mount=type=bind,source=.git,target=.git \
+#     if [ "$USE_SCCACHE" = "1" ]; then \
+#         echo "Installing sccache..." \
+#         && curl -L -o sccache.tar.gz ${SCCACHE_DOWNLOAD_URL} \
+#         && tar -xzf sccache.tar.gz \
+#         && sudo mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \
+#         && rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \
+#         && if [ ! -z ${SCCACHE_ENDPOINT} ] ; then export SCCACHE_ENDPOINT=${SCCACHE_ENDPOINT} ; fi \
+#         && export SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \
+#         && export SCCACHE_REGION=${SCCACHE_REGION_NAME} \
+#         && export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \
+#         && export SCCACHE_IDLE_TIMEOUT=0 \
+#         && export CMAKE_BUILD_TYPE=Release \
+#         && sccache --show-stats \
+#         && python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
+#         && sccache --show-stats; \
+#     fi
+
+RUN --mount=type=cache,target=/root/.cache/sccache \
+    --mount=type=cache,target=/root/.cache/uv \
+    sccache --start-server && \
+    sccache --zero-stats && \
+    python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 && \
+    sccache --show-stats
 
 ENV CCACHE_DIR=/root/.cache/ccache
 RUN --mount=type=cache,target=/root/.cache/ccache \
@@ -325,7 +345,8 @@ RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH'
         FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a"
     else
         # CUDA 12.8+ supports 10.0a and 12.0
-        FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a 10.0a 12.0"
+        # FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a 10.0a 12.0"
+        FI_TORCH_CUDA_ARCH_LIST="8.6 8.9"
     fi
     echo "🏗️ Building FlashInfer for arches: ${FI_TORCH_CUDA_ARCH_LIST}"
     # Needed to build AOT kernels