This commit is contained in:
parent
f2048d004f
commit
e9abf3cb03
65
Dockerfile
65
Dockerfile
|
|
@ -5,7 +5,6 @@ ARG PYTHON_VERSION=3.12
|
||||||
ARG BUILD_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04
|
ARG BUILD_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04
|
||||||
ARG FINAL_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
|
ARG FINAL_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
|
||||||
|
|
||||||
|
|
||||||
# ARG DEADSNAKES_MIRROR_URL
|
# ARG DEADSNAKES_MIRROR_URL
|
||||||
# ARG DEADSNAKES_GPGKEY_URL
|
# ARG DEADSNAKES_GPGKEY_URL
|
||||||
|
|
||||||
|
|
@ -108,15 +107,29 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
uv pip install --system -r requirements/cuda.txt \
|
uv pip install --system -r requirements/cuda.txt \
|
||||||
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
--extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
|
||||||
|
|
||||||
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0 12.0'
|
# ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0 12.0'
|
||||||
|
ARG torch_cuda_arch_list='8.6 8.9'
|
||||||
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
|
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
|
||||||
# Override the arch list for flash-attn to reduce the binary size
|
# Override the arch list for flash-attn to reduce the binary size
|
||||||
ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
|
# ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
|
||||||
|
ARG vllm_fa_cmake_gpu_arches='86-real;89-real'
|
||||||
ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches}
|
ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches}
|
||||||
#################### BASE BUILD IMAGE ####################
|
#################### BASE BUILD IMAGE ####################
|
||||||
|
|
||||||
#################### WHEEL BUILD IMAGE ####################
|
#################### WHEEL BUILD IMAGE ####################
|
||||||
FROM base AS build
|
FROM base AS build
|
||||||
|
|
||||||
|
RUN curl -L -o sccache.tar.gz https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz && \
|
||||||
|
tar -xzf sccache.tar.gz && mv sccache-*/sccache /usr/local/bin/sccache && rm -rf sccache*
|
||||||
|
|
||||||
|
|
||||||
|
ENV SCCACHE_DIR=/home/jzy/.cache/sccache
|
||||||
|
ENV SCCACHE_IDLE_TIMEOUT=0
|
||||||
|
ENV RUSTC_WRAPPER=sccache
|
||||||
|
ENV CMAKE_C_COMPILER_LAUNCHER=sccache
|
||||||
|
ENV CMAKE_CXX_COMPILER_LAUNCHER=sccache
|
||||||
|
ENV CMAKE_CUDA_COMPILER_LAUNCHER=sccache
|
||||||
|
|
||||||
ARG TARGETPLATFORM
|
ARG TARGETPLATFORM
|
||||||
|
|
||||||
ARG PIP_INDEX_URL UV_INDEX_URL
|
ARG PIP_INDEX_URL UV_INDEX_URL
|
||||||
|
|
@ -165,24 +178,31 @@ RUN if [ "${VLLM_USE_PRECOMPILED}" = "1" ]; then \
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# if USE_SCCACHE is set, use sccache to speed up compilation
|
# if USE_SCCACHE is set, use sccache to speed up compilation
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
# RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
--mount=type=bind,source=.git,target=.git \
|
# --mount=type=bind,source=.git,target=.git \
|
||||||
if [ "$USE_SCCACHE" = "1" ]; then \
|
# if [ "$USE_SCCACHE" = "1" ]; then \
|
||||||
echo "Installing sccache..." \
|
# echo "Installing sccache..." \
|
||||||
&& curl -L -o sccache.tar.gz ${SCCACHE_DOWNLOAD_URL} \
|
# && curl -L -o sccache.tar.gz ${SCCACHE_DOWNLOAD_URL} \
|
||||||
&& tar -xzf sccache.tar.gz \
|
# && tar -xzf sccache.tar.gz \
|
||||||
&& sudo mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \
|
# && sudo mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \
|
||||||
&& rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \
|
# && rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \
|
||||||
&& if [ ! -z ${SCCACHE_ENDPOINT} ] ; then export SCCACHE_ENDPOINT=${SCCACHE_ENDPOINT} ; fi \
|
# && if [ ! -z ${SCCACHE_ENDPOINT} ] ; then export SCCACHE_ENDPOINT=${SCCACHE_ENDPOINT} ; fi \
|
||||||
&& export SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \
|
# && export SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \
|
||||||
&& export SCCACHE_REGION=${SCCACHE_REGION_NAME} \
|
# && export SCCACHE_REGION=${SCCACHE_REGION_NAME} \
|
||||||
&& export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \
|
# && export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \
|
||||||
&& export SCCACHE_IDLE_TIMEOUT=0 \
|
# && export SCCACHE_IDLE_TIMEOUT=0 \
|
||||||
&& export CMAKE_BUILD_TYPE=Release \
|
# && export CMAKE_BUILD_TYPE=Release \
|
||||||
&& sccache --show-stats \
|
# && sccache --show-stats \
|
||||||
&& python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
|
# && python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
|
||||||
&& sccache --show-stats; \
|
# && sccache --show-stats; \
|
||||||
fi
|
# fi
|
||||||
|
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/sccache \
|
||||||
|
--mount=type=cache,target=/root/.cache/uv \
|
||||||
|
sccache --start-server && \
|
||||||
|
sccache --zero-stats && \
|
||||||
|
python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 && \
|
||||||
|
sccache --show-stats
|
||||||
|
|
||||||
ENV CCACHE_DIR=/root/.cache/ccache
|
ENV CCACHE_DIR=/root/.cache/ccache
|
||||||
RUN --mount=type=cache,target=/root/.cache/ccache \
|
RUN --mount=type=cache,target=/root/.cache/ccache \
|
||||||
|
|
@ -325,7 +345,8 @@ RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH'
|
||||||
FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a"
|
FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a"
|
||||||
else
|
else
|
||||||
# CUDA 12.8+ supports 10.0a and 12.0
|
# CUDA 12.8+ supports 10.0a and 12.0
|
||||||
FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a 10.0a 12.0"
|
# FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a 10.0a 12.0"
|
||||||
|
FI_TORCH_CUDA_ARCH_LIST="8.6 8.9"
|
||||||
fi
|
fi
|
||||||
echo "🏗️ Building FlashInfer for arches: ${FI_TORCH_CUDA_ARCH_LIST}"
|
echo "🏗️ Building FlashInfer for arches: ${FI_TORCH_CUDA_ARCH_LIST}"
|
||||||
# Needed to build AOT kernels
|
# Needed to build AOT kernels
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue