This commit is contained in:
parent
cd9aa7b98b
commit
c757ea02fc
|
|
@ -46,7 +46,7 @@ RUN echo "Building PyTorch with USE_DISTRIBUTED=$USE_DISTRIBUTED" && \
|
||||||
ARG CUDA_VERSION=12.6.1
|
ARG CUDA_VERSION=12.6.1
|
||||||
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS builder-extras
|
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS builder-extras
|
||||||
|
|
||||||
ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9"
|
ENV TORCH_CUDA_ARCH_LIST=8.0,8.6,8.9
|
||||||
ENV DEBIAN_FRONTEND=noninteractive PYTHONUNBUFFERED=1 LANG=C.UTF-8 LC_ALL=C.UTF-8
|
ENV DEBIAN_FRONTEND=noninteractive PYTHONUNBUFFERED=1 LANG=C.UTF-8 LC_ALL=C.UTF-8
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
|
@ -95,10 +95,11 @@ ENV FLASHINFER_CUDA_ARCH_LIST=8.0,8.6,8.9
|
||||||
|
|
||||||
# 先做 AOT 预编译,再直接打 wheel(不隔离,使用同一份自编 torch)
|
# 先做 AOT 预编译,再直接打 wheel(不隔离,使用同一份自编 torch)
|
||||||
RUN python3 -m pip install --no-cache-dir numpy requests build "cuda-python>=12.0,<13" "nvidia-nvshmem-cu12" ninja pynvml && \
|
RUN python3 -m pip install --no-cache-dir numpy requests build "cuda-python>=12.0,<13" "nvidia-nvshmem-cu12" ninja pynvml && \
|
||||||
python3 -m flashinfer.aot && \
|
bash -lc 'unset TORCH_CUDA_ARCH_LIST; \
|
||||||
|
FLASHINFER_CUDA_ARCH_LIST=8.0,8.6,8.9 python3 -m flashinfer.aot' && \
|
||||||
python3 -m build --no-isolation --wheel && \
|
python3 -m build --no-isolation --wheel && \
|
||||||
ls -lh dist/ \
|
ls -lh dist/ && \
|
||||||
&& python3 -m pip install --no-cache-dir --no-deps dist/*.whl
|
python3 -m pip install --no-cache-dir --no-deps dist/*.whl
|
||||||
|
|
||||||
COPY ./sglang /sgl/sglang
|
COPY ./sglang /sgl/sglang
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue