# Usage (to build the SGLang ROCm Docker image):
#   docker build --build-arg SGL_BRANCH=v0.4.4.post3 -t v0.4.4.post3-rocm630 -f Dockerfile.rocm .

# Default base image; override with --build-arg BASE_IMAGE=<image>.
ARG BASE_IMAGE="rocm/sgl-dev:vllm20250114"

FROM ${BASE_IMAGE} AS base

# The build steps that follow run as root, with /sgl-workspace as the working directory.
USER root
WORKDIR /sgl-workspace

# Build-time parameters; each can be overridden with --build-arg.

# BUILD_TYPE: "srt" installs the srt_hip extras, any other value installs all_hip.
ARG BUILD_TYPE="all"

# SGLang source repository and the ref to check out.
# SGL_DEFAULT is declared with ENV (not ARG), so it also persists in the image environment;
# it supplies the default value of SGL_BRANCH.
ARG SGL_REPO="https://github.com/sgl-project/sglang"
ENV SGL_DEFAULT="main"
ARG SGL_BRANCH="${SGL_DEFAULT}"

# ROCm Triton source and the ref passed to `git checkout`.
# NOTE(review): the default looks like a branch name rather than a pinned commit - confirm.
ARG TRITON_REPO="https://github.com/ROCm/triton.git"
ARG TRITON_COMMIT="improve_fa_decode_3.0.0"

# AITER source and the ref passed to `git checkout`.
# NOTE(review): "testx" looks like a branch name rather than a pinned commit - confirm.
ARG AITER_REPO="https://github.com/ROCm/aiter.git"
ARG AITER_COMMIT="testx"

# Clone SGLang, optionally switch to SGL_BRANCH, build sgl-kernel with its ROCm
# setup script, then install the Python package in editable mode.
#   - SGL_BRANCH equal to SGL_DEFAULT (or empty) keeps whatever the clone checked out;
#     any other value is passed to `git checkout`.
#   - BUILD_TYPE=srt installs the "srt_hip" extras; every other value installs "all_hip".
# All expansions are quoted so empty values or values containing whitespace cannot
# break the `[` test or be split into several git arguments.
RUN git clone "${SGL_REPO}" \
    && cd sglang \
    && if [ -z "${SGL_BRANCH}" ] || [ "${SGL_BRANCH}" = "${SGL_DEFAULT}" ]; then \
         echo "Using ${SGL_DEFAULT}, default branch."; \
       else \
         echo "Using ${SGL_BRANCH} branch."; \
         git checkout "${SGL_BRANCH}"; \
       fi \
    && cd sgl-kernel \
    && rm -f pyproject.toml \
    && mv pyproject_rocm.toml pyproject.toml \
    && python setup_rocm.py install \
    && cd .. \
    && if [ "${BUILD_TYPE}" = "srt" ]; then \
         python -m pip --no-cache-dir install -e "python[srt_hip]"; \
       else \
         python -m pip --no-cache-dir install -e "python[all_hip]"; \
       fi

# Duplicate the checkout to /sglang, then purge pip's cache.
RUN cp -r /sgl-workspace/sglang /sglang \
    && python -m pip cache purge

# Additional Python packages installed on top of SGLang.
# --no-cache-dir keeps pip's download/wheel cache out of this layer; a single
# invocation lets pip resolve all five packages together.
RUN pip install --no-cache-dir \
        IPython \
        orjson \
        pybind11 \
        python-multipart \
        torchao

# Remove the triton package that is already installed, then build and install
# Triton from TRITON_REPO at the ref given by TRITON_COMMIT.
RUN pip uninstall -y triton
RUN git clone ${TRITON_REPO} \
    && git -C triton checkout ${TRITON_COMMIT} \
    && cd triton/python \
    && python3 setup.py install

# Fetch AITER at AITER_COMMIT together with its submodules and install it in
# development mode, with PREBUILD_KERNELS=1 and GPU_ARCHS=gfx942 set for setup.py.
RUN git clone ${AITER_REPO} \
    && git -C aiter checkout ${AITER_COMMIT} \
    && git -C aiter submodule update --init --recursive \
    && cd aiter \
    && PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py develop

# Support MI300X in virtualized environments (MI300X_VF): every config file whose
# name contains "MI300X" is copied to a sibling path in which the first "MI300X"
# is replaced by "MI300X_VF". Real copies are made because symlinks will not be
# created in image build.
RUN find \
        /sgl-workspace/sglang/python/sglang/srt/layers/quantization/configs/ \
        /sgl-workspace/sglang/python/sglang/srt/layers/moe/fused_moe_triton/configs/ \
        -type f -name '*MI300X*' \
    | xargs -I {} sh -c 'src="$1"; dst=$(echo "$src" | sed "s/MI300X/MI300X_VF/"); cp "$src" "$dst"' -- {}

# Performance environment variables baked into the image.
ENV HIP_FORCE_DEV_KERNARG=1 \
    HSA_NO_SCRATCH_RECLAIM=1 \
    SGLANG_SET_CPU_AFFINITY=1 \
    SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1 \
    NCCL_MIN_NCHANNELS=112

ENV MOE_PADDING=1 \
    VLLM_FP8_PADDING=1 \
    VLLM_FP8_ACT_PADDING=1 \
    VLLM_FP8_WEIGHT_PADDING=1 \
    VLLM_FP8_REDUCE_CONV=1 \
    TORCHINDUCTOR_MAX_AUTOTUNE=1 \
    TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISE=1

# Default to an interactive shell when the container is started without a command.
CMD ["/bin/bash"]