chatai/sglang/docker/Dockerfile

79 lines
3.2 KiB
Docker
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Global build argument. Because it is declared before FROM, a stage can only
# read it after re-declaring it with a bare `ARG CUDA_VERSION`.
# NOTE(review): the FROM line below does not reference it, so the base image
# tag stays fixed whatever CUDA_VERSION is passed - confirm this is intended.
ARG CUDA_VERSION=12.5.1
FROM nvcr.io/nvidia/tritonserver:24.04-py3-min
# NOTE(review): BUILD_TYPE is not referenced by any instruction visible in this
# file; kept so existing `--build-arg BUILD_TYPE=...` invocations still work.
ARG BUILD_TYPE=all
# Keep apt/debconf non-interactive while building. ARG values are exposed to
# RUN steps as environment variables, but unlike ENV this instruction does not
# write the value into the image's runtime environment.
ARG DEBIAN_FRONTEND=noninteractive
# Install OS packages, Python 3.10 and pip. Every APT call is forced onto IPv4.
# The tzdata debconf answers are pre-seeded so that a tzdata install selects
# Asia/Shanghai without prompting.
# apt-get is used instead of apt because apt's command-line interface is meant
# for interactive use and is not guaranteed to be stable for scripts.
RUN echo 'tzdata tzdata/Areas select Asia' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/Asia select Shanghai' | debconf-set-selections \
    && apt-get -o Acquire::ForceIPv4=true update \
    && apt-get -o Acquire::ForceIPv4=true install -y software-properties-common \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get -o Acquire::ForceIPv4=true update \
    && apt-get -o Acquire::ForceIPv4=true install -y \
        python3.10 \
        python3.10-dev \
        python3.10-distutils \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 \
    && update-alternatives --set python3 /usr/bin/python3.10 \
    && apt-get -o Acquire::ForceIPv4=true install -y \
        curl \
        git \
        ibverbs-providers \
        infiniband-diags \
        libibumad3 \
        libibverbs-dev \
        libibverbs1 \
        libnl-3-200 \
        libnl-route-3-200 \
        librdmacm1 \
        openssh-server \
        perftest \
        rdma-core \
        sudo \
    # -f makes curl fail on an HTTP error instead of saving the error page,
    # which would otherwise be handed to the Python interpreter below.
    && curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py \
    && python3 /tmp/get-pip.py \
    # Remove the installer in the same layer so it never ships in the image.
    && rm -f /tmp/get-pip.py \
    && python3 --version \
    && python3 -m pip --version \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Install datamodel_code_generator (used for MiniCPM models).
# --no-cache-dir keeps pip's download cache out of the image layer, and
# `python3 -m pip` targets the same interpreter as the other install steps.
RUN python3 -m pip install --no-cache-dir datamodel_code_generator
WORKDIR /sgl-workspace
# Copy the sglang source tree into the image so the package can be built from it.
COPY ./sglang /sgl-workspace/sglang
# Copy the model files into the image (corrected approach).
# The QwQ-32B copy is commented out; only the DeepSeek distill model is copied.
#COPY ./Alibaba/QwQ-32B /root/.cradle/Alibaba/QwQ-32B
COPY ./DeepSeek/DeepSeek-R1-Distill-Llama-70B /root/.cradle/DeepSeek/DeepSeek-R1-Distill-Llama-70B
# Re-declare CUDA_VERSION inside the stage so the RUN step below can read it.
# With no value given here it takes the default from the ARG declared before FROM.
ARG CUDA_VERSION
# Install Python dependencies, sglang and a pinned transformers release, then
# delete the copied sglang source tree.
# CUDA_VERSION selects the wheel index suffix (CUINDEX) used for the torch and
# flashinfer downloads; an unrecognised version aborts the build.
RUN python3 -m pip install --upgrade pip setuptools wheel html5lib six \
    && case "$CUDA_VERSION" in \
        12.1.1) \
            CUINDEX=121 ;; \
        12.4.1 | 12.5.1) \
            CUINDEX=124 ;; \
        11.8.0) \
            CUINDEX=118; \
            python3 -m pip install --no-cache-dir sgl-kernel -i https://docs.sglang.ai/whl/cu118 ;; \
        *) \
            echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
    esac \
    && python3 -m pip install --no-cache-dir torch \
        --index-url https://download.pytorch.org/whl/cu${CUINDEX} \
    && python3 -m pip install --no-cache-dir psutil pyzmq pynvml \
    && cd /sgl-workspace/sglang/python \
    && python3 -m pip install --no-cache-dir '.[srt,openai]' \
        --find-links https://flashinfer.ai/whl/cu${CUINDEX}/torch2.5/flashinfer-python \
    && cd / \
    && rm -rf /sgl-workspace/sglang \
    && python3 -m pip install --no-cache-dir transformers==4.48.3 \
    && python3 -c "import sglang; print('✅ sglang module installed successfully')"
# Document the port passed to --port below.
EXPOSE 30000
# Default launch command: start the sglang server on the DeepSeek model that
# was copied into /root/.cradle.
# NOTE(review): the API key is hardcoded here and therefore stored in the
# image; confirm it is only a placeholder.
CMD [ \
    "python3", "-m", "sglang.launch_server", \
    "--host", "0.0.0.0", \
    "--port", "30000", \
    "--model-path", "/root/.cradle/DeepSeek/DeepSeek-R1-Distill-Llama-70B/", \
    "--cpu-offload-gb", "60", \
    "--tp", "4", \
    "--api-key", "token-abc123" \
]
# Set debconf back to its default interactive frontend for the final image.
# "interactive" is not a debconf frontend name; "dialog" is the default one.
ENV DEBIAN_FRONTEND=dialog