# syntax=docker/dockerfile:1
# Build-time CUDA version selector; re-declared inside the stage below so it
# is visible to RUN instructions (pre-FROM ARGs are only visible to FROM).
ARG CUDA_VERSION=12.5.1

FROM nvcr.io/nvidia/tritonserver:24.04-py3-min

ARG BUILD_TYPE=all

# Build-time only: suppress debconf prompts during apt installs. Declared as
# ARG (not ENV) so the noninteractive setting does not leak into the runtime
# environment of the final image; ARG values are exported to RUN commands.
ARG DEBIAN_FRONTEND=noninteractive
# Install system dependencies (forcing IPv4 to work around unreliable IPv6
# connectivity to the mirrors). tzdata answers are pre-seeded via debconf so
# the noninteractive install cannot hang on the timezone prompt.
# python3.10 comes from the deadsnakes PPA; the RDMA/InfiniBand packages are
# needed for multi-node communication.
RUN echo 'tzdata tzdata/Areas select Asia' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/Asia select Shanghai' | debconf-set-selections \
    && apt-get -o Acquire::ForceIPv4=true update -y \
    && apt-get -o Acquire::ForceIPv4=true install -y --no-install-recommends \
        software-properties-common \
    && add-apt-repository ppa:deadsnakes/ppa -y \
    && apt-get -o Acquire::ForceIPv4=true update \
    && apt-get -o Acquire::ForceIPv4=true install -y --no-install-recommends \
        curl \
        git \
        ibverbs-providers \
        infiniband-diags \
        libibumad3 \
        libibverbs-dev \
        libibverbs1 \
        libnl-3-200 \
        libnl-route-3-200 \
        librdmacm1 \
        openssh-server \
        perftest \
        python3.10 \
        python3.10-dev \
        python3.10-distutils \
        rdma-core \
        sudo \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 \
    && update-alternatives --set python3 /usr/bin/python3.10 \
    # -fsSL: fail on HTTP errors instead of saving an error page as the script.
    && curl -fsSL https://bootstrap.pypa.io/get-pip.py -o get-pip.py \
    && python3 get-pip.py \
    && rm -f get-pip.py \
    && python3 --version \
    && python3 -m pip --version \
    # Clean apt caches in the same layer so they never persist in the image.
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean
# Install datamodel_code_generator (required by MiniCPM model support).
# NOTE(review): left unpinned to match upstream expectations; consider pinning
# a version for reproducible builds.
RUN python3 -m pip install --no-cache-dir datamodel_code_generator
WORKDIR /sgl-workspace

# Copy the sglang source tree; it is installed and then deleted again in the
# build step below.
COPY ./sglang /sgl-workspace/sglang

# Bake the model weights into the image.
# NOTE(review): embedding 32B model weights makes the image enormous and slow
# to pull; prefer mounting the weights as a volume at run time.
COPY ./Alibaba/QwQ-32B /root/.cradle/Alibaba/QwQ-32B

# Re-declare so the pre-FROM ARG value becomes visible inside this stage.
ARG CUDA_VERSION
# Install Python build tooling, the torch/flashinfer wheels matching the
# selected CUDA version, sglang itself, and a pinned transformers; the sglang
# source tree is removed in the same layer to keep the image small.
RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5lib six \
    && if [ "$CUDA_VERSION" = "12.1.1" ]; then \
        CUINDEX=121; \
    elif [ "$CUDA_VERSION" = "12.4.1" ]; then \
        CUINDEX=124; \
    elif [ "$CUDA_VERSION" = "12.5.1" ]; then \
        # No cu125 wheel index exists; cu124 wheels run on CUDA 12.5 drivers.
        CUINDEX=124; \
    elif [ "$CUDA_VERSION" = "11.8.0" ]; then \
        CUINDEX=118; \
        # cu118 needs a prebuilt sgl-kernel from sglang's own wheel index.
        python3 -m pip install --no-cache-dir sgl-kernel -i https://docs.sglang.ai/whl/cu118; \
    else \
        echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1; \
    fi \
    && python3 -m pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cu${CUINDEX} \
    && python3 -m pip install --no-cache-dir psutil pyzmq pynvml \
    && cd /sgl-workspace/sglang/python \
    # NOTE(review): the flashinfer link hard-codes torch2.5 while torch itself
    # is unpinned above — verify the two stay in sync when bumping either.
    && python3 -m pip install --no-cache-dir '.[srt,openai]' --find-links https://flashinfer.ai/whl/cu${CUINDEX}/torch2.5/flashinfer-python \
    && cd / && rm -rf /sgl-workspace/sglang \
    && python3 -m pip install --no-cache-dir transformers==4.48.3 \
    # Smoke-test that sglang is importable after the source tree was deleted.
    && python3 -c "import sglang; print('✅ sglang module installed successfully')"
# Document the server port (EXPOSE does not publish it by itself; use -p at
# `docker run` time).
EXPOSE 30000
# Default launch command (serving the QwQ-32B model, 4-way tensor parallel).
# Exec form keeps the server as PID 1 so it receives SIGTERM from docker stop.
# NOTE(review): the API key is hard-coded into the image; prefer injecting it
# at run time (e.g. `docker run -e` plus an entrypoint wrapper or an override
# of this CMD) instead of baking a credential into every layer consumer.
CMD ["python3", "-m", "sglang.launch_server", \
     "--host", "0.0.0.0", \
     "--port", "30000", \
     "--model-path", "/root/.cradle/Alibaba/QwQ-32B/", \
     "--tp", "4", \
     "--api-key", "token-abc123"]
# Restore an interactive debconf frontend for anyone running apt inside the
# container or in derived images.
ENV DEBIAN_FRONTEND=interactive