This commit is contained in:
parent
1f0eaca89a
commit
7cd437c41b
18
Dockerfile
18
Dockerfile
|
|
@ -11,7 +11,8 @@ RUN apt-get update && \
|
||||||
python -m pip install --no-cache-dir --upgrade pip
|
python -m pip install --no-cache-dir --upgrade pip
|
||||||
|
|
||||||
# ---- 安装 PyTorch + SGLang ----
|
# ---- 安装 PyTorch + SGLang ----
|
||||||
ARG TORCH_VER=2.3.0 # 如需别的版本自行改
|
ARG TORCH_VER=2.7.1 # cu128 轮子从 2.7.* 开始提供
|
||||||
|
ARG TV_VER=0.22.0
|
||||||
RUN case "$CUDA_VERSION" in \
|
RUN case "$CUDA_VERSION" in \
|
||||||
12.6.1) CUINDEX=126 ;; \
|
12.6.1) CUINDEX=126 ;; \
|
||||||
12.8.1) CUINDEX=128 ;; \
|
12.8.1) CUINDEX=128 ;; \
|
||||||
|
|
@ -19,14 +20,14 @@ RUN case "$CUDA_VERSION" in \
|
||||||
esac && \
|
esac && \
|
||||||
python -m pip install --no-cache-dir \
|
python -m pip install --no-cache-dir \
|
||||||
torch==${TORCH_VER}+cu${CUINDEX} \
|
torch==${TORCH_VER}+cu${CUINDEX} \
|
||||||
torchvision==0.18.0+cu${CUINDEX} \
|
torchvision==${TV_VER}+cu${CUINDEX} \
|
||||||
--extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} && \
|
--extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} && \
|
||||||
python -m pip install --no-cache-dir sglang==0.4.8.post1 && \
|
python -m pip install --no-cache-dir sglang==0.4.8.post1 && \
|
||||||
# 12.8 额外装官方 sgl_kernel & NCCL
|
# 12.8 额外装官方 sgl_kernel & NCCL
|
||||||
if [ "$CUDA_VERSION" = "12.8.1" ]; then \
|
if [ "$CUDA_VERSION" = "12.8.1" ]; then \
|
||||||
python -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.3 --force-reinstall --no-deps && \
|
python -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.3 --force-reinstall --no-deps && \
|
||||||
python -m pip install --no-cache-dir \
|
python -m pip install --no-cache-dir \
|
||||||
https://github.com/sgl-project/whl/releases/download/v0.2.0/sgl_kernel-0.2.0+cu128-cp39-abi3-manylinux2014_x86_64.whl \
|
https://github.com/sgl-project/whl/releases/download/v0.2.0/sgl_kernel-0.2.0+cu128-cp310-abi3-manylinux2014_x86_64.whl \
|
||||||
--force-reinstall --no-deps ; \
|
--force-reinstall --no-deps ; \
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
@ -55,20 +56,9 @@ COPY ./Alibaba/Qwen3-8B /root/.cradle/Alibaba/Qwen3-8B
|
||||||
EXPOSE 30000
|
EXPOSE 30000
|
||||||
|
|
||||||
# 默认启动 SGLang 推理服务
|
# 默认启动 SGLang 推理服务
|
||||||
# CMD ["python3", "-m", "sglang.launch_server", \
|
|
||||||
# "--cpu-offload-gb", "64", \
|
|
||||||
# "--host", "0.0.0.0", \
|
|
||||||
# "--port", "30000", \
|
|
||||||
# "--model-path", "/root/.cradle/Alibaba/QwQ-32B/", \
|
|
||||||
# "--tp", "2", \
|
|
||||||
# "--api-key", "token-abc123", \
|
|
||||||
# "--mem-fraction-static", "0.8"]
|
|
||||||
|
|
||||||
|
|
||||||
CMD ["python3", "-m", "sglang.launch_server", \
|
CMD ["python3", "-m", "sglang.launch_server", \
|
||||||
"--host", "0.0.0.0", \
|
"--host", "0.0.0.0", \
|
||||||
"--port", "30000", \
|
"--port", "30000", \
|
||||||
"--model-path", "/root/.cradle/Alibaba/Qwen3-8B/", \
|
"--model-path", "/root/.cradle/Alibaba/Qwen3-8B/", \
|
||||||
"--tp", "1", \
|
"--tp", "1", \
|
||||||
"--api-key", "token-abc123"]
|
"--api-key", "token-abc123"]
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue