This commit is contained in:
hailin 2025-11-21 00:56:47 +08:00
parent 91d1e7afca
commit 90ea1f5611
3 changed files with 10 additions and 6 deletions

View File

@@ -1,4 +1,4 @@
ARG CUDA_VERSION=12.8.1
ARG CUDA_VERSION=12.1
ARG PYTHON_VERSION=3.10
ARG GET_PIP_URL="https://bootstrap.pypa.io/get-pip.py"

View File

@@ -5,7 +5,7 @@ logfile_maxbytes=0
loglevel=info
[program:vllm]
command=python3 -m vllm.entrypoints.openai.api_server --model /root/.cradle/Alibaba/Qwen3-4B --served-model-name Qwen3-4B --tensor-parallel-size 2 --port 30000 --api-key token-abc123
command=python3 -m vllm.entrypoints.openai.api_server --model /root/.cradle/Alibaba/Qwen3-4B --served-model-name Qwen3-4B --dtype=float32 --enable-chunked-prefill=False --tensor-parallel-size 2 --port 30000 --api-key token-abc123
autostart=true
autorestart=true
stdout_logfile=/dev/stdout

View File

@@ -6,9 +6,13 @@ numba == 0.61.2; python_version > '3.9'
# Dependencies for NVIDIA GPUs
ray[cgraph]>=2.43.0, !=2.44.* # Ray Compiled Graph, required for pipeline parallelism in V1.
torch==2.7.1
torchaudio==2.7.1
torch==2.5.1
torchaudio==2.5.1
# torch==2.7.1
# torchaudio==2.7.1
# These must be updated alongside torch
torchvision==0.22.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
# torchvision==0.22.1
torchvision==0.20.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
# https://github.com/facebookresearch/xformers/releases/tag/v0.0.31
xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.7
# xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64'
xformers==0.0.28.post3; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.5.1