.

2025-11-21 00:56:47 +08:00 · 2025-11-21 00:56:47 +08:00 · 90ea1f5611
parent 91d1e7afca
commit 90ea1f5611
3 changed files with 10 additions and 6 deletions
--- a/2
+++ b/2
@@ -1,4 +1,4 @@
-ARG CUDA_VERSION=12.8.1
+ARG CUDA_VERSION=12.1
 ARG PYTHON_VERSION=3.10
 ARG GET_PIP_URL="https://bootstrap.pypa.io/get-pip.py"

--- a/supervisord.conf
+++ b/supervisord.conf
@@ -5,7 +5,7 @@ logfile_maxbytes=0
 loglevel=info

 [program:vllm]
-command=python3 -m vllm.entrypoints.openai.api_server --model /root/.cradle/Alibaba/Qwen3-4B --served-model-name Qwen3-4B --tensor-parallel-size 2 --port 30000 --api-key token-abc123
+command=python3 -m vllm.entrypoints.openai.api_server --model /root/.cradle/Alibaba/Qwen3-4B --served-model-name Qwen3-4B --dtype=float32 --enable-chunked-prefill=False --tensor-parallel-size 2 --port 30000 --api-key token-abc123
 autostart=true
 autorestart=true
 stdout_logfile=/dev/stdout
--- a/vllm_v0.10.0/requirements/cuda.txt
+++ b/vllm_v0.10.0/requirements/cuda.txt
@@ -6,9 +6,13 @@ numba == 0.61.2; python_version > '3.9'

 # Dependencies for NVIDIA GPUs
 ray[cgraph]>=2.43.0, !=2.44.* # Ray Compiled Graph, required for pipeline parallelism in V1.
-torch==2.7.1
-torchaudio==2.7.1
+torch==2.5.1
+torchaudio==2.5.1
+# torch==2.7.1
+# torchaudio==2.7.1
 # These must be updated alongside torch
-torchvision==0.22.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
+# torchvision==0.22.1
+torchvision==0.20.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
 # https://github.com/facebookresearch/xformers/releases/tag/v0.0.31
-xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64'  # Requires PyTorch >= 2.7
+# xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64'
+xformers==0.0.28.post3; platform_system == 'Linux' and platform_machine == 'x86_64'  # Requires PyTorch 2.5.1