This commit is contained in:
parent
91d1e7afca
commit
90ea1f5611
|
|
@ -1,4 +1,4 @@
|
|||
ARG CUDA_VERSION=12.8.1
|
||||
ARG CUDA_VERSION=12.1
|
||||
ARG PYTHON_VERSION=3.10
|
||||
ARG GET_PIP_URL="https://bootstrap.pypa.io/get-pip.py"
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ logfile_maxbytes=0
|
|||
loglevel=info
|
||||
|
||||
[program:vllm]
|
||||
command=python3 -m vllm.entrypoints.openai.api_server --model /root/.cradle/Alibaba/Qwen3-4B --served-model-name Qwen3-4B --tensor-parallel-size 2 --port 30000 --api-key token-abc123
|
||||
command=python3 -m vllm.entrypoints.openai.api_server --model /root/.cradle/Alibaba/Qwen3-4B --served-model-name Qwen3-4B --dtype=float32 --enable-chunked-prefill=False --tensor-parallel-size 2 --port 30000 --api-key token-abc123
|
||||
autostart=true
|
||||
autorestart=true
|
||||
stdout_logfile=/dev/stdout
|
||||
|
|
|
|||
|
|
@ -6,9 +6,13 @@ numba == 0.61.2; python_version > '3.9'
|
|||
|
||||
# Dependencies for NVIDIA GPUs
|
||||
ray[cgraph]>=2.43.0, !=2.44.* # Ray Compiled Graph, required for pipeline parallelism in V1.
|
||||
torch==2.7.1
|
||||
torchaudio==2.7.1
|
||||
torch==2.5.1
|
||||
torchaudio==2.5.1
|
||||
# torch==2.7.1
|
||||
# torchaudio==2.7.1
|
||||
# These must be updated alongside torch
|
||||
torchvision==0.22.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
|
||||
# torchvision==0.22.1
|
||||
torchvision==0.20.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
|
||||
# https://github.com/facebookresearch/xformers/releases/tag/v0.0.31
|
||||
xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.7
|
||||
# xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64'
|
||||
xformers==0.0.28.post3; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.5.1
|
||||
|
|
|
|||
Loading…
Reference in New Issue