This commit is contained in:
hailin 2025-09-22 09:51:56 +08:00
parent ad93b283a5
commit 3320b33dc6
4 changed files with 15 additions and 1 deletions

1
build_lora_image.sh Normal file
View File

@ -0,0 +1 @@
#!/bin/sh
#
# Build the `sglang_lora` image from the `runtime-autobuild` Dockerfile stage
# and load the result into the local image store (`--load`).
#
# Usage: run from the directory that should serve as the build context; the
# script passes `.` to `docker build`.
#
# Environment:
#   BUILD_PROXY  Proxy URL forwarded as both the http_proxy and https_proxy
#                build args. Default: http://127.0.0.1:7890
#
# Kept POSIX-sh compatible so it behaves the same whether it is started as
# `sh build_lora_image.sh` or `bash build_lora_image.sh`.
set -eu

# A single value feeds both proxy build args so the two cannot drift apart.
build_proxy="${BUILD_PROXY:-http://127.0.0.1:7890}"

# --network=host: presumably required so build steps can reach a proxy that
# listens on the host's loopback address -- confirm before removing.
# MAX_JOBS / CUDA_VERSION are forwarded verbatim to the Dockerfile as build args.
docker build \
  --build-arg "http_proxy=${build_proxy}" \
  --build-arg "https_proxy=${build_proxy}" \
  --network=host \
  --build-arg CUDA_VERSION=12.6.1 \
  --target runtime-autobuild \
  --build-arg MAX_JOBS=96 \
  --load \
  -t sglang_lora \
  .

2
build_lora_wheels.sh Normal file
View File

@ -0,0 +1,2 @@
#!/bin/sh
#
# Build the `wheelhouse` Dockerfile stage and export its filesystem to
# ./_wheelhouse on the host instead of producing an image.
#
# Usage: run from the directory that should serve as the build context; the
# script passes `.` to `docker build`, and ./_wheelhouse is created relative
# to the current directory.
#
# Environment:
#   BUILD_PROXY  Proxy URL forwarded as both the http_proxy and https_proxy
#                build args. Default: http://127.0.0.1:7890
#
# Kept POSIX-sh compatible so it behaves the same whether it is started as
# `sh build_lora_wheels.sh` or `bash build_lora_wheels.sh`.
set -eu

# `--output type=local,...` is a BuildKit exporter, so make sure BuildKit is
# enabled even on engines where it is not the default builder.
export DOCKER_BUILDKIT=1

# A single value feeds both proxy build args so the two cannot drift apart.
build_proxy="${BUILD_PROXY:-http://127.0.0.1:7890}"

# --network=host: presumably required so build steps can reach a proxy that
# listens on the host's loopback address -- confirm before removing.
docker build \
  --build-arg "http_proxy=${build_proxy}" \
  --build-arg "https_proxy=${build_proxy}" \
  --network=host \
  --build-arg CUDA_VERSION=12.6.1 \
  --target wheelhouse \
  --output type=local,dest=./_wheelhouse \
  --build-arg MAX_JOBS=96 \
  .

11
run_docker_lora_image.sh Normal file
View File

@ -0,0 +1,11 @@
#!/bin/sh
#
# Start the `sglang_lora:latest` image as a detached container named
# `sglang_lora`, with all GPUs attached, ports 30000 and 30001 published, and
# the base model and LoRA adapter directories bind-mounted from the host.
#
# Environment:
#   SGLANG_MODEL_DIR  Host directory mounted at /root/.cradle/external/llm.
#                     Default: /home/jzy/Qwen3-32B
#   SGLANG_LORA_DIR   Host directory mounted at /root/.cradle/external/lora/q3.
#                     Default: /home/jzy/adapters
#
# Exits with status 1 (before touching Docker) if either host directory is
# missing.
#
# Kept POSIX-sh compatible so it behaves the same whether it is started as
# `sh run_docker_lora_image.sh` or `bash run_docker_lora_image.sh`.
set -eu

model_dir="${SGLANG_MODEL_DIR:-/home/jzy/Qwen3-32B}"
adapter_dir="${SGLANG_LORA_DIR:-/home/jzy/adapters}"

# `docker run -v` creates a missing host path as an empty directory, which
# would start the container with nothing mounted where the model or adapters
# are expected. Fail early with a clear message instead.
for host_dir in "${model_dir}" "${adapter_dir}"; do
  if [ ! -d "${host_dir}" ]; then
    printf 'error: host directory not found: %s\n' "${host_dir}" >&2
    exit 1
  fi
done

# NOTE: the container name is fixed, so `docker run` refuses to start while a
# container called `sglang_lora` (running or stopped) already exists; remove
# it first with `docker rm -f sglang_lora`.
docker run -d \
  --name sglang_lora \
  --gpus all \
  --shm-size=8g \
  -e NVIDIA_VISIBLE_DEVICES=all \
  -e SUPABASE_URL=http://183.36.35.42 \
  -p 30000:30000 \
  -p 30001:30001 \
  -v "${model_dir}:/root/.cradle/external/llm" \
  -v "${adapter_dir}:/root/.cradle/external/lora/q3" \
  sglang_lora:latest

View File

@ -5,7 +5,7 @@ logfile_maxbytes=0
 loglevel=info
 [program:sglang]
-command=python3 -m sglang.launch_server --host 0.0.0.0 --port 30000 --model-path /root/.cradle/external/llm/ --lora-paths q3=/root/.cradle/external/lora/q3 --disable-radix-cache --tp 4 --api-key token-abc123 --enable-metrics
+command=python3 -m sglang.launch_server --host 0.0.0.0 --port 30000 --model-path /root/.cradle/external/llm/ --lora-paths q3=/root/.cradle/external/lora/q3 --lora-target-modules q_proj,k_proj,v_proj,o_proj,gate_proj,up_proj,down_proj --max-lora-rank 16 --served-model-name qwen3-32b --disable-radix-cache --tp 4 --api-key token-abc123 --enable-metrics --log-requests --log-requests-level 2
 autostart=true
 autorestart=true
 stdout_logfile=/dev/stdout