.

2025-09-22 09:51:56 +08:00 · 2025-09-22 09:51:56 +08:00 · 3320b33dc6
parent ad93b283a5
commit 3320b33dc6
4 changed files with 15 additions and 1 deletions
--- a/build_lora_image.sh
+++ b/build_lora_image.sh
@@ -0,0 +1 @@
 docker build   --build-arg http_proxy=http://127.0.0.1:7890   --build-arg https_proxy=http://127.0.0.1:7890   --network=host   --build-arg CUDA_VERSION=12.6.1 --target runtime-autobuild --build-arg MAX_JOBS=96  --load -t sglang_lora .
--- a/build_lora_wheels.sh
+++ b/build_lora_wheels.sh
@@ -0,0 +1,2 @@
 export DOCKER_BUILDKIT=1 
 docker build --build-arg http_proxy=http://127.0.0.1:7890   --build-arg https_proxy=http://127.0.0.1:7890   --network=host   --build-arg CUDA_VERSION=12.6.1 --target wheelhouse --output type=local,dest=./_wheelhouse  --build-arg MAX_JOBS=96 .
--- a/run_docker_lora_image.sh
+++ b/run_docker_lora_image.sh
@@ -0,0 +1,11 @@
 docker run -d \
  --name sglang_lora \
  --gpus all \
  --shm-size=8g \
  -e NVIDIA_VISIBLE_DEVICES=all \
  -e SUPABASE_URL=http://183.36.35.42 \
  -p 30000:30000 \
  -p 30001:30001 \
  -v /home/jzy/Qwen3-32B:/root/.cradle/external/llm \
  -v /home/jzy/adapters:/root/.cradle/external/lora/q3 \
  sglang_lora:latest
--- a/supervisord.conf
+++ b/supervisord.conf
@@ -5,7 +5,7 @@ logfile_maxbytes=0
 loglevel=info
 [program:sglang]
-command=python3 -m sglang.launch_server --host 0.0.0.0 --port 30000 --model-path /root/.cradle/external/llm/ --lora-paths q3=/root/.cradle/external/lora/q3 --disable-radix-cache --tp 4 --api-key token-abc123 --enable-metrics
+command=python3 -m sglang.launch_server --host 0.0.0.0 --port 30000 --model-path /root/.cradle/external/llm/ --lora-paths q3=/root/.cradle/external/lora/q3 --lora-target-modules q_proj,k_proj,v_proj,o_proj,gate_proj,up_proj,down_proj --max-lora-rank 16 --served-model-name qwen3-32b --disable-radix-cache --tp 4 --api-key token-abc123 --enable-metrics --log-requests --log-requests-level 2
 autostart=true
 autorestart=true
 stdout_logfile=/dev/stdout
		`@@ -0,0 +1 @@`
							`docker build --build-arg http_proxy=http://127.0.0.1:7890 --build-arg https_proxy=http://127.0.0.1:7890 --network=host --build-arg CUDA_VERSION=12.6.1 --target runtime-autobuild --build-arg MAX_JOBS=96 --load -t sglang_lora .`
		`@@ -0,0 +1,2 @@`
							`export DOCKER_BUILDKIT=1`
							`docker build --build-arg http_proxy=http://127.0.0.1:7890 --build-arg https_proxy=http://127.0.0.1:7890 --network=host --build-arg CUDA_VERSION=12.6.1 --target wheelhouse --output type=local,dest=./_wheelhouse --build-arg MAX_JOBS=96 .`