# Patch summary (leading commentary placed ahead of the first "diff --git" header).
#
# This patch adds three helper scripts and edits one line of supervisord.conf:
#
#   build_lora_image.sh (new, mode 100644, 1 line)
#     Runs `docker build` with --target runtime-autobuild, CUDA_VERSION=12.6.1,
#     MAX_JOBS=96, --network=host and --load, tagging the result `sglang_lora`.
#
#   build_lora_wheels.sh (new, mode 100644, 2 lines)
#     Exports DOCKER_BUILDKIT=1, then runs `docker build` with --target wheelhouse
#     and --output type=local,dest=./_wheelhouse. The patch records
#     "\ No newline at end of file" for this script.
#
#   run_docker_lora_image.sh (new, mode 100644, 11 lines)
#     Runs `sglang_lora:latest` detached as container `sglang_lora` with
#     --gpus all, --shm-size=8g, ports 30000 and 30001 published, and two bind
#     mounts from /home/jzy into /root/.cradle/external/{llm,lora/q3}.
#
#   supervisord.conf ([program:sglang] command= line)
#     Appends --lora-target-modules (q/k/v/o/gate/up/down_proj),
#     --max-lora-rank 16, --served-model-name qwen3-32b, --log-requests and
#     --log-requests-level 2 to the existing sglang.launch_server invocation.
#
# NOTE(review): the new scripts start directly with the docker/export command
#   (no shebang line) and are added with mode 100644, so they are not marked
#   executable by this patch.
# NOTE(review): both build scripts pass http_proxy/https_proxy pointing at
#   http://127.0.0.1:7890 -- presumably a developer-local proxy; confirm it is
#   meant to be committed.
# NOTE(review): the /home/jzy/... mount sources and SUPABASE_URL=http://183.36.35.42
#   are hard-coded; presumably machine-specific -- TODO confirm.
# NOTE(review): --api-key token-abc123 appears literally on the command line in
#   both the removed and the added supervisord line; the patch does not change it.
# NOTE(review): the server is started with --tp 4 while the container is run with
#   --gpus all; this presumably assumes at least 4 GPUs on the host -- verify.
# NOTE(review): as rendered here the added `command=` line breaks just before
#   `--max-lora-rank`, yet the hunk header is `-5,7 +5,7` (one line replaced by
#   one line). Confirm the real patch keeps the command on a single line.
diff --git a/build_lora_image.sh b/build_lora_image.sh new file mode 100644 index 000000000..88d305717 --- /dev/null +++ b/build_lora_image.sh @@ -0,0 +1 @@ +docker build --build-arg http_proxy=http://127.0.0.1:7890 --build-arg https_proxy=http://127.0.0.1:7890 --network=host --build-arg CUDA_VERSION=12.6.1 --target runtime-autobuild --build-arg MAX_JOBS=96 --load -t sglang_lora . diff --git a/build_lora_wheels.sh b/build_lora_wheels.sh new file mode 100644 index 000000000..dc9babdf0 --- /dev/null +++ b/build_lora_wheels.sh @@ -0,0 +1,2 @@ +export DOCKER_BUILDKIT=1 +docker build --build-arg http_proxy=http://127.0.0.1:7890 --build-arg https_proxy=http://127.0.0.1:7890 --network=host --build-arg CUDA_VERSION=12.6.1 --target wheelhouse --output type=local,dest=./_wheelhouse --build-arg MAX_JOBS=96 . \ No newline at end of file diff --git a/run_docker_lora_image.sh b/run_docker_lora_image.sh new file mode 100644 index 000000000..63be5307a --- /dev/null +++ b/run_docker_lora_image.sh @@ -0,0 +1,11 @@ +docker run -d \ + --name sglang_lora \ + --gpus all \ + --shm-size=8g \ + -e NVIDIA_VISIBLE_DEVICES=all \ + -e SUPABASE_URL=http://183.36.35.42 \ + -p 30000:30000 \ + -p 30001:30001 \ + -v /home/jzy/Qwen3-32B:/root/.cradle/external/llm \ + -v /home/jzy/adapters:/root/.cradle/external/lora/q3 \ + sglang_lora:latest diff --git a/supervisord.conf b/supervisord.conf index eee4c52f8..1c392fcdf 100644 --- a/supervisord.conf +++ b/supervisord.conf @@ -5,7 +5,7 @@ logfile_maxbytes=0 loglevel=info [program:sglang] -command=python3 -m sglang.launch_server --host 0.0.0.0 --port 30000 --model-path /root/.cradle/external/llm/ --lora-paths q3=/root/.cradle/external/lora/q3 --disable-radix-cache --tp 4 --api-key token-abc123 --enable-metrics +command=python3 -m sglang.launch_server --host 0.0.0.0 --port 30000 --model-path /root/.cradle/external/llm/ --lora-paths q3=/root/.cradle/external/lora/q3 --lora-target-modules q_proj,k_proj,v_proj,o_proj,gate_proj,up_proj,down_proj 
--max-lora-rank 16 --served-model-name qwen3-32b --disable-radix-cache --tp 4 --api-key token-abc123 --enable-metrics --log-requests --log-requests-level 2 autostart=true autorestart=true stdout_logfile=/dev/stdout