This commit is contained in:
parent
ad93b283a5
commit
3320b33dc6
|
|
@ -0,0 +1 @@
|
|||
# Build the `runtime-autobuild` stage of the Dockerfile in the current
# directory and load the resulting image into the local image store, tagged
# `sglang_lora`.
#
# Both proxy build args point at 127.0.0.1:7890; `--network=host` is
# presumably what lets build steps reach that loopback proxy -- confirm
# against the build host's setup.
docker build \
  --build-arg http_proxy=http://127.0.0.1:7890 \
  --build-arg https_proxy=http://127.0.0.1:7890 \
  --network=host \
  --build-arg CUDA_VERSION=12.6.1 \
  --target runtime-autobuild \
  --build-arg MAX_JOBS=96 \
  --load \
  --tag sglang_lora \
  .
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
# Build only the `wheelhouse` stage and export its filesystem to the local
# directory ./_wheelhouse instead of producing an image.
#
# BuildKit is enabled explicitly for this build via the exported variable.
export DOCKER_BUILDKIT=1

docker build \
  --build-arg http_proxy=http://127.0.0.1:7890 \
  --build-arg https_proxy=http://127.0.0.1:7890 \
  --network=host \
  --build-arg CUDA_VERSION=12.6.1 \
  --target wheelhouse \
  --output type=local,dest=./_wheelhouse \
  --build-arg MAX_JOBS=96 \
  .
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
# Start the `sglang_lora:latest` image as a detached container named
# `sglang_lora`, with all GPUs exposed and 8 GiB of shared memory.
#
# Published ports: 30000 and 30001 (same port number on host and container).
# Bind mounts:
#   /home/jzy/Qwen3-32B  -> /root/.cradle/external/llm
#   /home/jzy/adapters   -> /root/.cradle/external/lora/q3
docker run --detach \
  --name sglang_lora \
  --gpus all \
  --shm-size=8g \
  --env NVIDIA_VISIBLE_DEVICES=all \
  --env SUPABASE_URL=http://183.36.35.42 \
  --publish 30000:30000 \
  --publish 30001:30001 \
  --volume /home/jzy/Qwen3-32B:/root/.cradle/external/llm \
  --volume /home/jzy/adapters:/root/.cradle/external/lora/q3 \
  sglang_lora:latest
|
||||
|
|
@ -5,7 +5,7 @@ logfile_maxbytes=0
|
|||
loglevel=info
|
||||
|
||||
[program:sglang]
|
||||
command=python3 -m sglang.launch_server --host 0.0.0.0 --port 30000 --model-path /root/.cradle/external/llm/ --lora-paths q3=/root/.cradle/external/lora/q3 --disable-radix-cache --tp 4 --api-key token-abc123 --enable-metrics
|
||||
; Launches the sglang server module, listening on 0.0.0.0:30000.
; The base model is read from /root/.cradle/external/llm/ and a LoRA adapter
; registered under the name "q3" from /root/.cradle/external/lora/q3; the
; model is served under the name "qwen3-32b".
; LoRA is restricted to the listed projection modules with a max rank of 16.
; Request logging is enabled at level 2.
; NOTE(review): --tp 4 presumably requires 4 visible GPUs -- confirm against
; the host the container runs on.
; NOTE(review): the API key is a literal value in this file; consider
; supplying it from the environment rather than committing it.
command=python3 -m sglang.launch_server --host 0.0.0.0 --port 30000 --model-path /root/.cradle/external/llm/ --lora-paths q3=/root/.cradle/external/lora/q3 --lora-target-modules q_proj,k_proj,v_proj,o_proj,gate_proj,up_proj,down_proj --max-lora-rank 16 --served-model-name qwen3-32b --disable-radix-cache --tp 4 --api-key token-abc123 --enable-metrics --log-requests --log-requests-level 2
|
||||
autostart=true
|
||||
autorestart=true
|
||||
stdout_logfile=/dev/stdout
|
||||
|
|
|
|||
Loading…
Reference in New Issue