# Docker Compose definition for the sglang inference server.
# Runs lmsysorg/sglang with host networking + privileged mode (required for RDMA)
# and serves meta-llama/Llama-3.1-8B-Instruct on port 30000.
services:
  sglang:
    image: lmsysorg/sglang:latest
    container_name: sglang
    volumes:
      # HuggingFace cache, shared with the host so model weights persist.
      - ${HOME}/.cache/huggingface:/root/.cache/huggingface
      # If you use modelscope, you need to mount this directory:
      # - ${HOME}/.cache/modelscope:/root/.cache/modelscope
    restart: always
    network_mode: host  # required by RDMA
    privileged: true  # required by RDMA
    # Alternatively, drop network_mode/privileged and publish only port 30000
    # (quoted — unquoted host:container mappings hit YAML's sexagesimal trap):
    # ports:
    #   - "30000:30000"
    environment:
      # Placeholder — replace with a real HuggingFace token (or inject via env/secret store).
      HF_TOKEN: "<secret>"
      # If you use modelscope to download the model, set this variable
      # (string, not boolean — env values are passed to the container as text):
      # SGLANG_USE_MODELSCOPE: "true"
    entrypoint: python3 -m sglang.launch_server
    # Folded scalar: joins to a single command line at runtime.
    command: >-
      --model-path meta-llama/Llama-3.1-8B-Instruct
      --host 0.0.0.0
      --port 30000
    ulimits:
      memlock: -1  # unlimited locked memory (needed for RDMA/pinned buffers)
      stack: 67108864
    ipc: host
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"]
    deploy:
      resources:
        reservations:
          devices:
            # Reserve GPU 0; extend device_ids (or use `count: all`) for multi-GPU.
            - driver: nvidia
              device_ids: ["0"]
              capabilities: [gpu]