From 68f7ea0fd95f35fc675018407e0a839b678344db Mon Sep 17 00:00:00 2001
From: hailin
Date: Wed, 6 Aug 2025 15:37:31 +0800
Subject: [PATCH] .

---
 1                | 365 -----------------------------------------------
 Dockerfile       |  13 ++
 meta_ui.py       | 224 +++++++++++++++++++++++++++++
 supervisord.conf |  23 +++
 4 files changed, 260 insertions(+), 365 deletions(-)
 delete mode 100644 1
 create mode 100644 meta_ui.py
 create mode 100644 supervisord.conf

diff --git a/1 b/1
deleted file mode 100644
index 9fde304..0000000
--- a/1
+++ /dev/null
@@ -1,365 +0,0 @@
-ARG CUDA_VERSION=12.8.1
-ARG PYTHON_VERSION=3.12
-
-
-ARG BUILD_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04
-ARG FINAL_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
-
-
-ARG DEADSNAKES_MIRROR_URL
-ARG DEADSNAKES_GPGKEY_URL
-
-ARG GET_PIP_URL="https://bootstrap.pypa.io/get-pip.py"
-
-ARG PIP_INDEX_URL
-ARG PIP_EXTRA_INDEX_URL
-ARG UV_INDEX_URL=${PIP_INDEX_URL}
-ARG UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
-
-# PyTorch provides its own indexes for standard and nightly builds
-ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl
-ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly
-
-ARG PIP_KEYRING_PROVIDER=disabled
-ARG UV_KEYRING_PROVIDER=${PIP_KEYRING_PROVIDER}
-
-# Flag enables built-in KV-connector dependency libs into docker images
-ARG INSTALL_KV_CONNECTORS=false
-
-#################### BASE BUILD IMAGE ####################
-# prepare basic build environment
-FROM ${BUILD_BASE_IMAGE} AS base
-ARG CUDA_VERSION
-ARG PYTHON_VERSION
-ARG TARGETPLATFORM
-ARG INSTALL_KV_CONNECTORS=false
-ENV DEBIAN_FRONTEND=noninteractive
-
-ARG DEADSNAKES_MIRROR_URL
-ARG DEADSNAKES_GPGKEY_URL
-ARG GET_PIP_URL
-
-# Install Python and other dependencies
-RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
-    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
-    && apt-get update -y \
-    && apt-get install -y ccache software-properties-common git curl sudo \
-    && if [ ! -z ${DEADSNAKES_MIRROR_URL} ] ; then \
-        if [ ! -z "${DEADSNAKES_GPGKEY_URL}" ] ; then \
-z "${DEADSNAKES_GPGKEY_URL}" ] ; then \ - mkdir -p -m 0755 /etc/apt/keyrings ; \ - curl -L ${DEADSNAKES_GPGKEY_URL} | gpg --dearmor > /etc/apt/keyrings/deadsnakes.gpg ; \ - sudo chmod 644 /etc/apt/keyrings/deadsnakes.gpg ; \ - echo "deb [signed-by=/etc/apt/keyrings/deadsnakes.gpg] ${DEADSNAKES_MIRROR_URL} $(lsb_release -cs) main" > /etc/apt/sources.list.d/deadsnakes.list ; \ - fi ; \ - else \ - for i in 1 2 3; do \ - add-apt-repository -y ppa:deadsnakes/ppa && break || \ - { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; \ - done ; \ - fi \ - && apt-get update -y \ - && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \ - && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \ - && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \ - && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \ - && curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION} \ - && python3 --version && python3 -m pip --version - -ARG PIP_INDEX_URL UV_INDEX_URL -ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL -ARG PYTORCH_CUDA_INDEX_BASE_URL -ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL -ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER - -# Install uv for faster pip installs -RUN --mount=type=cache,target=/root/.cache/uv \ - python3 -m pip install uv - -ENV UV_HTTP_TIMEOUT=500 -ENV UV_INDEX_STRATEGY="unsafe-best-match" - -RUN apt-get install -y gcc-10 g++-10 -RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 110 --slave /usr/bin/g++ g++ /usr/bin/g++-10 -RUN <> /etc/environment - -# Install Python and other dependencies -RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \ - && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \ - && apt-get update -y \ - && apt-get install -y ccache software-properties-common git curl wget sudo vim python3-pip \ - && apt-get install -y ffmpeg libsm6 libxext6 libgl1 \ - && if [ ! -z ${DEADSNAKES_MIRROR_URL} ] ; then \ - if [ ! 
-z "${DEADSNAKES_GPGKEY_URL}" ] ; then \ - mkdir -p -m 0755 /etc/apt/keyrings ; \ - curl -L ${DEADSNAKES_GPGKEY_URL} | gpg --dearmor > /etc/apt/keyrings/deadsnakes.gpg ; \ - sudo chmod 644 /etc/apt/keyrings/deadsnakes.gpg ; \ - echo "deb [signed-by=/etc/apt/keyrings/deadsnakes.gpg] ${DEADSNAKES_MIRROR_URL} $(lsb_release -cs) main" > /etc/apt/sources.list.d/deadsnakes.list ; \ - fi ; \ - else \ - for i in 1 2 3; do \ - add-apt-repository -y ppa:deadsnakes/ppa && break || \ - { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; \ - done ; \ - fi \ - && apt-get update -y \ - && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv libibverbs-dev \ - && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \ - && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \ - && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \ - && curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION} \ - && python3 --version && python3 -m pip --version - -ARG PIP_INDEX_URL UV_INDEX_URL -ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL -ARG PYTORCH_CUDA_INDEX_BASE_URL -ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL -ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER - -# Install uv for faster pip installs -RUN --mount=type=cache,target=/root/.cache/uv \ - python3 -m pip install uv - -# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out -# Reference: https://github.com/astral-sh/uv/pull/1694 -ENV UV_HTTP_TIMEOUT=500 -ENV UV_INDEX_STRATEGY="unsafe-best-match" - -RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ - -RUN --mount=type=cache,target=/root/.cache/uv \ - if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ - uv pip install --system \ - --index-url ${PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \ - "torch==2.8.0.dev20250318+cu128" "torchvision==0.22.0.dev20250319" ; \ - uv pip install --system \ - --index-url ${PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \ - --pre pytorch_triton==3.3.0+gitab727c40 ; \ - fi - -# Install vllm wheel first, so that torch etc will be installed. -RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \ - --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system dist/*.whl --verbose \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') - -# Install FlashInfer from source -ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git" -ARG FLASHINFER_GIT_REF="v0.2.8rc1" -RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH' - . /etc/environment - git clone --depth 1 --recursive --shallow-submodules \ - --branch ${FLASHINFER_GIT_REF} \ - ${FLASHINFER_GIT_REPO} flashinfer - # Exclude CUDA arches for older versions (11.x and 12.0-12.7) - # TODO: Update this to allow setting TORCH_CUDA_ARCH_LIST as a build arg. 
- if [[ "${CUDA_VERSION}" == 11.* ]]; then - FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9" - elif [[ "${CUDA_VERSION}" == 12.[0-7]* ]]; then - FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a" - else - # CUDA 12.8+ supports 10.0a and 12.0 - FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a 10.0a 12.0" - fi - echo "๐Ÿ—๏ธ Building FlashInfer for arches: ${FI_TORCH_CUDA_ARCH_LIST}" - # Needed to build AOT kernels - pushd flashinfer - TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" \ - python3 -m flashinfer.aot - TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" \ - uv pip install --system --no-build-isolation . - popd - rm -rf flashinfer -BASH -COPY ./vllm_v0.10.0/examples examples -COPY ./vllm_v0.10.0/benchmarks benchmarks -COPY ./vllm_v0.10.0/vllm/collect_env.py . - -RUN --mount=type=cache,target=/root/.cache/uv \ -. /etc/environment && \ -uv pip list - - -COPY ./vllm_v0.10.0/requirements/build.txt requirements/build.txt -RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install --system -r ./vllm_v0.10.0/requirements/build.txt \ - --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') - -#################### vLLM installation IMAGE #################### - -#################### OPENAI API SERVER #################### -# base openai image with additional requirements, for any subsequent openai-style images -FROM vllm-base AS vllm-openai-base -ARG TARGETPLATFORM -ARG INSTALL_KV_CONNECTORS=false - -ARG PIP_INDEX_URL UV_INDEX_URL -ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL - -# This timeout (in seconds) is necessary when installing some dependencies via uv since it's likely to time out -# Reference: https://github.com/astral-sh/uv/pull/1694 -ENV UV_HTTP_TIMEOUT=500 - -COPY ./vllm_v0.10.0/requirements/kv_connectors.txt requirements/kv_connectors.txt - -# install additional dependencies for openai api server -RUN --mount=type=cache,target=/root/.cache/uv \ - if [ "$INSTALL_KV_CONNECTORS" = "true" ]; then \ - uv pip install --system -r ./vllm_v0.10.0/requirements/kv_connectors.txt; \ - fi; \ - if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ - BITSANDBYTES_VERSION="0.42.0"; \ - else \ - BITSANDBYTES_VERSION="0.46.1"; \ - fi; \ - uv pip install --system accelerate hf_transfer modelscope "bitsandbytes>=${BITSANDBYTES_VERSION}" 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3] - -ENV VLLM_USAGE_SOURCE production-docker-image - - -FROM vllm-openai-base AS vllm-openai - -ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"] -#################### OPENAI API SERVER #################### diff --git a/Dockerfile b/Dockerfile index 831e95e..a4a07b4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -415,6 +415,12 @@ FROM vllm-base AS vllm-openai-base ARG TARGETPLATFORM ARG INSTALL_KV_CONNECTORS=false +# ---- Add Tini as the container init process +ENV TINI_VERSION=v0.19.0 +ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini +RUN chmod +x /tini +ENTRYPOINT ["/tini", "--"] + ARG PIP_INDEX_URL UV_INDEX_URL ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL @@ -438,6 +444,13 @@ RUN --mount=type=cache,target=/root/.cache/uv \ ENV VLLM_USAGE_SOURCE production-docker-image +RUN apt-get update && apt-get install -y supervisor && mkdir -p /etc/supervisor/conf.d + +# ๆ‹ท่ด้…็ฝฎๆ–‡ไปถ๏ผˆๅ‡่ฎพไฝ ๅ‡†ๅค‡ไบ†๏ผ‰ +COPY ./supervisord.conf /etc/supervisor/supervisord.conf +COPY ./meta_ui.py /app/meta_ui.py + + # # define sagemaker first, so it is not default from `docker build` # FROM vllm-openai-base AS vllm-sagemaker diff --git a/meta_ui.py 
new file mode 100644
index 0000000..522c7f0
--- /dev/null
+++ b/meta_ui.py
@@ -0,0 +1,224 @@
+import json, datetime, textwrap, requests, gradio as gr
+from pathlib import Path
+from collections import deque
+import queue, threading, time
+
+# ────────────────── Basic configuration ──────────────────
+API_KEY = "token-abc123"
+MODEL_PATH = Path("/root/.cradle/Alibaba/Qwen3-30B-A3B-Base")
+
+
+def model_name(path: Path):
+    cfg = path / "config.json"
+    if cfg.exists():
+        data = json.load(cfg.open())
+        return data.get("architectures", [None])[0] or data.get("model_type") or path.name
+    return path.name
+
+MODEL_NAME = model_name(MODEL_PATH)
+now = lambda: datetime.datetime.now().strftime("%H:%M:%S")
+
+# ────────────────── Log queue ──────────────────
+LOG_Q: "queue.Queue[str]" = queue.Queue()
+LOG_TXT = ""
+
+
+def log(msg):
+    print(msg, flush=True)
+    LOG_Q.put(msg)
+
+
+prev_log_value = ""
+
+def consume_logs(dummy=None):
+    global LOG_TXT, prev_log_value
+    buf = deque(LOG_TXT.splitlines(), maxlen=400)
+    while not LOG_Q.empty():
+        buf.append(LOG_Q.get())
+    LOG_TXT = "\n".join(buf)
+    if LOG_TXT != prev_log_value:
+        prev_log_value = LOG_TXT
+        return gr.update(value=LOG_TXT)
+    return gr.update()
+
+
+# ────────────────── Backend call ──────────────────
+def backend(text, sampling, api_suffix):
+    url = f"http://localhost:30000{api_suffix}"
+    if api_suffix == "/generate":
+        payload = {"model": MODEL_NAME, "text": text, "sampling_params": sampling}
+    elif api_suffix == "/v1/completions":
+        payload = {
+            "model": MODEL_NAME,
+            "prompt": text,
+            **sampling
+        }
+    elif api_suffix == "/v1/chat/completions":
+        payload = {
+            "model": MODEL_NAME,
+            "messages": text,  # ← here "text" is actually the messages list
+            **sampling
+        }
+
+    log(f"\n🟡 [{now()}] POST {url}\n{json.dumps(payload, ensure_ascii=False, indent=2)}")
+    try:
+        r = requests.post(url,
+                          headers={"Authorization": f"Bearer {API_KEY}",
+                                   "Content-Type": "application/json"},
+                          json=payload, timeout=180)
+        try:
+            data = r.json()
+        except Exception:
+            data = {}
+
+        if api_suffix == "/generate":
+            txt = data.get("text", "").strip()
+            meta = data.get("meta_info", {})
+            fr = meta.get("finish_reason")
+            ctok = meta.get("completion_tokens")
+        elif api_suffix == "/v1/completions":
+            choice = data.get("choices", [{}])[0]
+            txt = choice.get("text", "").strip()
+            fr = choice.get("finish_reason")
+            ctok = data.get("usage", {}).get("completion_tokens")
+        elif api_suffix == "/v1/chat/completions":
+            choice = data.get("choices", [{}])[0]
+            msg = choice.get("message", {})
+            txt = msg.get("content", "").strip()
+
+            # new: read completion_tokens from usage
+            ctok = data.get("usage", {}).get("completion_tokens")
+            fr = choice.get("finish_reason")  # in case the finish reason is needed later
+
+        log(f"🟢 [{now()}] HTTP {r.status_code} tokens={ctok} finish={fr}\n"
+            f"🟢 resp={r.text!r}")
+        if r.status_code != 200:
+            return f"[HTTP {r.status_code}] {r.text}"
+        return txt or "[⚠ empty]"
+    except Exception as e:
+        log(f"[❌ request failed] {e}")
+        return f"[❌ request failed] {e}"
+
+
+# ────────────────── Chat callback ──────────────────
+def chat(
+    user_msg, history,
+    max_new, temp, top_p, top_k,
+    rep_pen, pres_pen, stop_raw,
+    api_suffix, log_state
+):
+    from queue import Queue, Empty
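+    # backend() is run on a daemon worker thread below; the loops that follow
+    # poll result_q until the HTTP response arrives, then yield it to Gradio.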
+
+    user = user_msg["text"] if isinstance(user_msg, dict) and "text" in user_msg else user_msg
+
+    if api_suffix == "/v1/chat/completions":
+        # full history for the LLM (used for in-context reasoning)
+        messages = history[:]
+        messages.append({"role": "user", "content": user})
+        prompt_input = messages
+    else:
+        prompt_input = user
+
+    stop = [s.strip() for s in stop_raw.split(",") if s.strip()] or None
+    samp = {
+        ("max_tokens" if api_suffix == "/v1/completions" else "max_new_tokens"): int(max_new),
+        "temperature": temp,
+        "top_p": top_p,
+        "top_k": int(top_k),
+        "repetition_penalty": rep_pen,
+        "presence_penalty": pres_pen,
+        **({"stop": stop} if stop else {})
+    }
+
+    result_q = Queue()
+
+    def worker():
+        out = backend(prompt_input, samp, api_suffix)
+        result_q.put(out)
+
+    thread = threading.Thread(target=worker, daemon=True)
+    thread.start()
+
+    if api_suffix == "/v1/chat/completions":
+        while True:
+            if not thread.is_alive() and result_q.empty():
+                break
+            try:
+                result = result_q.get(timeout=0.1)
+            except Empty:
+                continue
+
+            txt = result.strip() if isinstance(result, str) else str(result).strip()
+
+            yield {"text": txt}, log_state
+            return
+    else:
+        while thread.is_alive():
+            try:
+                result = result_q.get(timeout=0.1)
+                break
+            except Empty:
+                continue
+
+        if isinstance(result, str):
+            result = {"text": result}
+        elif not isinstance(result, dict) or "text" not in result:
+            result = {"text": str(result)}
+
+        yield result["text"], log_state
+        return
+
+
+# ────────────────── Gradio UI ──────────────────
+with gr.Blocks(title="Debug UI") as demo:
+    gr.Markdown(f"## 💬 Debug UI \nWeights **{MODEL_PATH.name}**")
+
+    with gr.Row():
+        api_choice = gr.Dropdown(choices=["/generate", "/v1/completions", "/v1/chat/completions"],
+                                 value="/generate", label="Inference endpoint")
+
+    with gr.Row():
+        max_new = gr.Slider(32, 32768, 1024, label="max_new_tokens")
+        temp = gr.Slider(0, 1.5, 0.8, step=0.05, label="temperature")
+    with gr.Row():
+        top_p = gr.Slider(0, 1, 0.95, step=0.01, label="top_p")
+        top_k = gr.Slider(0, 200, 50, step=1, label="top_k")
+    with gr.Row():
+        rep_pen = gr.Slider(0.8, 2, 1.05, step=0.01, label="repetition_penalty")
+        pres_pen = gr.Slider(0, 2, 0.0, step=0.05, label="presence_penalty")
+        stop_txt = gr.Textbox("", label="stop sequences (comma-separated)")
+
+    log_state = gr.State("")
+    dbg_chk = gr.Checkbox(label="📜 Show debug panel", value=False)
+    log_box = gr.Textbox(label="Live log", lines=20, interactive=False, visible=False)
+
+    chat = gr.ChatInterface(
+        fn=chat,
+        additional_inputs=[max_new, temp, top_p, top_k,
+                           rep_pen, pres_pen, stop_txt,
+                           api_choice, log_state],
+        additional_outputs=[log_state],
+        type="messages"
+    )
+
+    timer = gr.Timer(1.0, render=True)
+    timer.tick(
+        fn=consume_logs,
+        inputs=[],
+        outputs=[log_box],
+    )
+
+    def clear_all_logs(_):
+        global LOG_Q, LOG_TXT, prev_log_value
+        with LOG_Q.mutex:
+            LOG_Q.queue.clear()
+        LOG_TXT = ""
+        prev_log_value = ""
+        return gr.update(value=""), gr.update(value="")
+
+    api_choice.change(fn=clear_all_logs, inputs=api_choice, outputs=[log_state, log_box])
+    log_state.change(lambda txt: gr.update(value=txt), log_state, log_box)
+    dbg_chk.change(lambda v: gr.update(visible=v), dbg_chk, log_box)
+
+
+demo.launch(server_name="0.0.0.0", server_port=30001)
diff --git a/supervisord.conf b/supervisord.conf
new file mode 100644
index 0000000..3f4149a
--- /dev/null
+++ b/supervisord.conf
@@ -0,0 +1,23 @@
+[supervisord]
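+# Run supervisord in the foreground (tini stays PID 1) and send its own log to stdout.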
+nodaemon=true
+logfile=/dev/stdout
+logfile_maxbytes=0
+loglevel=info
+
+[program:sglang]
+command=python3 -m sglang.launch_server --host 0.0.0.0 --port 30000 --model-path /root/.cradle/Alibaba/Qwen3-30B-A3B/ --tp 4 --api-key token-abc123 --enable-metrics
+autostart=true
+autorestart=true
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+
+[program:ui]
+command=python3 /app/meta_ui.py --port 30001
+autostart=true
+autorestart=true
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
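+
+# Note: meta_ui.py does not parse the --port flag above; it hardcodes
+# server_name="0.0.0.0" and server_port=30001 in demo.launch().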