From 35ba2eab429c0f213c9edae0377c8c311a127795 Mon Sep 17 00:00:00 2001
From: hailin
Date: Fri, 25 Jul 2025 12:19:03 +0800
Subject: [PATCH] .

---
 Dockerfile       | 23 ++++++++------
 meta_ui.py       | 79 ++++++++++++++++++++++++++++++++++++++++++++++++
 supervisord.conf | 23 ++++++++++++++
 3 files changed, 116 insertions(+), 9 deletions(-)
 create mode 100644 meta_ui.py
 create mode 100644 supervisord.conf

diff --git a/Dockerfile b/Dockerfile
index 6f69935..3b45647 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -108,6 +108,9 @@ RUN pip wheel \
     setproctitle uvloop sentencepiece triton pillow cachetools msgspec blake3 cloudpickle compressed-tensors einops openai py-cpuinfo dill partial_json_parser python-multipart torchao \
     -w /wheels
 
+# ── ✅ Bundle the wheels needed by the gradio UI ────────────────────────────
+RUN pip wheel gradio requests -w /wheels
+
 ###############################################################################
 # Stage 2 ─ runtime: minimal runtime image, wheels installed offline only
 ###############################################################################
@@ -165,13 +168,15 @@ COPY ./Alibaba/Qwen3-14B-Base /root/.cradle/Alibaba/Qwen3-14B-Base
 HEALTHCHECK --interval=30s --timeout=2s --start-period=300s --retries=5 CMD curl -fs http://localhost:30000/health || exit 1
 
 # ---- Exposed ports ----
-EXPOSE 30000
+EXPOSE 30000 30001
 
-# ---- Launch the SGLang inference server ----
-CMD ["python3", "-m", "sglang.launch_server", \
-     "--host", "0.0.0.0", \
-     "--port", "30000", \
-     "--model-path", "/root/.cradle/Alibaba/Qwen3-14B-Base/", \
-     "--tp", "2", \
-     "--api-key", "token-abc123", \
-     "--enable-metrics"]
\ No newline at end of file
+# Install supervisor so both processes can run in one container
+RUN apt-get update && apt-get install -y supervisor && \
+    mkdir -p /etc/supervisor/conf.d
+
+# Copy the supervisord config and the UI script
+COPY ./meta_ui.py /app/meta_ui.py
+COPY ./supervisord.conf /etc/supervisor/supervisord.conf
+
+# Run supervisor as the container's main process
+CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"]
\ No newline at end of file
diff --git a/meta_ui.py b/meta_ui.py
new file mode 100644
index 0000000..8177526
--- /dev/null
+++ b/meta_ui.py
@@ -0,0 +1,79 @@
+import gradio as gr
+import requests
+
+API_URL = "http://localhost:30000/v1/completions"
+API_KEY = "token-abc123"
+MODEL_NAME = "Qwen3-14b-base"
+
+# Build the prompt: a base model has no chat template, so history is concatenated as plain text
+def build_prompt(history, user_message):
+    prompt = ""
+    for user, bot in history:
+        prompt += f"User: {user}\nAssistant: {bot}\n"
+    prompt += f"User: {user_message}\nAssistant:"
+    return prompt
+
+# Main chat function
+def chat(user_message, history, max_tokens, temperature):
+    prompt = build_prompt(history, user_message)
+
+    headers = {
+        "Authorization": f"Bearer {API_KEY}",
+        "Content-Type": "application/json"
+    }
+    payload = {
+        "model": MODEL_NAME,
+        "prompt": prompt,
+        "max_tokens": max_tokens,
+        "temperature": temperature,
+        "stop": ["\nUser:", "\nAssistant:"]
+    }
+
+    try:
+        response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
+        result = response.json()
+        reply = result["choices"][0]["text"].strip()
+    except Exception as e:
+        reply = f"[Request failed] {e}"
+
+    return reply
+
+# Manual API connectivity check
+def test_api_connection(max_tokens, temperature):
+    headers = {
+        "Authorization": f"Bearer {API_KEY}",
+        "Content-Type": "application/json"
+    }
+    payload = {
+        "model": MODEL_NAME,
+        "prompt": "Ping?",
+        "max_tokens": max_tokens,
+        "temperature": temperature
+    }
+
+    try:
+        resp = requests.post(API_URL, headers=headers, json=payload, timeout=10)
+        out = resp.json()["choices"][0]["text"].strip()
+        return f"✅ API reachable, response: {out}"
+    except Exception as e:
+        return f"❌ API request failed: {e}"
+
+# Gradio layout
+with gr.Blocks(title="Base model test UI") as demo:
+    gr.Markdown("# 💬 Base model chat interface")
+
+    with gr.Row():
+        max_tokens = gr.Slider(32, 1024, value=256, label="max_tokens")
+        temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="temperature")
+        test_btn = gr.Button("🔁 Test API availability")
+        test_output = gr.Textbox(label="API test result", interactive=False)
+
+    chatbot = gr.ChatInterface(
+        fn=chat, title=None,  # current slider values are passed in via additional_inputs
+        additional_inputs=[max_tokens, temperature]
+    )
+
+    test_btn.click(fn=test_api_connection, inputs=[max_tokens, temperature], outputs=test_output)
+
+# Launch the UI server
+demo.launch(server_name="0.0.0.0", server_port=30001)
diff --git a/supervisord.conf b/supervisord.conf
new file mode 100644
index 0000000..380a3e5
--- /dev/null
+++ b/supervisord.conf
@@ -0,0 +1,23 @@
+[supervisord]
+nodaemon=true
+logfile=/dev/stdout
+logfile_maxbytes=0
+loglevel=info
+
+[program:sglang]
+command=python3 -m sglang.launch_server --host 0.0.0.0 --port 30000 --model-path /root/.cradle/Alibaba/Qwen3-14B-Base/ --tp 2 --api-key token-abc123 --enable-metrics
+autostart=true
+autorestart=true
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
+
+[program:ui]
+command=python3 /app/meta_ui.py --port 30001
+autostart=true
+autorestart=true
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stderr
+stderr_logfile_maxbytes=0
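
A quick way to sanity-check the patched image from the host is sketched below. This snippet is not part of the patch; it only reuses values already defined above (the /health endpoint from the Dockerfile HEALTHCHECK, and the completion endpoint, API key, and model name from meta_ui.py). Adjust the host and ports if the container maps them differently.

import requests

API_BASE = "http://localhost:30000"   # SGLang server started by supervisord
UI_URL = "http://localhost:30001"     # Gradio UI started by supervisord
API_KEY = "token-abc123"              # matches --api-key in supervisord.conf
MODEL_NAME = "Qwen3-14b-base"         # matches MODEL_NAME in meta_ui.py

def check_health():
    # Same endpoint the Dockerfile HEALTHCHECK curls.
    r = requests.get(f"{API_BASE}/health", timeout=5)
    print("health:", r.status_code)

def check_completion():
    # Same request shape as test_api_connection() in meta_ui.py.
    r = requests.post(
        f"{API_BASE}/v1/completions",
        headers={"Authorization": f"Bearer {API_KEY}"},
        json={"model": MODEL_NAME, "prompt": "Ping?", "max_tokens": 8},
        timeout=30,
    )
    r.raise_for_status()
    print("completion:", r.json()["choices"][0]["text"].strip())

def check_ui():
    # The Gradio page should answer with HTTP 200 once supervisord has started it.
    r = requests.get(UI_URL, timeout=5)
    print("ui:", r.status_code)

if __name__ == "__main__":
    check_health()
    check_completion()
    check_ui()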