Summary of this patch to meta_ui.py (commentary only; it precedes the
"diff --git" header and is not part of any hunk):
  * removes the API_URL constant;
  * renames chat()'s first parameter to user_msg and unwraps its "text"
    field when the message arrives as a dict;
  * selects the "max_tokens" key for "/v1/chat/completions" (the old code
    compared against "/v1/completion") and offers that suffix in the
    interface dropdown in place of "/v1/completions";
  * strips inline comments and the consume_logs docstring, shortens the
    section rulers, and adds blank lines before some definitions.
NOTE(review): leading indentation and blank-line placement of unchanged
context lines were restored by hand; verify them against the repository
before applying this patch.

diff --git a/meta_ui.py b/meta_ui.py
index e4ea2b1..db68a05 100644
--- a/meta_ui.py
+++ b/meta_ui.py
@@ -3,11 +3,11 @@ from pathlib import Path
 from collections import deque
 import queue, threading, time
 
-# ───────────────────── 基础配置 ─────────────────────
-API_URL = "http://localhost:30000/generate"
+# ────────────────── 基础配置 ──────────────────
 API_KEY = "token-abc123"
 MODEL_PATH = Path("/root/.cradle/Alibaba/Qwen3-30B-A3B-Base")
 
+
 def model_name(path: Path):
     cfg = path / "config.json"
     if cfg.exists():
@@ -18,18 +18,19 @@ def model_name(path: Path):
 MODEL_NAME = model_name(MODEL_PATH)
 now = lambda: datetime.datetime.now().strftime("%H:%M:%S")
 
-# ───────────────────── 日志队列 ─────────────────────
+# ────────────────── 日志队列 ──────────────────
 LOG_Q: "queue.Queue[str]" = queue.Queue()
-LOG_TXT = "" # ✅ 全局日志缓存,避免 chat 焦点阻断 log_box 更新
+LOG_TXT = ""
 
-def log(msg): # 写终端 + 推队列
+
+def log(msg):
     print(msg, flush=True)
     LOG_Q.put(msg)
 
-prev_log_value = "" # 上一帧的日志内容
+
+prev_log_value = ""
 
 def consume_logs(dummy=None):
-    """每秒更新 log_box 内容,避免 chat 阻塞 UI 刷新"""
     global LOG_TXT, prev_log_value
     buf = deque(LOG_TXT.splitlines(), maxlen=400)
     while not LOG_Q.empty():
@@ -38,10 +39,10 @@ def consume_logs(dummy=None):
     if LOG_TXT != prev_log_value:
         prev_log_value = LOG_TXT
         return gr.update(value=LOG_TXT)
-    return gr.update() # 无更新则不触发前端刷新
+    return gr.update()
 
 
-# ───────────────────── 后端调用 ─────────────────────
+# ────────────────── 后端调用 ──────────────────
 def backend(text, sampling, api_suffix):
     url = f"http://localhost:30000{api_suffix}"
     if api_suffix == "/generate":
@@ -69,7 +70,7 @@ def backend(text, sampling, api_suffix):
             meta = data.get("meta_info", {})
             fr = meta.get("finish_reason")
             ctok = meta.get("completion_tokens")
-        else: # "/v1/chat/completions"
+        else:
             choice = data.get("choices", [{}])[0]
             txt = choice.get("message", {}).get("content", "").strip()
             fr = choice.get("finish_reason")
@@ -85,18 +86,21 @@ def backend(text, sampling, api_suffix):
         return f"[❌ 请求异常] {e}"
 
 
-# ───────────────────── Chat 回调 ─────────────────────
+# ────────────────── Chat 回调 ──────────────────
 def chat(
-    user, history,
+    user_msg, history,
     max_new, temp, top_p, top_k,
     rep_pen, pres_pen, stop_raw,
     api_suffix, log_state
 ):
     from queue import Queue, Empty
 
+    # 解析传入的 ChatInput 格式
+    user = user_msg["text"] if isinstance(user_msg, dict) and "text" in user_msg else user_msg
+
     stop = [s.strip() for s in stop_raw.split(",") if s.strip()] or None
     samp = {
-        ("max_tokens" if api_suffix == "/v1/completion" else "max_new_tokens"): int(max_new),
+        ("max_tokens" if api_suffix == "/v1/chat/completions" else "max_new_tokens"): int(max_new),
         "temperature": temp,
         "top_p": top_p,
         "top_k": int(top_k),
@@ -105,7 +109,6 @@ def chat(
         **({"stop": stop} if stop else {})
     }
 
-
     result_q = Queue()
 
     def worker():
@@ -123,14 +126,13 @@ def chat(
             continue
 
 
-# ───────────────────── Gradio UI ─────────────────────
+# ────────────────── Gradio UI ──────────────────
 with gr.Blocks(title="调试界面") as demo:
     gr.Markdown(f"## 💬 调试界面 \n权重 **{MODEL_PATH.name}**")
     with gr.Row():
-        api_choice = gr.Dropdown(choices=["/generate", "/v1/completions"],
+        api_choice = gr.Dropdown(choices=["/generate", "/v1/chat/completions"],
                                  value="/generate", label="选择推理接口")
 
-    # 采样参数控件
     with gr.Row():
         max_new = gr.Slider(32, 32768, 128, label="max_new_tokens")
         temp = gr.Slider(0, 1.5, 0.8, step=0.05, label="temperature")
@@ -142,21 +144,19 @@ with gr.Blocks(title="调试界面") as demo:
         pres_pen= gr.Slider(0, 2, 0.0, step=0.05, label="presence_penalty")
         stop_txt = gr.Textbox("", label="stop 序列(逗号分隔)")
 
-    log_state = gr.State("") # 状态透传
-    dbg_chk = gr.Checkbox(label="📜 显示 Debug 面板", value=False) # ✅ 默认关闭
-    log_box = gr.Textbox(label="实时日志", lines=20, interactive=False, visible=False) # ✅ 默认隐藏
+    log_state = gr.State("")
+    dbg_chk = gr.Checkbox(label="📜 显示 Debug 面板", value=False)
+    log_box = gr.Textbox(label="实时日志", lines=20, interactive=False, visible=False)
 
-    # Chat 界面(移到日志之前)
     chatbot = gr.ChatInterface(
         fn=chat,
         additional_inputs=[max_new, temp, top_p, top_k, rep_pen, pres_pen, stop_txt, api_choice, log_state],
-        additional_outputs=[], # ✅ 移除 log_state 输出
+        additional_outputs=[],
         type="messages"
     )
 
-    # 日志刷新定时器
     timer = gr.Timer(1.0, render=True)
     timer.tick(
         fn=consume_logs,