From d2df3af90f5ede448bf0557d7746af9047e341c9 Mon Sep 17 00:00:00 2001
From: hailin
Date: Fri, 1 Aug 2025 11:54:41 +0800
Subject: [PATCH] meta_ui: pass the full chat history to the LLM, render only
 the current reply in the UI

---
 meta_ui.py | 56 +++++++++-----------------------------------------------
 1 file changed, 9 insertions(+), 47 deletions(-)

diff --git a/meta_ui.py b/meta_ui.py
index 1a0762d..af9bd99 100644
--- a/meta_ui.py
+++ b/meta_ui.py
@@ -109,24 +109,15 @@ def chat(
 ):
     from queue import Queue, Empty
 
-    # Parse the incoming ChatInput format
     user = user_msg["text"] if isinstance(user_msg, dict) and "text" in user_msg else user_msg
 
-    # Build OpenAI-style messages, used only for /v1/chat/completions
     if api_suffix == "/v1/chat/completions":
-        messages = []
-        # for u, a in history:
-        #     messages.append({"role": "user", "content": u})
-        #     messages.append({"role": "assistant", "content": a})
-        messages = history[:]  # correctly uses the OpenAI format
+        # Full history for the LLM (used for in-context reasoning)
+        messages = history[:]
         messages.append({"role": "user", "content": user})
         prompt_input = messages
-
-        # user_input = user_msg["text"] if isinstance(user_msg, dict) and "text" in user_msg else user_msg
-        # messages.append({"role": "user", "content": user_input})
-        # prompt_input = messages
     else:
-        prompt_input = user  # the original single-turn text prompt
+        prompt_input = user
 
     stop = [s.strip() for s in stop_raw.split(",") if s.strip()] or None
     samp = {
@@ -142,15 +133,11 @@ def chat(
     result_q = Queue()
 
     def worker():
-        #out = backend(user, samp, api_suffix)
         out = backend(prompt_input, samp, api_suffix)
         result_q.put(out)
 
-    # threading.Thread(target=worker).start()
-    thread = threading.Thread(target=worker, daemon=True)
-    thread.start()
-    # yield "⏳ Generating...", log_state
-
+    thread = threading.Thread(target=worker, daemon=True)
+    thread.start()
 
     if api_suffix == "/v1/chat/completions":
         while True:
@@ -166,8 +153,9 @@ def chat(
         elif not isinstance(result, dict) or "text" not in result:
             result = {"text": str(result)}
 
-        history.append({"role": "assistant", "content": result["text"]})
-        yield result["text"], None  # ✅ show the model output and update history at the same time
+        # ❌ do NOT append to history (keeps earlier turns from re-rendering in the frontend UI)
+        # ✅ the full history was already passed to the LLM above for inference
+        yield result["text"], None  # the UI shows only the current reply
         return
     else:
         while thread.is_alive():
@@ -182,35 +170,9 @@ def chat(
         elif not isinstance(result, dict) or "text" not in result:
             result = {"text": str(result)}
 
-        yield result["text"], log_state  # ✅ other endpoints emit text only and never update history
+        yield result["text"], log_state
         return
 
-    # while True:
-    #     # ⚠️ thread finished and queue drained → return so the generator terminates
-    #     if not thread.is_alive() and result_q.empty():
-    #         break
-    #     # return  # ← newly added line
-
-    #     try:
-    #         result = result_q.get(timeout=0.1)
-    #     except Empty:
-    #         continue
-
-    #     # normalize the result format
-    #     if isinstance(result, str):
-    #         result = {"text": result}
-    #     elif not isinstance(result, dict) or "text" not in result:
-    #         result = {"text": str(result)}
-
-    #     # yield result, log_state  # the first point where the model reply actually reaches the frontend
-    #     if api_suffix == "/v1/chat/completions":
-    #         history.append({"role": "assistant", "content": result["text"]})
-    #         yield result["text"], None
-    #     else:
-    #         yield result["text"], log_state
-
-    #     return  # ← swap the old break for return
-
 # ────────────────── Gradio UI ──────────────────
 with gr.Blocks(title="Debug UI") as demo:
     gr.Markdown(f"## 💬 Debug UI \nWeights **{MODEL_PATH.name}**")
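
Note on the pattern this patch converges on: the blocking backend call runs in a daemon thread, the result comes back through a Queue, and the generator polls with a short timeout so the Gradio UI stays responsive; the full message history is sent to the model while only the current reply is yielded to the frontend. The sketch below is a minimal, self-contained approximation of that flow, not the actual code in meta_ui.py; the backend stub, the chat_once name, and the sample samp dict are hypothetical stand-ins.

import threading
from queue import Queue, Empty

def backend(prompt_input, samp, api_suffix):
    # Hypothetical stand-in for the real blocking backend call in meta_ui.py.
    import time
    time.sleep(0.5)  # simulate a slow round-trip to the model server
    return {"text": "echo: " + prompt_input[-1]["content"]}

def chat_once(history, user, samp, api_suffix="/v1/chat/completions"):
    # The complete history goes to the model for in-context reasoning ...
    messages = history[:] + [{"role": "user", "content": user}]

    result_q = Queue()

    def worker():
        result_q.put(backend(messages, samp, api_suffix))

    # daemon=True: a hung backend call cannot keep the interpreter alive on exit.
    thread = threading.Thread(target=worker, daemon=True)
    thread.start()

    while True:
        # Worker finished without producing a result: end the generator cleanly.
        if not thread.is_alive() and result_q.empty():
            return
        try:
            result = result_q.get(timeout=0.1)  # short timeout keeps the caller responsive
        except Empty:
            continue
        if not isinstance(result, dict) or "text" not in result:
            result = {"text": str(result)}  # normalize stray return shapes
        # ... but history is never appended to here, so the UI renders only this reply.
        yield result["text"]
        return

if __name__ == "__main__":
    past = [{"role": "user", "content": "hi"},
            {"role": "assistant", "content": "hello!"}]
    for reply in chat_once(past, "what did I just say?", {"max_new_tokens": 64}):
        print(reply)

The liveness check before the queue read matters: if the worker raises instead of putting a result, the generator would otherwise spin on Empty forever.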