commit d2df3af90f
parent 47bb4e366e

meta_ui.py (133 lines changed)
@@ -109,24 +109,15 @@ def chat(
 ):
     from queue import Queue, Empty
 
-    # Parse the incoming ChatInput format
     user = user_msg["text"] if isinstance(user_msg, dict) and "text" in user_msg else user_msg
 
-    # Build OpenAI-style messages, used only for /v1/chat/completions
     if api_suffix == "/v1/chat/completions":
-        messages = []
-        # for u, a in history:
-        #     messages.append({"role": "user", "content": u})
-        #     messages.append({"role": "assistant", "content": a})
-        messages = history[:]  # correct OpenAI format
+        # full history for the LLM (used for contextual reasoning)
+        messages = history[:]
         messages.append({"role": "user", "content": user})
         prompt_input = messages
-
-        # user_input = user_msg["text"] if isinstance(user_msg, dict) and "text" in user_msg else user_msg
-        # messages.append({"role": "user", "content": user_input})
-        # prompt_input = messages
     else:
-        prompt_input = user  # original single-turn text prompt
+        prompt_input = user
 
     stop = [s.strip() for s in stop_raw.split(",") if s.strip()] or None
     samp = {
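Note: after this hunk, the full chat history (assumed to already be a list of OpenAI-style {"role": ..., "content": ...} dicts) goes to the backend, while the UI renders only the current turn. A minimal sketch of the two payload shapes; build_prompt_input and the example history are illustrative assumptions, not code from meta_ui.py:

def build_prompt_input(user, history, api_suffix):
    # Mirrors the hunk above: full chat history for the chat endpoint,
    # a bare string for every other endpoint.
    if api_suffix == "/v1/chat/completions":
        messages = history[:]  # copy so the caller's list is not mutated
        messages.append({"role": "user", "content": user})
        return messages
    return user

history = [{"role": "user", "content": "hi"},
           {"role": "assistant", "content": "hello!"}]
print(build_prompt_input("how are you?", history, "/v1/chat/completions"))
# -> the two prior turns plus {"role": "user", "content": "how are you?"}
print(build_prompt_input("how are you?", history, "/v1/completions"))
# -> "how are you?"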
@@ -142,15 +133,11 @@ def chat(
     result_q = Queue()
 
     def worker():
-        # out = backend(user, samp, api_suffix)
         out = backend(prompt_input, samp, api_suffix)
         result_q.put(out)
 
-    # threading.Thread(target=worker).start()
     thread = threading.Thread(target=worker, daemon=True)
     thread.start()
-    # yield "⏳ generating...", log_state
-
 
     if api_suffix == "/v1/chat/completions":
         while True:
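The hunk above settles on a daemon thread feeding a Queue, which the generator then polls without blocking. A self-contained sketch of that pattern; run_in_background and poll are illustrative names, not functions in this file:

import threading
from queue import Queue, Empty

def run_in_background(fn, *args):
    # Run a blocking call on a daemon thread; hand the result back via a queue.
    q = Queue()
    t = threading.Thread(target=lambda: q.put(fn(*args)), daemon=True)
    t.start()
    return t, q

def poll(thread, q, interval=0.1):
    # Wait until the worker has produced a result, or exited without one.
    while True:
        if not thread.is_alive() and q.empty():
            return None
        try:
            return q.get(timeout=interval)
        except Empty:
            continue

t, q = run_in_background(lambda: "backend output")
print(poll(t, q))  # -> "backend output"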
@@ -166,8 +153,9 @@ def chat(
             elif not isinstance(result, dict) or "text" not in result:
                 result = {"text": str(result)}
 
-            history.append({"role": "assistant", "content": result["text"]})
-            yield result["text"], None  # ✅ show model output and update history
+            # ❌ do not append to history (keeps the front-end UI from re-rendering past turns)
+            # ✅ the full history was already handed to the LLM for inference above
+            yield result["text"], None  # UI shows only the current reply
             return
     else:
         while thread.is_alive():
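The elif chain in this region coerces whatever the backend returns into a {"text": ...} dict. Restated as a standalone helper (the name normalize_result is an assumption for illustration):

def normalize_result(result):
    # str -> wrap; non-dict or missing "text" -> stringify; dict with "text" -> as-is.
    if isinstance(result, str):
        return {"text": result}
    if not isinstance(result, dict) or "text" not in result:
        return {"text": str(result)}
    return result

assert normalize_result("hi") == {"text": "hi"}
assert normalize_result({"text": "ok", "tokens": 3}) == {"text": "ok", "tokens": 3}
assert normalize_result(42) == {"text": "42"}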
@@ -182,35 +170,86 @@ def chat(
         elif not isinstance(result, dict) or "text" not in result:
             result = {"text": str(result)}
 
-        yield result["text"], log_state  # ✅ other endpoints output text only and do not update history
+        yield result["text"], log_state
         return
 
 
 
+# # ────────────────── Chat callback ──────────────────
+# def chat(
+#     user_msg, history,
+#     max_new, temp, top_p, top_k,
+#     rep_pen, pres_pen, stop_raw,
+#     api_suffix, log_state
+# ):
+#     from queue import Queue, Empty
+
+#     # Parse the incoming ChatInput format
+#     user = user_msg["text"] if isinstance(user_msg, dict) and "text" in user_msg else user_msg
+
+#     # Build OpenAI-style messages, used only for /v1/chat/completions
+#     if api_suffix == "/v1/chat/completions":
+#         messages = []
+#         messages = history[:]  # correct OpenAI format
+#         messages.append({"role": "user", "content": user})
+#         prompt_input = messages
+
+#     else:
+#         prompt_input = user  # original single-turn text prompt
+
+#     stop = [s.strip() for s in stop_raw.split(",") if s.strip()] or None
+#     samp = {
+#         ("max_tokens" if api_suffix == "/v1/completions" else "max_new_tokens"): int(max_new),
+#         "temperature": temp,
+#         "top_p": top_p,
+#         "top_k": int(top_k),
+#         "repetition_penalty": rep_pen,
+#         "presence_penalty": pres_pen,
+#         **({"stop": stop} if stop else {})
+#     }
+
+#     result_q = Queue()
+
+#     def worker():
+#         out = backend(prompt_input, samp, api_suffix)
+#         result_q.put(out)
+
+#     thread = threading.Thread(target=worker, daemon=True)
+#     thread.start()
+
+#     if api_suffix == "/v1/chat/completions":
+#         while True:
+#             if not thread.is_alive() and result_q.empty():
+#                 break
+#             try:
+#                 result = result_q.get(timeout=0.1)
+#             except Empty:
+#                 continue
+
+#             if isinstance(result, str):
+#                 result = {"text": result}
+#             elif not isinstance(result, dict) or "text" not in result:
+#                 result = {"text": str(result)}
+
+#             history.append({"role": "assistant", "content": result["text"]})
+#             yield result["text"], None  # ✅ show model output and update history
+#             return
+#     else:
+#         while thread.is_alive():
+#             try:
+#                 result = result_q.get(timeout=0.1)
+#                 break
+#             except Empty:
+#                 continue
+
+#         if isinstance(result, str):
+#             result = {"text": result}
+#         elif not isinstance(result, dict) or "text" not in result:
+#             result = {"text": str(result)}
+
+#         yield result["text"], log_state  # ✅ other endpoints output text only and do not update history
+#         return
+
-# while True:
-#     # ⚠️ thread finished and queue drained → return so the generator terminates
-#     if not thread.is_alive() and result_q.empty():
-#         break
-#         # return  # ← added this line
-
-#     try:
-#         result = result_q.get(timeout=0.1)
-#     except Empty:
-#         continue
-
-#     # Normalize the format
-#     if isinstance(result, str):
-#         result = {"text": result}
-#     elif not isinstance(result, dict) or "text" not in result:
-#         result = {"text": str(result)}
-
-#     # yield result, log_state  # first time the model reply is actually sent to the front end
-#     if api_suffix == "/v1/chat/completions":
-#         history.append({"role": "assistant", "content": result["text"]})
-#         yield result["text"], None
-#     else:
-#         yield result["text"], log_state
-
-#     return  # ← replaced the old break with return
 
 # ────────────────── Gradio UI ──────────────────
 with gr.Blocks(title="调试界面") as demo:
     gr.Markdown(f"## 💬 调试界面 \n权重 **{MODEL_PATH.name}**")
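Since chat is a generator, Gradio streams each yield to the bound output components. A minimal, self-contained sketch of that wiring; the components and callback below are illustrative, not the ones meta_ui.py actually builds:

import gradio as gr

def chat_fn(user_msg):
    # Gradio streams successive yields from a generator callback to the outputs.
    yield "⏳ generating..."
    yield f"echo: {user_msg}"  # stand-in for the real backend call

with gr.Blocks(title="demo") as demo:
    box = gr.Textbox(label="prompt")
    out = gr.Textbox(label="reply")
    box.submit(chat_fn, inputs=box, outputs=out)

# demo.launch()  # uncomment to serve the UI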