This commit is contained in:
parent 47bb4e366e
commit d2df3af90f

meta_ui.py (119 changed lines)
@@ -109,24 +109,15 @@ def chat(
 ):
     from queue import Queue, Empty

     # Parse the incoming ChatInput format
     user = user_msg["text"] if isinstance(user_msg, dict) and "text" in user_msg else user_msg

     # Build OpenAI-style messages, used only for /v1/chat/completions
     if api_suffix == "/v1/chat/completions":
-        messages = []
-        # for u, a in history:
-        #     messages.append({"role": "user", "content": u})
-        #     messages.append({"role": "assistant", "content": a})
-        messages = history[:]  # use the OpenAI format correctly
+        # Full history for the LLM (used for in-context reasoning)
+        messages = history[:]
         messages.append({"role": "user", "content": user})
         prompt_input = messages
-
-        # user_input = user_msg["text"] if isinstance(user_msg, dict) and "text" in user_msg else user_msg
-        # messages.append({"role": "user", "content": user_input})
-        # prompt_input = messages
     else:
-        prompt_input = user  # the original single-turn text prompt
+        prompt_input = user

     stop = [s.strip() for s in stop_raw.split(",") if s.strip()] or None
     samp = {
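Reviewer note: this hunk assumes `history` already arrives as OpenAI-style `{"role": ..., "content": ...}` dicts (e.g. a chatbot in messages mode), so the deleted tuple-unpacking loop is no longer needed. A minimal sketch of a normalizer that would tolerate both shapes, in case legacy `(user, assistant)` tuples still reach this path — `to_messages` is a hypothetical helper, not part of this commit:

```python
# Hypothetical helper (not in this commit): accept either history shape.
def to_messages(history):
    messages = []
    for turn in history:
        if isinstance(turn, dict):  # already {"role": ..., "content": ...}
            messages.append(turn)
        else:  # legacy (user, assistant) tuple pair
            u, a = turn
            messages.append({"role": "user", "content": u})
            messages.append({"role": "assistant", "content": a})
    return messages
```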
@@ -142,15 +133,11 @@ def chat(
     result_q = Queue()

     def worker():
-        # out = backend(user, samp, api_suffix)
         out = backend(prompt_input, samp, api_suffix)
         result_q.put(out)

-    # threading.Thread(target=worker).start()
     thread = threading.Thread(target=worker, daemon=True)
     thread.start()
-    # yield "⏳ Generating...", log_state
-

     if api_suffix == "/v1/chat/completions":
         while True:
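Reviewer note: the hunk above runs the blocking `backend` call on a daemon thread and hands the result back through a queue, so the generator stays responsive while waiting. A self-contained sketch of that pattern under the same assumptions (`fn` stands in for `backend`, whose signature this diff does not show):

```python
import threading
from queue import Queue, Empty

def call_on_worker(fn, *args):
    """Run fn(*args) on a daemon thread and poll for its single result."""
    result_q = Queue()

    def worker():
        result_q.put(fn(*args))

    thread = threading.Thread(target=worker, daemon=True)
    thread.start()

    while True:
        # Thread died without producing anything: stop instead of spinning forever.
        if not thread.is_alive() and result_q.empty():
            return None
        try:
            return result_q.get(timeout=0.1)  # short timeout keeps the loop responsive
        except Empty:
            continue
```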
@@ -166,8 +153,9 @@ def chat(
             elif not isinstance(result, dict) or "text" not in result:
                 result = {"text": str(result)}

-            history.append({"role": "assistant", "content": result["text"]})
-            yield result["text"], None  # ✅ show the model output and update history as well
+            # ❌ do not append to history (so the front-end UI does not show earlier turns)
+            # ✅ but the full history was already passed to the LLM for inference above
+            yield result["text"], None  # the UI only shows the current reply
             return
     else:
         while thread.is_alive():
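Reviewer note: both branches of `chat` coerce the backend response into a `{"text": ...}` dict before yielding, and the same three-way check recurs in the commented-out block below. It could be factored into one helper, sketched here (`normalize_result` is an illustrative name, not in the commit):

```python
def normalize_result(result):
    """Coerce a backend response of unknown shape into {"text": str}."""
    if isinstance(result, str):
        return {"text": result}
    if not isinstance(result, dict) or "text" not in result:
        return {"text": str(result)}
    return result
```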
@@ -182,34 +170,85 @@ def chat(
         elif not isinstance(result, dict) or "text" not in result:
             result = {"text": str(result)}

-        yield result["text"], log_state  # ✅ other endpoints output text only, without updating history
+        yield result["text"], log_state
         return

-# while True:
-#     # ⚠️ thread finished and queue already empty → return so the generator terminates
-#     if not thread.is_alive() and result_q.empty():
-#         break
-#         # return  # ← newly added line
-
-#     try:
-#         result = result_q.get(timeout=0.1)
-#     except Empty:
-#         continue
-
-#     # normalize the format
-#     if isinstance(result, str):
-#         result = {"text": result}
-#     elif not isinstance(result, dict) or "text" not in result:
-#         result = {"text": str(result)}
+# # ────────────────── Chat callback ──────────────────
+# def chat(
+#     user_msg, history,
+#     max_new, temp, top_p, top_k,
+#     rep_pen, pres_pen, stop_raw,
+#     api_suffix, log_state
+# ):
+#     from queue import Queue, Empty

-#     # yield result, log_state  # the first time the model reply actually reaches the front end
-#     if api_suffix == "/v1/chat/completions":
-#         history.append({"role": "assistant", "content": result["text"]})
-#         yield result["text"], None
-#     else:
-#         yield result["text"], log_state
+#     # Parse the incoming ChatInput format
+#     user = user_msg["text"] if isinstance(user_msg, dict) and "text" in user_msg else user_msg

-#     return  # ← replace the old break with return
+#     # Build OpenAI-style messages, used only for /v1/chat/completions
+#     if api_suffix == "/v1/chat/completions":
+#         messages = []
+#         messages = history[:]  # use the OpenAI format correctly
+#         messages.append({"role": "user", "content": user})
+#         prompt_input = messages
+
+#     else:
+#         prompt_input = user  # the original single-turn text prompt
+
+#     stop = [s.strip() for s in stop_raw.split(",") if s.strip()] or None
+#     samp = {
+#         ("max_tokens" if api_suffix == "/v1/completions" else "max_new_tokens"): int(max_new),
+#         "temperature": temp,
+#         "top_p": top_p,
+#         "top_k": int(top_k),
+#         "repetition_penalty": rep_pen,
+#         "presence_penalty": pres_pen,
+#         **({"stop": stop} if stop else {})
+#     }
+
+#     result_q = Queue()
+
+#     def worker():
+#         out = backend(prompt_input, samp, api_suffix)
+#         result_q.put(out)
+
+#     thread = threading.Thread(target=worker, daemon=True)
+#     thread.start()
+
+#     if api_suffix == "/v1/chat/completions":
+#         while True:
+#             if not thread.is_alive() and result_q.empty():
+#                 break
+#             try:
+#                 result = result_q.get(timeout=0.1)
+#             except Empty:
+#                 continue
+
+#             if isinstance(result, str):
+#                 result = {"text": result}
+#             elif not isinstance(result, dict) or "text" not in result:
+#                 result = {"text": str(result)}
+
+#             history.append({"role": "assistant", "content": result["text"]})
+#             yield result["text"], None  # ✅ show the model output and update history as well
+#             return
+#     else:
+#         while thread.is_alive():
+#             try:
+#                 result = result_q.get(timeout=0.1)
+#                 break
+#             except Empty:
+#                 continue
+
+#         if isinstance(result, str):
+#             result = {"text": result}
+#         elif not isinstance(result, dict) or "text" not in result:
+#             result = {"text": str(result)}
+
+#         yield result["text"], log_state  # ✅ other endpoints output text only, without updating history
+#         return
+
 # ────────────────── Gradio UI ──────────────────
 with gr.Blocks(title="Debug UI") as demo:
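Reviewer note: the commented-out `samp` dict above picks the token-limit key per endpoint (`max_tokens` for /v1/completions, `max_new_tokens` elsewhere). A standalone sketch of that construction under the same assumptions (`build_sampling_params` is an illustrative name, not in the commit):

```python
def build_sampling_params(api_suffix, max_new, temp, top_p, top_k,
                          rep_pen, pres_pen, stop=None):
    """Assemble sampling parameters; the token-limit key depends on the endpoint."""
    samp = {
        ("max_tokens" if api_suffix == "/v1/completions" else "max_new_tokens"): int(max_new),
        "temperature": temp,
        "top_p": top_p,
        "top_k": int(top_k),
        "repetition_penalty": rep_pen,
        "presence_penalty": pres_pen,
    }
    if stop:
        samp["stop"] = stop  # only include stop sequences when provided
    return samp
```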