This commit is contained in:
parent 47bb4e366e
commit d2df3af90f

meta_ui.py (119 changed lines)
@@ -109,24 +109,15 @@ def chat(
 ):
     from queue import Queue, Empty

     # Parse the incoming ChatInput format
     user = user_msg["text"] if isinstance(user_msg, dict) and "text" in user_msg else user_msg

     # Build OpenAI-style messages, used only for /v1/chat/completions
     if api_suffix == "/v1/chat/completions":
-        messages = []
-        # for u, a in history:
-        #     messages.append({"role": "user", "content": u})
-        #     messages.append({"role": "assistant", "content": a})
-        messages = history[:]  # use the OpenAI format correctly
+        # Full history for the LLM (used for in-context reasoning)
+        messages = history[:]
         messages.append({"role": "user", "content": user})
         prompt_input = messages
-
-        # user_input = user_msg["text"] if isinstance(user_msg, dict) and "text" in user_msg else user_msg
-        # messages.append({"role": "user", "content": user_input})
-        # prompt_input = messages
     else:
-        prompt_input = user  # the original single-turn text prompt
+        prompt_input = user

     stop = [s.strip() for s in stop_raw.split(",") if s.strip()] or None
     samp = {
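Reviewer note: this hunk assumes `history` already arrives as OpenAI-style `{"role": ..., "content": ...}` dicts (e.g. a chatbot in messages mode), so the deleted tuple-unpacking loop is no longer needed. A minimal sketch of a normalizer that would tolerate both shapes, in case legacy `(user, assistant)` tuples still reach this path — `to_messages` is a hypothetical helper, not part of this commit:

```python
# Hypothetical helper (not in this commit): accept either history shape.
def to_messages(history):
    messages = []
    for turn in history:
        if isinstance(turn, dict):  # already {"role": ..., "content": ...}
            messages.append(turn)
        else:  # legacy (user, assistant) tuple pair
            u, a = turn
            messages.append({"role": "user", "content": u})
            messages.append({"role": "assistant", "content": a})
    return messages
```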
@@ -142,15 +133,11 @@ def chat(
     result_q = Queue()

     def worker():
-        # out = backend(user, samp, api_suffix)
         out = backend(prompt_input, samp, api_suffix)
         result_q.put(out)

-    # threading.Thread(target=worker).start()
     thread = threading.Thread(target=worker, daemon=True)
     thread.start()
-    # yield "⏳ Generating...", log_state
-

     if api_suffix == "/v1/chat/completions":
         while True:
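Reviewer note: the hunk above runs the blocking `backend` call on a daemon thread and hands the result back through a queue, so the generator stays responsive while waiting. A self-contained sketch of that pattern under the same assumptions (`fn` stands in for `backend`, whose signature this diff does not show):

```python
import threading
from queue import Queue, Empty

def call_on_worker(fn, *args):
    """Run fn(*args) on a daemon thread and poll for its single result."""
    result_q = Queue()

    def worker():
        result_q.put(fn(*args))

    thread = threading.Thread(target=worker, daemon=True)
    thread.start()

    while True:
        # Thread died without producing anything: stop instead of spinning forever.
        if not thread.is_alive() and result_q.empty():
            return None
        try:
            return result_q.get(timeout=0.1)  # short timeout keeps the loop responsive
        except Empty:
            continue
```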
@@ -166,8 +153,9 @@ def chat(
             elif not isinstance(result, dict) or "text" not in result:
                 result = {"text": str(result)}

-            history.append({"role": "assistant", "content": result["text"]})
-            yield result["text"], None  # ✅ show the model output and update history as well
+            # ❌ do not append to history (so the front-end UI does not show earlier turns)
+            # ✅ but the full history was already passed to the LLM for inference above
+            yield result["text"], None  # the UI only shows the current reply
             return
     else:
         while thread.is_alive():
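Reviewer note: both branches of `chat` coerce the backend response into a `{"text": ...}` dict before yielding, and the same three-way check recurs in the commented-out block below. It could be factored into one helper, sketched here (`normalize_result` is an illustrative name, not in the commit):

```python
def normalize_result(result):
    """Coerce a backend response of unknown shape into {"text": str}."""
    if isinstance(result, str):
        return {"text": result}
    if not isinstance(result, dict) or "text" not in result:
        return {"text": str(result)}
    return result
```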
@@ -182,34 +170,85 @@ def chat(
         elif not isinstance(result, dict) or "text" not in result:
             result = {"text": str(result)}

-        yield result["text"], log_state  # ✅ other endpoints output text only, without updating history
+        yield result["text"], log_state
         return

-# while True:
-#     # ⚠️ thread finished and queue already empty → return so the generator terminates
-#     if not thread.is_alive() and result_q.empty():
-#         break
-#         # return  # ← newly added line
-
-#     try:
-#         result = result_q.get(timeout=0.1)
-#     except Empty:
-#         continue
-
-#     # normalize the format
-#     if isinstance(result, str):
-#         result = {"text": result}
-#     elif not isinstance(result, dict) or "text" not in result:
-#         result = {"text": str(result)}
+# # ────────────────── Chat callback ──────────────────
+# def chat(
+#     user_msg, history,
+#     max_new, temp, top_p, top_k,
+#     rep_pen, pres_pen, stop_raw,
+#     api_suffix, log_state
+# ):
+#     from queue import Queue, Empty

-#     # yield result, log_state  # the first time the model reply actually reaches the front end
-#     if api_suffix == "/v1/chat/completions":
-#         history.append({"role": "assistant", "content": result["text"]})
-#         yield result["text"], None
-#     else:
-#         yield result["text"], log_state
+#     # Parse the incoming ChatInput format
+#     user = user_msg["text"] if isinstance(user_msg, dict) and "text" in user_msg else user_msg

-#     return  # ← replace the old break with return
+#     # Build OpenAI-style messages, used only for /v1/chat/completions
+#     if api_suffix == "/v1/chat/completions":
+#         messages = []
+#         messages = history[:]  # use the OpenAI format correctly
+#         messages.append({"role": "user", "content": user})
+#         prompt_input = messages
+
+#     else:
+#         prompt_input = user  # the original single-turn text prompt
+
+#     stop = [s.strip() for s in stop_raw.split(",") if s.strip()] or None
+#     samp = {
+#         ("max_tokens" if api_suffix == "/v1/completions" else "max_new_tokens"): int(max_new),
+#         "temperature": temp,
+#         "top_p": top_p,
+#         "top_k": int(top_k),
+#         "repetition_penalty": rep_pen,
+#         "presence_penalty": pres_pen,
+#         **({"stop": stop} if stop else {})
+#     }
+
+#     result_q = Queue()
+
+#     def worker():
+#         out = backend(prompt_input, samp, api_suffix)
+#         result_q.put(out)
+
+#     thread = threading.Thread(target=worker, daemon=True)
+#     thread.start()
+
+#     if api_suffix == "/v1/chat/completions":
+#         while True:
+#             if not thread.is_alive() and result_q.empty():
+#                 break
+#             try:
+#                 result = result_q.get(timeout=0.1)
+#             except Empty:
+#                 continue
+
+#             if isinstance(result, str):
+#                 result = {"text": result}
+#             elif not isinstance(result, dict) or "text" not in result:
+#                 result = {"text": str(result)}
+
+#             history.append({"role": "assistant", "content": result["text"]})
+#             yield result["text"], None  # ✅ show the model output and update history as well
+#             return
+#     else:
+#         while thread.is_alive():
+#             try:
+#                 result = result_q.get(timeout=0.1)
+#                 break
+#             except Empty:
+#                 continue
+
+#         if isinstance(result, str):
+#             result = {"text": result}
+#         elif not isinstance(result, dict) or "text" not in result:
+#             result = {"text": str(result)}
+
+#         yield result["text"], log_state  # ✅ other endpoints output text only, without updating history
+#         return
+
 # ────────────────── Gradio UI ──────────────────
 with gr.Blocks(title="Debug UI") as demo:
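Reviewer note: the commented-out `samp` dict above picks the token-limit key per endpoint (`max_tokens` for /v1/completions, `max_new_tokens` elsewhere). A standalone sketch of that construction under the same assumptions (`build_sampling_params` is an illustrative name, not in the commit):

```python
def build_sampling_params(api_suffix, max_new, temp, top_p, top_k,
                          rep_pen, pres_pen, stop=None):
    """Assemble sampling parameters; the token-limit key depends on the endpoint."""
    samp = {
        ("max_tokens" if api_suffix == "/v1/completions" else "max_new_tokens"): int(max_new),
        "temperature": temp,
        "top_p": top_p,
        "top_k": int(top_k),
        "repetition_penalty": rep_pen,
        "presence_penalty": pres_pen,
    }
    if stop:
        samp["stop"] = stop  # only include stop sequences when provided
    return samp
```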