commit d2df3af90f
parent 47bb4e366e

meta_ui.py (133 lines changed)
@@ -109,24 +109,15 @@ def chat(
 ):
     from queue import Queue, Empty
 
-    # Parse the incoming ChatInput format
     user = user_msg["text"] if isinstance(user_msg, dict) and "text" in user_msg else user_msg
 
-    # Build OpenAI-style messages, used only for /v1/chat/completions
     if api_suffix == "/v1/chat/completions":
-        messages = []
-        # for u, a in history:
-        #     messages.append({"role": "user", "content": u})
-        #     messages.append({"role": "assistant", "content": a})
-        messages = history[:]  # correct OpenAI format
+        # full history for the LLM (used for contextual reasoning)
+        messages = history[:]
         messages.append({"role": "user", "content": user})
         prompt_input = messages
-
-        # user_input = user_msg["text"] if isinstance(user_msg, dict) and "text" in user_msg else user_msg
-        # messages.append({"role": "user", "content": user_input})
-        # prompt_input = messages
     else:
-        prompt_input = user  # original single-turn text prompt
+        prompt_input = user
 
     stop = [s.strip() for s in stop_raw.split(",") if s.strip()] or None
     samp = {
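Note: after this hunk, the full chat history (assumed to already be a list of OpenAI-style {"role": ..., "content": ...} dicts) goes to the backend, while the UI renders only the current turn. A minimal sketch of the two payload shapes; build_prompt_input and the example history are illustrative assumptions, not code from meta_ui.py:

def build_prompt_input(user, history, api_suffix):
    # Mirrors the hunk above: full chat history for the chat endpoint,
    # a bare string for every other endpoint.
    if api_suffix == "/v1/chat/completions":
        messages = history[:]  # copy so the caller's list is not mutated
        messages.append({"role": "user", "content": user})
        return messages
    return user

history = [{"role": "user", "content": "hi"},
           {"role": "assistant", "content": "hello!"}]
print(build_prompt_input("how are you?", history, "/v1/chat/completions"))
# -> the two prior turns plus {"role": "user", "content": "how are you?"}
print(build_prompt_input("how are you?", history, "/v1/completions"))
# -> "how are you?"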
@@ -142,15 +133,11 @@ def chat(
     result_q = Queue()
 
     def worker():
-        # out = backend(user, samp, api_suffix)
         out = backend(prompt_input, samp, api_suffix)
         result_q.put(out)
 
-    # threading.Thread(target=worker).start()
     thread = threading.Thread(target=worker, daemon=True)
     thread.start()
-    # yield "⏳ generating...", log_state
-
 
     if api_suffix == "/v1/chat/completions":
         while True:
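The hunk above settles on a daemon thread feeding a Queue, which the generator then polls without blocking. A self-contained sketch of that pattern; run_in_background and poll are illustrative names, not functions in this file:

import threading
from queue import Queue, Empty

def run_in_background(fn, *args):
    # Run a blocking call on a daemon thread; hand the result back via a queue.
    q = Queue()
    t = threading.Thread(target=lambda: q.put(fn(*args)), daemon=True)
    t.start()
    return t, q

def poll(thread, q, interval=0.1):
    # Wait until the worker has produced a result, or exited without one.
    while True:
        if not thread.is_alive() and q.empty():
            return None
        try:
            return q.get(timeout=interval)
        except Empty:
            continue

t, q = run_in_background(lambda: "backend output")
print(poll(t, q))  # -> "backend output"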
@@ -166,8 +153,9 @@ def chat(
             elif not isinstance(result, dict) or "text" not in result:
                 result = {"text": str(result)}
 
-            history.append({"role": "assistant", "content": result["text"]})
-            yield result["text"], None  # ✅ show model output and update history
+            # ❌ do not append to history (keeps the front-end UI from re-rendering past turns)
+            # ✅ the full history was already handed to the LLM for inference above
+            yield result["text"], None  # UI shows only the current reply
             return
     else:
         while thread.is_alive():
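The elif chain in this region coerces whatever the backend returns into a {"text": ...} dict. Restated as a standalone helper (the name normalize_result is an assumption for illustration):

def normalize_result(result):
    # str -> wrap; non-dict or missing "text" -> stringify; dict with "text" -> as-is.
    if isinstance(result, str):
        return {"text": result}
    if not isinstance(result, dict) or "text" not in result:
        return {"text": str(result)}
    return result

assert normalize_result("hi") == {"text": "hi"}
assert normalize_result({"text": "ok", "tokens": 3}) == {"text": "ok", "tokens": 3}
assert normalize_result(42) == {"text": "42"}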
@@ -182,35 +170,86 @@ def chat(
         elif not isinstance(result, dict) or "text" not in result:
             result = {"text": str(result)}
 
-        yield result["text"], log_state  # ✅ other endpoints output text only and do not update history
+        yield result["text"], log_state
         return
 
 
 
+# # ────────────────── Chat callback ──────────────────
+# def chat(
+#     user_msg, history,
+#     max_new, temp, top_p, top_k,
+#     rep_pen, pres_pen, stop_raw,
+#     api_suffix, log_state
+# ):
+#     from queue import Queue, Empty
+
+#     # Parse the incoming ChatInput format
+#     user = user_msg["text"] if isinstance(user_msg, dict) and "text" in user_msg else user_msg
+
+#     # Build OpenAI-style messages, used only for /v1/chat/completions
+#     if api_suffix == "/v1/chat/completions":
+#         messages = []
+#         messages = history[:]  # correct OpenAI format
+#         messages.append({"role": "user", "content": user})
+#         prompt_input = messages
+
+#     else:
+#         prompt_input = user  # original single-turn text prompt
+
+#     stop = [s.strip() for s in stop_raw.split(",") if s.strip()] or None
+#     samp = {
+#         ("max_tokens" if api_suffix == "/v1/completions" else "max_new_tokens"): int(max_new),
+#         "temperature": temp,
+#         "top_p": top_p,
+#         "top_k": int(top_k),
+#         "repetition_penalty": rep_pen,
+#         "presence_penalty": pres_pen,
+#         **({"stop": stop} if stop else {})
+#     }
+
+#     result_q = Queue()
+
+#     def worker():
+#         out = backend(prompt_input, samp, api_suffix)
+#         result_q.put(out)
+
+#     thread = threading.Thread(target=worker, daemon=True)
+#     thread.start()
+
+#     if api_suffix == "/v1/chat/completions":
+#         while True:
+#             if not thread.is_alive() and result_q.empty():
+#                 break
+#             try:
+#                 result = result_q.get(timeout=0.1)
+#             except Empty:
+#                 continue
+
+#             if isinstance(result, str):
+#                 result = {"text": result}
+#             elif not isinstance(result, dict) or "text" not in result:
+#                 result = {"text": str(result)}
+
+#             history.append({"role": "assistant", "content": result["text"]})
+#             yield result["text"], None  # ✅ show model output and update history
+#             return
+#     else:
+#         while thread.is_alive():
+#             try:
+#                 result = result_q.get(timeout=0.1)
+#                 break
+#             except Empty:
+#                 continue
+
+#         if isinstance(result, str):
+#             result = {"text": result}
+#         elif not isinstance(result, dict) or "text" not in result:
+#             result = {"text": str(result)}
+
+#         yield result["text"], log_state  # ✅ other endpoints output text only and do not update history
+#         return
+
-# while True:
-#     # ⚠️ thread finished and queue drained → return so the generator terminates
-#     if not thread.is_alive() and result_q.empty():
-#         break
-#         # return  # ← added this line
-
-#     try:
-#         result = result_q.get(timeout=0.1)
-#     except Empty:
-#         continue
-
-#     # Normalize the format
-#     if isinstance(result, str):
-#         result = {"text": result}
-#     elif not isinstance(result, dict) or "text" not in result:
-#         result = {"text": str(result)}
-
-#     # yield result, log_state  # first time the model reply is actually sent to the front end
-#     if api_suffix == "/v1/chat/completions":
-#         history.append({"role": "assistant", "content": result["text"]})
-#         yield result["text"], None
-#     else:
-#         yield result["text"], log_state
-
-#     return  # ← replaced the old break with return
 
 # ────────────────── Gradio UI ──────────────────
 with gr.Blocks(title="调试界面") as demo:
     gr.Markdown(f"## 💬 调试界面 \n权重 **{MODEL_PATH.name}**")
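Since chat is a generator, Gradio streams each yield to the bound output components. A minimal, self-contained sketch of that wiring; the components and callback below are illustrative, not the ones meta_ui.py actually builds:

import gradio as gr

def chat_fn(user_msg):
    # Gradio streams successive yields from a generator callback to the outputs.
    yield "⏳ generating..."
    yield f"echo: {user_msg}"  # stand-in for the real backend call

with gr.Blocks(title="demo") as demo:
    box = gr.Textbox(label="prompt")
    out = gr.Textbox(label="reply")
    box.submit(chat_fn, inputs=box, outputs=out)

# demo.launch()  # uncomment to serve the UI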