This commit is contained in:
hailin 2025-08-01 11:54:41 +08:00
parent 47bb4e366e
commit d2df3af90f
1 changed file with 86 additions and 47 deletions

View File

@ -109,24 +109,15 @@ def chat(
):
from queue import Queue, Empty
# 解析传入的 ChatInput 格式
user = user_msg["text"] if isinstance(user_msg, dict) and "text" in user_msg else user_msg
# 构造 OpenAI 风格 messages仅用于 /v1/chat/completions
if api_suffix == "/v1/chat/completions":
messages = []
# for u, a in history:
# messages.append({"role": "user", "content": u})
# messages.append({"role": "assistant", "content": a})
messages = history[:] # 正确使用 OpenAI 格式
# 给 LLM 的完整 history用于上下文推理
messages = history[:]
messages.append({"role": "user", "content": user})
prompt_input = messages
# user_input = user_msg["text"] if isinstance(user_msg, dict) and "text" in user_msg else user_msg
# messages.append({"role": "user", "content": user_input})
# prompt_input = messages
else:
prompt_input = user # 原来的单轮文本 prompt
prompt_input = user
stop = [s.strip() for s in stop_raw.split(",") if s.strip()] or None
samp = {
@ -142,15 +133,11 @@ def chat(
result_q = Queue()
def worker():
#out = backend(user, samp, api_suffix)
out = backend(prompt_input, samp, api_suffix)
result_q.put(out)
# threading.Thread(target=worker).start()
thread = threading.Thread(target=worker, daemon=True)
thread.start()
# yield "⏳ 正在生成中...", log_state
thread = threading.Thread(target=worker, daemon=True)
thread.start()
if api_suffix == "/v1/chat/completions":
while True:
@ -166,8 +153,9 @@ def chat(
elif not isinstance(result, dict) or "text" not in result:
result = {"text": str(result)}
history.append({"role": "assistant", "content": result["text"]})
yield result["text"], None # ✅ 显示模型输出,同时更新 history
# ❌ 不 append 到 history让前端 UI 不显示之前的历史)
# ✅ 但我们已经在前面把 history 全部传给 LLM 推理了
yield result["text"], None # UI 只显示当前回复
return
else:
while thread.is_alive():
@ -182,35 +170,86 @@ def chat(
elif not isinstance(result, dict) or "text" not in result:
result = {"text": str(result)}
yield result["text"], log_state # ✅ 其它接口只输出文本,不更新 history
yield result["text"], log_state
return
# # ────────────────── Chat 回调 ──────────────────
# def chat(
# user_msg, history,
# max_new, temp, top_p, top_k,
# rep_pen, pres_pen, stop_raw,
# api_suffix, log_state
# ):
# from queue import Queue, Empty
# # 解析传入的 ChatInput 格式
# user = user_msg["text"] if isinstance(user_msg, dict) and "text" in user_msg else user_msg
# # 构造 OpenAI 风格 messages仅用于 /v1/chat/completions
# if api_suffix == "/v1/chat/completions":
# messages = []
# messages = history[:] # 正确使用 OpenAI 格式
# messages.append({"role": "user", "content": user})
# prompt_input = messages
# else:
# prompt_input = user # 原来的单轮文本 prompt
# stop = [s.strip() for s in stop_raw.split(",") if s.strip()] or None
# samp = {
# ("max_tokens" if api_suffix == "/v1/completions" else "max_new_tokens"): int(max_new),
# "temperature": temp,
# "top_p": top_p,
# "top_k": int(top_k),
# "repetition_penalty": rep_pen,
# "presence_penalty": pres_pen,
# **({"stop": stop} if stop else {})
# }
# result_q = Queue()
# def worker():
# out = backend(prompt_input, samp, api_suffix)
# result_q.put(out)
# thread = threading.Thread(target=worker, daemon=True)
# thread.start()
# if api_suffix == "/v1/chat/completions":
# while True:
# if not thread.is_alive() and result_q.empty():
# break
# try:
# result = result_q.get(timeout=0.1)
# except Empty:
# continue
# if isinstance(result, str):
# result = {"text": result}
# elif not isinstance(result, dict) or "text" not in result:
# result = {"text": str(result)}
# history.append({"role": "assistant", "content": result["text"]})
# yield result["text"], None # ✅ 显示模型输出,同时更新 history
# return
# else:
# while thread.is_alive():
# try:
# result = result_q.get(timeout=0.1)
# break
# except Empty:
# continue
# if isinstance(result, str):
# result = {"text": result}
# elif not isinstance(result, dict) or "text" not in result:
# result = {"text": str(result)}
# yield result["text"], log_state # ✅ 其它接口只输出文本,不更新 history
# return
# while True:
# # ⚠️ 线程已结束且队列已空 → 直接 return 让生成器终止
# if not thread.is_alive() and result_q.empty():
# break
# # return # ← 新增这一行
# try:
# result = result_q.get(timeout=0.1)
# except Empty:
# continue
# # 统一格式
# if isinstance(result, str):
# result = {"text": result}
# elif not isinstance(result, dict) or "text" not in result:
# result = {"text": str(result)}
# # yield result, log_state # 第一次真正把模型回复丢给前端
# if api_suffix == "/v1/chat/completions":
# history.append({"role": "assistant", "content": result["text"]})
# yield result["text"], None
# else:
# yield result["text"], log_state
# return # ← 把旧的 break 换成 return
# ────────────────── Gradio UI ──────────────────
with gr.Blocks(title="调试界面") as demo:
gr.Markdown(f"## 💬 调试界面 \n权重 **{MODEL_PATH.name}**")