hailin 2025-07-27 19:07:21 +08:00
parent 095311d016
commit 0ce5191d31
1 changed file with 7 additions and 7 deletions


@@ -47,10 +47,10 @@ def backend(text, sampling, api_suffix):
     url = f"http://localhost:30000{api_suffix}"
     if api_suffix == "/generate":
         payload = {"model": MODEL_NAME, "text": text, "sampling_params": sampling}
-    else:  # "/v1/chat/completions"
+    elif api_suffix == "/v1/completions":
         payload = {
             "model": MODEL_NAME,
-            "messages": [{"role": "user", "content": text}],
+            "prompt": text,
             **sampling
         }
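For quick reference, a minimal standalone sketch of the two payload shapes this branch now builds; the model name, port binding, and example sampling dict below are placeholders mirroring the diff, not values taken from the file:

import requests

MODEL_NAME = "my-model"          # assumption: placeholder, set to your served model name
BASE = "http://localhost:30000"  # same port as in the diff

def build_payload(text, sampling, api_suffix):
    if api_suffix == "/generate":
        # native /generate endpoint: sampling settings nested under "sampling_params"
        return {"model": MODEL_NAME, "text": text, "sampling_params": sampling}
    if api_suffix == "/v1/completions":
        # OpenAI-style completions endpoint: flat payload with a "prompt" field
        return {"model": MODEL_NAME, "prompt": text, **sampling}
    raise ValueError(f"unsupported endpoint: {api_suffix}")

# Example (assumes a server is listening on BASE):
# resp = requests.post(BASE + "/v1/completions",
#                      json=build_payload("Hello", {"max_tokens": 16}, "/v1/completions"),
#                      timeout=60)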
@@ -70,9 +70,9 @@ def backend(text, sampling, api_suffix):
         meta = data.get("meta_info", {})
         fr = meta.get("finish_reason")
         ctok = meta.get("completion_tokens")
-    else:
+    elif api_suffix == "/v1/completions":
         choice = data.get("choices", [{}])[0]
-        txt = choice.get("message", {}).get("content", "").strip()
+        txt = choice.get("text", "").strip()
         fr = choice.get("finish_reason")
         ctok = data.get("usage", {}).get("completion_tokens")
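The same change on the read side, as a hedged standalone sketch; the text field in the /generate branch is an assumption, since that line sits outside this hunk:

def parse_response(data, api_suffix):
    # Returns (text, finish_reason, completion_tokens) for either endpoint's JSON body.
    if api_suffix == "/generate":
        meta = data.get("meta_info", {})
        txt = data.get("text", "")  # assumption: /generate returns the text at the top level
        return txt, meta.get("finish_reason"), meta.get("completion_tokens")
    if api_suffix == "/v1/completions":
        choice = data.get("choices", [{}])[0]
        txt = choice.get("text", "").strip()  # completions API: "text", not "message.content"
        fr = choice.get("finish_reason")
        ctok = data.get("usage", {}).get("completion_tokens")
        return txt, fr, ctok
    return "", None, None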
@@ -100,7 +100,7 @@ def chat(
     stop = [s.strip() for s in stop_raw.split(",") if s.strip()] or None
     samp = {
-        ("max_tokens" if api_suffix == "/v1/chat/completions" else "max_new_tokens"): int(max_new),
+        ("max_tokens" if api_suffix == "/v1/completions" else "max_new_tokens"): int(max_new),
         "temperature": temp,
         "top_p": top_p,
         "top_k": int(top_k),
@@ -121,7 +121,7 @@ def chat(
     while True:
         try:
             result = result_q.get(timeout=0.1)
-            # ★ Whether from /generate (plain str) or /v1/chat/completions (dict), normalize everything to {"text": ...}
+            # ★ Whether from /generate (plain str) or /v1/completions (dict), normalize everything to {"text": ...}
             if isinstance(result, str):
                 result = {"text": result}
             elif not isinstance(result, dict) or "text" not in result:
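The normalization the comment describes, pulled out as a tiny sketch that can be tested without the queue or the Gradio loop; the fallback branch is an assumption, since the original fallback lies outside the hunk:

def normalize(result):
    if isinstance(result, str):                        # /generate yields plain strings
        return {"text": result}
    if isinstance(result, dict) and "text" in result:  # /v1/completions results are already dicts
        return result
    return {"text": str(result)}                       # assumption: catch-all for unexpected shapes

assert normalize("hi") == {"text": "hi"}
assert normalize({"text": "hi", "finish_reason": "stop"})["text"] == "hi"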
@@ -137,7 +137,7 @@ with gr.Blocks(title="Debug UI") as demo:
     gr.Markdown(f"## 💬 Debug UI \nWeights **{MODEL_PATH.name}**")
     with gr.Row():
-        api_choice = gr.Dropdown(choices=["/generate", "/v1/chat/completions"],
+        api_choice = gr.Dropdown(choices=["/generate", "/v1/completions"],
                                   value="/generate", label="Select inference API")
     with gr.Row():
         max_new = gr.Slider(32, 32768, 128, label="max_new_tokens")
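To sanity-check the switched endpoint before opening the UI, a short smoke test against the new route; the model name is a placeholder and the port is taken from the diff:

import requests

resp = requests.post(
    "http://localhost:30000/v1/completions",
    json={"model": "my-model", "prompt": "Hello", "max_tokens": 16},  # "my-model": placeholder
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["text"])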