hailin 2025-07-27 19:07:21 +08:00
parent 095311d016
commit 0ce5191d31
1 changed file with 7 additions and 7 deletions


@@ -47,10 +47,10 @@ def backend(text, sampling, api_suffix):
     url = f"http://localhost:30000{api_suffix}"
     if api_suffix == "/generate":
         payload = {"model": MODEL_NAME, "text": text, "sampling_params": sampling}
-    else:  # "/v1/chat/completions"
+    elif api_suffix == "/v1/completions":
         payload = {
             "model": MODEL_NAME,
-            "messages": [{"role": "user", "content": text}],
+            "prompt": text,
             **sampling
         }
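For quick reference, a minimal standalone sketch of the two payload shapes this branch now builds; the model name, port binding, and example sampling dict below are placeholders mirroring the diff, not values taken from the file:

import requests

MODEL_NAME = "my-model"          # assumption: placeholder, set to your served model name
BASE = "http://localhost:30000"  # same port as in the diff

def build_payload(text, sampling, api_suffix):
    if api_suffix == "/generate":
        # native /generate endpoint: sampling settings nested under "sampling_params"
        return {"model": MODEL_NAME, "text": text, "sampling_params": sampling}
    if api_suffix == "/v1/completions":
        # OpenAI-style completions endpoint: flat payload with a "prompt" field
        return {"model": MODEL_NAME, "prompt": text, **sampling}
    raise ValueError(f"unsupported endpoint: {api_suffix}")

# Example (assumes a server is listening on BASE):
# resp = requests.post(BASE + "/v1/completions",
#                      json=build_payload("Hello", {"max_tokens": 16}, "/v1/completions"),
#                      timeout=60)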
@@ -70,9 +70,9 @@ def backend(text, sampling, api_suffix):
         meta = data.get("meta_info", {})
         fr = meta.get("finish_reason")
         ctok = meta.get("completion_tokens")
-    else:
+    elif api_suffix == "/v1/completions":
         choice = data.get("choices", [{}])[0]
-        txt = choice.get("message", {}).get("content", "").strip()
+        txt = choice.get("text", "").strip()
         fr = choice.get("finish_reason")
         ctok = data.get("usage", {}).get("completion_tokens")
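The same change on the read side, as a hedged standalone sketch; the text field in the /generate branch is an assumption, since that line sits outside this hunk:

def parse_response(data, api_suffix):
    # Returns (text, finish_reason, completion_tokens) for either endpoint's JSON body.
    if api_suffix == "/generate":
        meta = data.get("meta_info", {})
        txt = data.get("text", "")  # assumption: /generate returns the text at the top level
        return txt, meta.get("finish_reason"), meta.get("completion_tokens")
    if api_suffix == "/v1/completions":
        choice = data.get("choices", [{}])[0]
        txt = choice.get("text", "").strip()  # completions API: "text", not "message.content"
        fr = choice.get("finish_reason")
        ctok = data.get("usage", {}).get("completion_tokens")
        return txt, fr, ctok
    return "", None, None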
@@ -100,7 +100,7 @@ def chat(
     stop = [s.strip() for s in stop_raw.split(",") if s.strip()] or None
     samp = {
-        ("max_tokens" if api_suffix == "/v1/chat/completions" else "max_new_tokens"): int(max_new),
+        ("max_tokens" if api_suffix == "/v1/completions" else "max_new_tokens"): int(max_new),
         "temperature": temp,
         "top_p": top_p,
         "top_k": int(top_k),
@@ -121,7 +121,7 @@ def chat(
     while True:
         try:
             result = result_q.get(timeout=0.1)
-            # ★ Whether from /generate (plain str) or /v1/chat/completions (dict), normalize everything to {"text": ...}
+            # ★ Whether from /generate (plain str) or /v1/completions (dict), normalize everything to {"text": ...}
             if isinstance(result, str):
                 result = {"text": result}
             elif not isinstance(result, dict) or "text" not in result:
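The normalization the comment describes, pulled out as a tiny sketch that can be tested without the queue or the Gradio loop; the fallback branch is an assumption, since the original fallback lies outside the hunk:

def normalize(result):
    if isinstance(result, str):                        # /generate yields plain strings
        return {"text": result}
    if isinstance(result, dict) and "text" in result:  # /v1/completions results are already dicts
        return result
    return {"text": str(result)}                       # assumption: catch-all for unexpected shapes

assert normalize("hi") == {"text": "hi"}
assert normalize({"text": "hi", "finish_reason": "stop"})["text"] == "hi"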
@@ -137,7 +137,7 @@ with gr.Blocks(title="Debug UI") as demo:
     gr.Markdown(f"## 💬 Debug UI \nWeights **{MODEL_PATH.name}**")
     with gr.Row():
-        api_choice = gr.Dropdown(choices=["/generate", "/v1/chat/completions"],
+        api_choice = gr.Dropdown(choices=["/generate", "/v1/completions"],
                                   value="/generate", label="Select inference API")
     with gr.Row():
         max_new = gr.Slider(32, 32768, 128, label="max_new_tokens")
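To sanity-check the switched endpoint before opening the UI, a short smoke test against the new route; the model name is a placeholder and the port is taken from the diff:

import requests

resp = requests.post(
    "http://localhost:30000/v1/completions",
    json={"model": "my-model", "prompt": "Hello", "max_tokens": 16},  # "my-model": placeholder
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["text"])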