From 02d459a252f95b2c5765be83795fffecd24c0e89 Mon Sep 17 00:00:00 2001
From: hailin
Date: Mon, 17 Nov 2025 09:53:35 +0800
Subject: [PATCH] Serve model under its directory name; pick max-tokens key per endpoint

---
 meta_ui.py       | 15 +++++++++------
 supervisord.conf |  2 +-
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/meta_ui.py b/meta_ui.py
index 53692df..e3eb841 100644
--- a/meta_ui.py
+++ b/meta_ui.py
@@ -9,13 +9,10 @@
 MODEL_PATH = Path("/root/.cradle/Alibaba/Qwen3-4B")
 
 def model_name(path: Path):
-    cfg = path / "config.json"
-    if cfg.exists():
-        data = json.load(cfg.open())
-        return data.get("architectures", [None])[0] or data.get("model_type") or path.name
+    # Return the directory name directly, to stay consistent with --served-model-name
     return path.name
 
-MODEL_NAME = model_name(MODEL_PATH)
+MODEL_NAME = model_name(MODEL_PATH)  # returns "Qwen3-4B"
 now = lambda: datetime.datetime.now().strftime("%H:%M:%S")
 
 # ────────────────── Log queue ──────────────────
@@ -120,8 +117,14 @@ def chat(
         prompt_input = user
 
     stop = [s.strip() for s in stop_raw.split(",") if s.strip()] or None
+
+    if api_suffix in ("/v1/completions", "/v1/chat/completions"):
+        max_tokens_key = "max_tokens"
+    else:
+        max_tokens_key = "max_new_tokens"  # kept only for /generate
+
     samp = {
-        ("max_tokens" if api_suffix == "/v1/completions" else "max_new_tokens"): int(max_new),
+        max_tokens_key: int(max_new),
         "temperature": temp,
         "top_p": top_p,
         "top_k": int(top_k),
diff --git a/supervisord.conf b/supervisord.conf
index 701d82c..cfb00b9 100644
--- a/supervisord.conf
+++ b/supervisord.conf
@@ -5,7 +5,7 @@ logfile_maxbytes=0
 loglevel=info
 
 [program:vllm]
-command=python3 -m vllm.entrypoints.openai.api_server --model /root/.cradle/Alibaba/Qwen3-4B --port 30000 --api-key token-abc123
+command=python3 -m vllm.entrypoints.openai.api_server --model /root/.cradle/Alibaba/Qwen3-4B --served-model-name Qwen3-4B --port 30000 --api-key token-abc123
 autostart=true
 autorestart=true
 stdout_logfile=/dev/stdout
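
With --served-model-name Qwen3-4B, clients must send exactly "Qwen3-4B" in
the "model" field, and the OpenAI-style endpoints expect "max_tokens" rather
than "max_new_tokens". A minimal smoke test covering both changes, as a
sketch: the port, API key, endpoint, and model name come from this patch,
while the localhost host and the prompt are assumptions.

    import requests

    # vLLM OpenAI-compatible server as launched by supervisord.conf:
    # port 30000, API key token-abc123, served model name Qwen3-4B.
    # localhost is an assumption; adjust the host if the server runs elsewhere.
    resp = requests.post(
        "http://localhost:30000/v1/chat/completions",
        headers={"Authorization": "Bearer token-abc123"},
        json={
            "model": "Qwen3-4B",  # must match --served-model-name exactly
            "messages": [{"role": "user", "content": "Say hi in one word."}],
            "max_tokens": 16,  # OpenAI-style key; /generate would take max_new_tokens
        },
        timeout=60,
    )
    resp.raise_for_status()
    print(resp.json()["choices"][0]["message"]["content"])

A mismatched "model" value makes the server return a 404 model-not-found
error, which is what the meta_ui.py change above guards against by deriving
MODEL_NAME from the same directory name.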