From 02d459a252f95b2c5765be83795fffecd24c0e89 Mon Sep 17 00:00:00 2001
From: hailin
Date: Mon, 17 Nov 2025 09:53:35 +0800
Subject: [PATCH] Serve model under its directory name; pick max-tokens key per endpoint

---
 meta_ui.py       | 15 +++++++++------
 supervisord.conf |  2 +-
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/meta_ui.py b/meta_ui.py
index 53692df..e3eb841 100644
--- a/meta_ui.py
+++ b/meta_ui.py
@@ -9,13 +9,10 @@
 MODEL_PATH = Path("/root/.cradle/Alibaba/Qwen3-4B")
 
 def model_name(path: Path):
-    cfg = path / "config.json"
-    if cfg.exists():
-        data = json.load(cfg.open())
-        return data.get("architectures", [None])[0] or data.get("model_type") or path.name
+    # Return the directory name directly, to stay consistent with --served-model-name
     return path.name
 
-MODEL_NAME = model_name(MODEL_PATH)
+MODEL_NAME = model_name(MODEL_PATH)  # returns "Qwen3-4B"
 now = lambda: datetime.datetime.now().strftime("%H:%M:%S")
 
 # ────────────────── Log queue ──────────────────
@@ -120,8 +117,14 @@ def chat(
         prompt_input = user
 
     stop = [s.strip() for s in stop_raw.split(",") if s.strip()] or None
+
+    if api_suffix in ("/v1/completions", "/v1/chat/completions"):
+        max_tokens_key = "max_tokens"
+    else:
+        max_tokens_key = "max_new_tokens"  # kept only for /generate
+
     samp = {
-        ("max_tokens" if api_suffix == "/v1/completions" else "max_new_tokens"): int(max_new),
+        max_tokens_key: int(max_new),
         "temperature": temp,
         "top_p": top_p,
         "top_k": int(top_k),
diff --git a/supervisord.conf b/supervisord.conf
index 701d82c..cfb00b9 100644
--- a/supervisord.conf
+++ b/supervisord.conf
@@ -5,7 +5,7 @@ logfile_maxbytes=0
 loglevel=info
 
 [program:vllm]
-command=python3 -m vllm.entrypoints.openai.api_server --model /root/.cradle/Alibaba/Qwen3-4B --port 30000 --api-key token-abc123
+command=python3 -m vllm.entrypoints.openai.api_server --model /root/.cradle/Alibaba/Qwen3-4B --served-model-name Qwen3-4B --port 30000 --api-key token-abc123
 autostart=true
 autorestart=true
 stdout_logfile=/dev/stdout
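
With --served-model-name Qwen3-4B, clients must send exactly "Qwen3-4B" in
the "model" field, and the OpenAI-style endpoints expect "max_tokens" rather
than "max_new_tokens". A minimal smoke test covering both changes, as a
sketch: the port, API key, endpoint, and model name come from this patch,
while the localhost host and the prompt are assumptions.

    import requests

    # vLLM OpenAI-compatible server as launched by supervisord.conf:
    # port 30000, API key token-abc123, served model name Qwen3-4B.
    # localhost is an assumption; adjust the host if the server runs elsewhere.
    resp = requests.post(
        "http://localhost:30000/v1/chat/completions",
        headers={"Authorization": "Bearer token-abc123"},
        json={
            "model": "Qwen3-4B",  # must match --served-model-name exactly
            "messages": [{"role": "user", "content": "Say hi in one word."}],
            "max_tokens": 16,  # OpenAI-style key; /generate would take max_new_tokens
        },
        timeout=60,
    )
    resp.raise_for_status()
    print(resp.json()["choices"][0]["message"]["content"])

A mismatched "model" value makes the server return a 404 model-not-found
error, which is what the meta_ui.py change above guards against by deriving
MODEL_NAME from the same directory name.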