This commit is contained in:
parent
7bb32f1ddc
commit
02d459a252
15
meta_ui.py
15
meta_ui.py
|
|
@@ -9,13 +9,10 @@ MODEL_PATH = Path("/root/.cradle/Alibaba/Qwen3-4B")
|
||||||
|
|
||||||
|
|
||||||
def model_name(path: Path):
|
def model_name(path: Path):
|
||||||
cfg = path / "config.json"
|
# 直接返回目录名,与 --served-model-name 保持一致
|
||||||
if cfg.exists():
|
|
||||||
data = json.load(cfg.open())
|
|
||||||
return data.get("architectures", [None])[0] or data.get("model_type") or path.name
|
|
||||||
return path.name
|
return path.name
|
||||||
|
|
||||||
MODEL_NAME = model_name(MODEL_PATH)
|
MODEL_NAME = model_name(MODEL_PATH) # 返回 "Qwen3-4B"
|
||||||
now = lambda: datetime.datetime.now().strftime("%H:%M:%S")
|
now = lambda: datetime.datetime.now().strftime("%H:%M:%S")
|
||||||
|
|
||||||
# ────────────────── 日志队列 ──────────────────
|
# ────────────────── 日志队列 ──────────────────
|
||||||
|
|
@@ -120,8 +117,14 @@ def chat(
|
||||||
prompt_input = user
|
prompt_input = user
|
||||||
|
|
||||||
stop = [s.strip() for s in stop_raw.split(",") if s.strip()] or None
|
stop = [s.strip() for s in stop_raw.split(",") if s.strip()] or None
|
||||||
|
|
||||||
|
if api_suffix in ("/v1/completions", "/v1/chat/completions"):
|
||||||
|
max_tokens_key = "max_tokens"
|
||||||
|
else:
|
||||||
|
max_tokens_key = "max_new_tokens" # 只留给 /generate 用
|
||||||
|
|
||||||
samp = {
|
samp = {
|
||||||
("max_tokens" if api_suffix == "/v1/completions" else "max_new_tokens"): int(max_new),
|
max_tokens_key: int(max_new),
|
||||||
"temperature": temp,
|
"temperature": temp,
|
||||||
"top_p": top_p,
|
"top_p": top_p,
|
||||||
"top_k": int(top_k),
|
"top_k": int(top_k),
|
||||||
|
|
|
||||||
|
|
@@ -5,7 +5,7 @@ logfile_maxbytes=0
|
||||||
loglevel=info
|
loglevel=info
|
||||||
|
|
||||||
[program:vllm]
|
[program:vllm]
|
||||||
command=python3 -m vllm.entrypoints.openai.api_server --model /root/.cradle/Alibaba/Qwen3-4B --port 30000 --api-key token-abc123
|
command=python3 -m vllm.entrypoints.openai.api_server --model /root/.cradle/Alibaba/Qwen3-4B --served-model-name Qwen3-4B --port 30000 --api-key token-abc123
|
||||||
autostart=true
|
autostart=true
|
||||||
autorestart=true
|
autorestart=true
|
||||||
stdout_logfile=/dev/stdout
|
stdout_logfile=/dev/stdout
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue