sglang.0.4.8.post1/meta_ui.py

74 lines
2.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os

import gradio as gr
import requests
# Connection settings. Each value may be overridden through an environment
# variable; the defaults are the values this script originally hard-coded.
# API_URL points at the server's native "generate" endpoint.
API_URL = os.environ.get("SGLANG_API_URL", "http://localhost:30000/generate")
API_KEY = os.environ.get("SGLANG_API_KEY", "token-abc123")
MODEL_NAME = os.environ.get("SGLANG_MODEL_NAME", "Qwen3-14b-base")
# Context is no longer concatenated; only the user's current input is kept.
def build_prompt(history, user_message):
    """Return the prompt text for the current turn.

    The conversation history is deliberately not folded into the prompt:
    only the message the user just typed is returned.

    Args:
        history: Previous conversation turns. Accepted for interface
            compatibility but not used.
        user_message: The user's current input.

    Returns:
        ``user_message`` unchanged.
    """
    return user_message
# Main chat handler.
def chat(user_message, history, max_tokens, temperature):
    """Send the current message to the generate endpoint and return the reply.

    Args:
        user_message: The text the user just entered.
        history: Previous conversation turns; forwarded to ``build_prompt``.
        max_tokens: Upper bound on the number of tokens to generate.
        temperature: Sampling temperature.

    Returns:
        The stripped ``text`` field of the JSON response, or a
        ``"[请求失败] ..."`` message describing the error if the request or
        the response parsing fails. This function never raises for request
        failures, so the chat UI always gets a string to display.
    """
    prompt = build_prompt(history, user_message)
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    try:
        payload = {
            # NOTE(review): "model" is not a documented field of the native
            # /generate request; kept from the original payload in case a
            # proxy in front of the server routes on it - confirm.
            "model": MODEL_NAME,
            # The native endpoint takes the prompt under "text", not "prompt".
            "text": prompt,
            # Sampling options are read from "sampling_params", where the
            # length limit is named "max_new_tokens". Top-level
            # "max_tokens"/"temperature" keys are not part of the request
            # schema, so the UI settings would not take effect.
            "sampling_params": {
                # UI sliders may deliver a float; the server expects an int.
                "max_new_tokens": int(max_tokens),
                "temperature": temperature,
            },
        }
        response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
        # Surface HTTP errors (401, 500, ...) as such instead of letting them
        # show up as a confusing KeyError / JSON decoding error below.
        response.raise_for_status()
        # The generate endpoint returns the completion in the "text" field.
        reply = response.json()["text"].strip()
    except Exception as e:
        # UI boundary: report the failure in the chat window rather than crash.
        reply = f"[请求失败] {e}"
    return reply
# API connectivity check.
def test_api_connection(max_tokens, temperature):
    """Send a fixed ``"Ping?"`` prompt to the generate endpoint and report the outcome.

    Args:
        max_tokens: Upper bound on the number of tokens to generate.
        temperature: Sampling temperature.

    Returns:
        A status string: ``"✅ ..."`` followed by the stripped ``text`` field
        of the response on success, or ``"❌ ..."`` followed by the error on
        failure. Request failures are reported, never raised.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    try:
        payload = {
            # NOTE(review): "model" is not a documented field of the native
            # /generate request; kept from the original payload in case a
            # proxy in front of the server routes on it - confirm.
            "model": MODEL_NAME,
            # The native endpoint takes the prompt under "text".
            "text": "Ping?",
            # Sampling options are read from "sampling_params", where the
            # length limit is named "max_new_tokens". Top-level
            # "max_tokens"/"temperature" keys are not part of the request
            # schema, so the UI settings would not take effect.
            "sampling_params": {
                # UI sliders may deliver a float; the server expects an int.
                "max_new_tokens": int(max_tokens),
                "temperature": temperature,
            },
        }
        resp = requests.post(API_URL, headers=headers, json=payload, timeout=10)
        # An HTTP error status means the API is not usable; report it as such
        # instead of as a KeyError / JSON decoding error.
        resp.raise_for_status()
        out = resp.json()["text"].strip()
    except Exception as e:
        return f"❌ API 请求失败: {e}"
    return f"✅ API 可用,响应: {out}"
# Gradio UI: two sampling sliders, an API connectivity test button, and a
# chat interface backed by `chat`.
with gr.Blocks(title="Base 模型测试 UI") as demo:
    gr.Markdown("# 💬 Base 模型对话界面")
    with gr.Row():
        max_tokens = gr.Slider(32, 1024, value=256, label="max_tokens")
        temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="temperature")
    test_btn = gr.Button("🔁 测试 API 可用性")
    test_output = gr.Textbox(label="API 测试结果", interactive=False)
    # The sliders are passed as additional inputs so that each chat call
    # receives their current values. Reading `component.value` inside a
    # callback only yields the initial value fixed when the UI was built,
    # so slider changes would never reach `chat`. Because the sliders are
    # already rendered above, they are not rendered a second time.
    chatbot = gr.ChatInterface(
        fn=chat,
        additional_inputs=[max_tokens, temperature],
        title=None,
    )
    test_btn.click(
        fn=test_api_connection,
        inputs=[max_tokens, temperature],
        outputs=test_output,
    )
# Listens on all network interfaces, port 30001.
demo.launch(server_name="0.0.0.0", server_port=30001)