sglang.0.4.8.post1/meta_ui.py

80 lines
2.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os

import gradio as gr
import requests
# Connection settings for the completions server. Each value can be
# overridden through an environment variable so the endpoint, token and model
# can be changed without editing this file; the defaults are the values that
# were previously hard-coded.
API_URL = os.environ.get("META_UI_API_URL", "http://localhost:30000/v1/completions")
API_KEY = os.environ.get("META_UI_API_KEY", "token-abc123")
MODEL_NAME = os.environ.get("META_UI_MODEL_NAME", "Qwen3-14b-base")
# Build the prompt: a base model relies on the concatenated conversation context
def build_prompt(history, user_message):
    """Flatten a chat history plus the new message into one completion prompt.

    Each turn is rendered as ``User: ...`` / ``Assistant: ...`` lines and the
    prompt ends with an open ``Assistant:`` label for the model to continue.

    Args:
        history: Previous turns. Either an iterable of ``(user, bot)`` pairs
            or an iterable of message dicts with ``"role"`` and ``"content"``
            keys. ``None`` is treated as an empty history.
        user_message: The newest user message.

    Returns:
        The prompt string.
    """
    role_labels = {"user": "User", "assistant": "Assistant"}
    parts = []
    for turn in history or ():
        if isinstance(turn, dict):
            # Message-dict history: one entry per speaker.
            role = turn.get("role", "")
            label = role_labels.get(role, str(role).capitalize())
            parts.append(f"{label}: {turn.get('content', '')}\n")
        else:
            # Pair history: one entry per exchange.
            user, bot = turn
            parts.append(f"User: {user}\nAssistant: {bot}\n")
    parts.append(f"User: {user_message}\nAssistant:")
    return "".join(parts)
# Main chat function
def chat(user_message, history, max_tokens, temperature):
    """Send the conversation to the completions endpoint and return the reply.

    The prompt is assembled by ``build_prompt`` from ``history`` and
    ``user_message`` and posted to ``API_URL`` as a JSON completions request.

    Args:
        user_message: The newest message typed by the user.
        history: Earlier turns, passed through to ``build_prompt``.
        max_tokens: Forwarded as the ``max_tokens`` request field.
        temperature: Forwarded as the ``temperature`` request field.

    Returns:
        The stripped ``text`` of the first returned choice, or a string
        starting with ``[请求失败]`` that describes the error when the
        request or the response parsing fails.
    """
    prompt = build_prompt(history, user_message)
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": MODEL_NAME,
        "prompt": prompt,
        "max_tokens": max_tokens,
        "temperature": temperature,
        # Stop strings match the turn labels used in the prompt, so the
        # completion ends before a new turn label is generated.
        "stop": ["\nUser:", "\nAssistant:"],
    }
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
        # Report HTTP errors (e.g. 401, 500) with their status instead of
        # letting them surface later as a bare KeyError on "choices".
        response.raise_for_status()
        result = response.json()
        reply = result["choices"][0]["text"].strip()
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and any failure should
        # be shown as a chat reply rather than raised.
        reply = f"[请求失败] {e}"
    return reply
# Manually test that the API is working
def test_api_connection(max_tokens, temperature):
    """Send a fixed ``"Ping?"`` prompt to the endpoint and report the outcome.

    Args:
        max_tokens: Forwarded as the ``max_tokens`` request field.
        temperature: Forwarded as the ``temperature`` request field.

    Returns:
        A status string: a ✅ message containing the stripped ``text`` of the
        first returned choice on success, or a ❌ message containing the
        error on failure.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": MODEL_NAME,
        "prompt": "Ping?",
        "max_tokens": max_tokens,
        "temperature": temperature,
    }
    try:
        resp = requests.post(API_URL, headers=headers, json=payload, timeout=10)
        # Report HTTP errors (e.g. 401, 500) with their status instead of
        # letting them surface later as a bare KeyError on "choices".
        resp.raise_for_status()
        out = resp.json()["choices"][0]["text"].strip()
        return f"✅ API 可用,响应: {out}"
    except Exception as e:
        # Deliberately broad: every failure is reported in the result box.
        return f"❌ API 请求失败: {e}"
# Gradio component layout
# Build the page: generation sliders, an API test button with its result box,
# and the chat interface.
# NOTE(review): the original indentation was lost; the nesting below (only the
# two sliders inside the Row, launch outside the Blocks context) is the
# presumed layout - confirm against the original file.
with gr.Blocks(title="Base 模型测试 UI") as demo:
    gr.Markdown("# 💬 Base 模型对话界面")
    with gr.Row():
        max_tokens = gr.Slider(32, 1024, value=256, label="max_tokens")
        temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="temperature")
    test_btn = gr.Button("🔁 测试 API 可用性")
    test_output = gr.Textbox(label="API 测试结果", interactive=False)
    # Pass the sliders as additional inputs so each chat call receives their
    # current values. Reading `component.value` inside a lambda only returns
    # the initial value the component was constructed with.
    chatbot = gr.ChatInterface(
        fn=chat,
        additional_inputs=[max_tokens, temperature],
        title=None,
    )
    test_btn.click(
        fn=test_api_connection,
        inputs=[max_tokens, temperature],
        outputs=test_output,
    )

# Start the server
demo.launch(server_name="0.0.0.0", server_port=30001)