evalscope_v0.17.0/gradio_ui.py

144 lines
4.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import time
import gradio as gr
import subprocess
# Module-level handle to the evalscope subprocess that is currently running.
# It starts out as None; run_eval stores the Popen object here while a run is
# in progress and stop_eval reads it to terminate that run.
current_process = None
# Launch `evalscope perf` and stream its console output (generator, uses yield).
def run_eval(inputs, native, other, outputs, api_url, api_token):
    """Run ``evalscope perf`` in a subprocess and stream its combined output.

    This is a generator. Every yielded value is the complete log accumulated
    so far (not just the newest line), so a caller can display each value
    as-is.

    Args:
        inputs: UI selection; accepted for interface compatibility, unused.
        native: UI selection; accepted for interface compatibility, unused.
        other: UI selection; accepted for interface compatibility, unused.
        outputs: UI selection; accepted for interface compatibility, unused.
        api_url: Endpoint passed to ``--url`` (surrounding whitespace removed).
        api_token: Credential passed to ``--api-key`` (whitespace removed).

    Yields:
        str: The log so far. The first value is the "[Eval Started ...]"
        banner and the last value ends with "[Eval Finished]".
    """
    global current_process
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    # NOTE(review): the timestamp is passed as the --model value; confirm that
    # this is intended rather than a placeholder for a real model name.
    # NOTE(review): the API key is passed on the command line, where it can be
    # seen in the host's process listing.
    command = [
        "evalscope", "perf",
        "--url", (api_url or "").strip(),
        "--api", "openai",
        "--model", timestamp,
        "--dataset", "openqa",
        "--max-tokens", "1024",
        "--min-tokens", "1024",
        "--parallel", "1",
        "--max-prompt-length", "15360",
        "--number", "100",
        "--api-key", (api_token or "").strip(),
    ]
    full_output = f"[Eval Started @ {timestamp}]\n"
    yield full_output

    # Work through a local handle: stop_eval may reset the global to None
    # while this loop is still running, and dereferencing the global after
    # that would raise AttributeError.
    proc = None
    try:
        proc = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # interleave stderr with stdout
            text=True,
            bufsize=1,  # line-buffered so output arrives line by line
        )
        current_process = proc  # published so stop_eval can terminate it
        for line in proc.stdout:
            full_output += line
            yield full_output
        proc.wait()
    except Exception as e:
        full_output += f"[Error] {str(e)}\n"
        yield full_output
    finally:
        # Also reached when the generator is closed early (GeneratorExit):
        # make sure the child does not outlive this run.
        if proc is not None:
            if proc.poll() is None:
                proc.terminate()
                try:
                    proc.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    proc.kill()
                    proc.wait()
            if proc.stdout is not None:
                proc.stdout.close()
        current_process = None
    full_output += "[Eval Finished]\n"
    yield full_output
# Stop the evalscope subprocess that is currently running, if there is one.
def stop_eval():
    """Ask the running evalscope subprocess to terminate.

    Returns:
        str: The "[Stopped by user]" message (newline-terminated) when a live
        process was signalled, otherwise the "[No active process]" message.
    """
    # Snapshot the global once: run_eval resets it to None in its cleanup,
    # possibly between the liveness check and the terminate() call below.
    proc = current_process
    if proc is None or proc.poll() is not None:
        return "[No active process]\n"
    proc.terminate()
    # The global is deliberately left untouched. run_eval is still draining
    # the process output and clears the global itself once the child exits;
    # setting it to None here made run_eval fail with AttributeError.
    return "[Stopped by user]\n"
# Run/Stop toggle handler. It must be a generator: it yields, it never returns
# a value.
def toggle_run(inputs, native, other, outputs, api_url, api_token, is_running):
    """Start an evaluation, or stop the running one, depending on state.

    Args:
        inputs, native, other, outputs: UI selections forwarded to run_eval.
        api_url: Endpoint forwarded to run_eval.
        api_token: Credential forwarded to run_eval.
        is_running: Current value of the running-state flag.

    Yields:
        tuple: ``(log_text, is_running, button_update)``, one item for each
        of the three outputs this handler is wired to.
    """
    if is_running:
        # The user clicked Stop: terminate the subprocess and reset the button.
        yield stop_eval(), False, gr.update(value="Run Evaluation")
        return

    # Start a run. Every streamed chunk must carry all three output values;
    # yielding the bare log string does not match the three wired outputs.
    # Marking the state as running and relabelling the button here is what
    # makes the Stop branch above reachable.
    log_text = ""
    for log_text in run_eval(inputs, native, other, outputs, api_url, api_token):
        yield log_text, True, gr.update(value="Stop")

    # Run finished: keep the final log visible (do not blank it) and restore
    # the button and state.
    yield log_text, False, gr.update(value="Run Evaluation")
# Enforce mutually exclusive input options and toggle the API fields.
def enforce_input_exclusive_and_toggle_fields(selected):
    """Keep at most one option per exclusive group and toggle the API row.

    Two groups are mutually exclusive internally: {"API Models",
    "Local Models"} and {"Benchmarks", "Custom Datasets"}. Within each group
    only the entry that appears last in ``selected`` is kept.

    Args:
        selected: Currently checked options; None is treated as empty.

    Returns:
        tuple: An update carrying the filtered selection (in the same relative
        order as ``selected``) and an update whose ``visible`` flag is True
        only when "API Models" remains selected.
    """
    selected = list(selected or [])
    exclusive_groups = (
        {"API Models", "Local Models"},
        {"Benchmarks", "Custom Datasets"},
    )

    # Collect every group member that is superseded by a later pick.
    dropped = set()
    for group in exclusive_groups:
        members = [item for item in selected if item in group]
        if members:
            dropped.update(set(members) - {members[-1]})

    # dict.fromkeys removes duplicates while keeping order, so the value sent
    # back is deterministic instead of depending on set iteration order.
    final_selection = list(
        dict.fromkeys(item for item in selected if item not in dropped)
    )
    show_api_fields = "API Models" in final_selection

    # gr.update(...) is used for the row as well: the per-component
    # ``gr.Row.update`` classmethod does not exist in newer Gradio releases.
    return (
        gr.update(value=final_selection),
        gr.update(visible=show_api_fields),
    )
# Build the Gradio interface.
# NOTE(review): the source indentation was lost; the container nesting below
# is reconstructed from the statement order, so confirm it against the
# intended layout.
with gr.Blocks(title="EvalScope 全功能界面") as demo:
    # Flag passed to and returned from the Run/Stop handler.
    is_running = gr.State(value=False)

    # Option lists for the checkbox groups created below.
    _input_source_options = [
        "API Models",
        "Local Models",
        "Benchmarks",
        "Custom Datasets",
    ]
    _native_module_options = [
        "Model Adapter",
        "Data Adapter",
        "Evaluator",
        "Perf Monitor",
    ]
    _backend_options = ["OpenCompass", "VLMEvalKit", "RAGAS", "MTEB/CMTEB"]
    _output_options = ["Evaluation Report", "Gradio", "WandB", "Swanlab"]

    with gr.Group():
        with gr.Row():
            input_choices = gr.CheckboxGroup(
                choices=_input_source_options,
                label="选择输入源",
                interactive=True,
            )
        # Hidden at start; the change handler on input_choices updates this row.
        with gr.Row(visible=False) as api_fields:
            api_url_input = gr.Textbox(
                label="API 地址",
                placeholder="https://api.example.com/v1/chat",
            )
            api_token_input = gr.Textbox(
                label="Token 密钥",
                type="password",
                placeholder="sk-xxx",
            )

    with gr.Row():
        with gr.Column():
            native_choices = gr.CheckboxGroup(
                choices=_native_module_options,
                label="启用本地模块",
            )
        with gr.Column():
            other_choices = gr.CheckboxGroup(
                choices=_backend_options,
                label="启用外部后端",
            )

    with gr.Row():
        output_choices = gr.CheckboxGroup(
            choices=_output_options,
            label="输出形式",
        )

    run_button = gr.Button(value="Run Evaluation")
    output_text = gr.TextArea(
        label="执行结果",
        lines=20,
        interactive=False,
        show_copy_button=True,
    )

    # Re-filter the selection and toggle the API row on every change.
    input_choices.change(
        fn=enforce_input_exclusive_and_toggle_fields,
        inputs=input_choices,
        outputs=[input_choices, api_fields],
    )

    # One button serves as both Run and Stop; toggle_run decides which.
    # NOTE(review): toggle_run is a generator, and a Stop click has to be
    # processed while a run is still streaming; confirm the queue and
    # concurrency settings of the installed Gradio version allow that.
    run_button.click(
        fn=toggle_run,
        inputs=[
            input_choices,
            native_choices,
            other_choices,
            output_choices,
            api_url_input,
            api_token_input,
            is_running,
        ],
        outputs=[output_text, is_running, run_button],
        show_progress=True,
    )

if __name__ == "__main__":
    # 0.0.0.0 binds every network interface, so the UI is reachable from
    # other hosts on port 7900.
    demo.launch(server_port=7900, server_name="0.0.0.0")