evalscope_v0.17.0/gradio_ui.py.old

143 lines
4.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import time
import gradio as gr
import subprocess
# Module-level handle to the evalscope subprocess that is currently running
# (None when nothing is running). Written by run_eval, read by stop_eval.
current_process = None
# Launch evalscope and stream its output (generator: yields UI updates)
def run_eval(inputs, native, other, outputs, api_url, api_token):
    """Run ``evalscope perf`` as a subprocess and stream its output.

    Args:
        inputs: Selected input sources (accepted for the event signature,
            not used by this function).
        native: Selected native modules (not used by this function).
        other: Selected external backends (not used by this function).
        outputs: Selected output forms (not used by this function).
        api_url: Value passed to ``--url`` after stripping whitespace.
        api_token: Value passed to ``--api-key`` after stripping whitespace.

    Yields:
        Tuples ``(log_text, is_running, button_update)``: the accumulated
        log so far, the new running flag, and a ``gr.update`` carrying the
        button label.
    """
    global current_process
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    # NOTE(review): the timestamp is passed as the --model value; confirm
    # that this is intended rather than a real model name.
    # NOTE(review): the API key is passed as a command-line argument and is
    # therefore visible in the process list of the host.
    command = [
        "evalscope", "perf",
        "--url", (api_url or "").strip(),
        "--api", "openai",
        "--model", timestamp,
        "--dataset", "openqa",
        "--max-tokens", "1024",
        "--min-tokens", "1024",
        "--parallel", "1",
        "--max-prompt-length", "15360",
        "--number", "100",
        "--api-key", (api_token or "").strip(),
    ]
    full_output = f"[Eval Started @ {timestamp}]\n"
    yield full_output, True, gr.update(value="Stop Evaluation")

    # Keep a local handle: stop_eval() may rebind the module global while we
    # are still reading, so the global must not be dereferenced again here.
    proc = None
    try:
        proc = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
        )
        current_process = proc
        for line in proc.stdout:
            full_output += line
            yield full_output, True, gr.update(value="Stop Evaluation")
        proc.wait()
    except Exception as e:
        full_output += f"[Error] {e}\n"
        yield full_output, False, gr.update(value="Run Evaluation")
    finally:
        if proc is not None:
            # Still alive only if we left the loop early (an error, or the
            # generator was closed); do not leave an orphaned child behind.
            if proc.poll() is None:
                proc.terminate()
                try:
                    proc.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    proc.kill()
                    proc.wait()
            proc.stdout.close()
        # Only clear the global if it still refers to this run.
        if current_process is proc:
            current_process = None

    full_output += "[Eval Finished]\n"
    yield full_output, False, gr.update(value="Run Evaluation")
# Stop the evalscope subprocess that is currently running
def stop_eval():
    """Ask the running evalscope subprocess, if any, to terminate.

    The module-level handle is deliberately left in place: the code that
    started the process still holds it and is responsible for reaping the
    child and clearing the handle. Clearing it here would pull the handle
    out from under that code while it is still reading the output.

    Returns:
        ``"[Stopped by user]\\n"`` if a live process was signalled,
        otherwise ``"[No active process]\\n"``.
    """
    proc = current_process
    if proc is not None and proc.poll() is None:
        proc.terminate()
        return "[Stopped by user]\n"
    return "[No active process]\n"
# Run/Stop button controller (must be a generator)
def toggle_run(inputs, native, other, outputs, api_url, api_token, is_running):
    """Dispatch a button click to either starting or stopping an evaluation.

    When ``is_running`` is truthy the current run is stopped and a single
    update is yielded; otherwise every update produced by ``run_eval`` is
    forwarded. Each yielded item is ``(text, is_running, button_update)``.
    """
    if is_running:
        stop_message = stop_eval()
        yield stop_message, False, gr.update(value="Run Evaluation")
        return

    yield from run_eval(inputs, native, other, outputs, api_url, api_token)
# Enforce mutual exclusion between input sources and toggle the API fields
def enforce_input_exclusive_and_toggle_fields(selected):
    """Keep at most one choice per exclusive group and toggle the API row.

    Two groups are mutually exclusive internally: {"API Models",
    "Local Models"} and {"Benchmarks", "Custom Datasets"}. Within each group
    only the entry that appears last in ``selected`` is kept; entries that
    belong to neither group pass through unchanged.

    Args:
        selected: Current checkbox values (``None`` is treated as empty).

    Returns:
        A pair of updates: the filtered selection (in the order the items
        appear in ``selected``) for the checkbox group, and a visibility
        update that is true only when "API Models" remains selected.
    """
    group1 = {"API Models", "Local Models"}
    group2 = {"Benchmarks", "Custom Datasets"}
    selected = list(selected or [])

    def keep_only_one(group):
        filtered = [item for item in selected if item in group]
        return filtered[-1:]

    winners = set(keep_only_one(group1)) | set(keep_only_one(group2))
    exclusive = group1 | group2
    # dict.fromkeys de-duplicates while keeping a stable order, so the value
    # sent back to the UI does not depend on set iteration order.
    final_selection = list(dict.fromkeys(
        item for item in selected
        if item not in exclusive or item in winners
    ))
    show_api_fields = "API Models" in final_selection
    # gr.update is the generic form already used elsewhere in this file; the
    # per-component classmethod (gr.Row.update) is not available in newer
    # Gradio releases.
    return (
        gr.update(value=final_selection),
        gr.update(visible=show_api_fields),
    )
# Build the Gradio UI
# NOTE(review): the nesting of the layout containers below was reconstructed;
# confirm which rows are meant to live inside the Group.
with gr.Blocks(title="EvalScope 全功能界面") as demo:
    is_running = gr.State(value=False) # current running state
    with gr.Group():
        with gr.Row():
            # Input-source selection; exclusivity is enforced by the
            # change handler registered further down.
            input_choices = gr.CheckboxGroup(
                label="选择输入源",
                choices=["API Models", "Local Models", "Benchmarks", "Custom Datasets"],
                interactive=True
            )
        # Hidden until the change handler makes it visible.
        with gr.Row(visible=False) as api_fields:
            api_url_input = gr.Textbox(label="API 地址", placeholder="https://api.example.com/v1/chat")
            api_token_input = gr.Textbox(label="Token 密钥", type="password", placeholder="sk-xxx")
    with gr.Row():
        with gr.Column():
            native_choices = gr.CheckboxGroup(
                label="启用本地模块",
                choices=["Model Adapter", "Data Adapter", "Evaluator", "Perf Monitor"]
            )
        with gr.Column():
            other_choices = gr.CheckboxGroup(
                label="启用外部后端",
                choices=["OpenCompass", "VLMEvalKit", "RAGAS", "MTEB/CMTEB"]
            )
    with gr.Row():
        output_choices = gr.CheckboxGroup(
            label="输出形式",
            choices=["Evaluation Report", "Gradio", "WandB", "Swanlab"]
        )
    run_button = gr.Button("Run Evaluation")
    output_text = gr.TextArea(label="执行结果", lines=20, interactive=False, show_copy_button=True)
    # Re-filter the selection and show/hide the API row on every change.
    input_choices.change(
        fn=enforce_input_exclusive_and_toggle_fields,
        inputs=input_choices,
        outputs=[input_choices, api_fields]
    )
    # One button both starts and stops a run; toggle_run decides which based
    # on the is_running state, and each yield updates the log, the state and
    # the button itself.
    run_button.click(
        fn=toggle_run,
        inputs=[
            input_choices, native_choices, other_choices,
            output_choices, api_url_input, api_token_input, is_running
        ],
        outputs=[output_text, is_running, run_button],
        show_progress=True
    )
if __name__ == '__main__':
    # The click handler is a generator that streams partial output; on
    # Gradio 3.x such handlers only work with the queue enabled. Calling
    # queue() with no arguments is also accepted by later releases.
    # NOTE(review): with the default queue concurrency a "Stop" click may
    # wait behind the running evaluation; confirm the concurrency settings
    # for the installed Gradio version.
    demo.queue()
    demo.launch(server_name="0.0.0.0", server_port=7900)