139 lines
4.4 KiB
Python
139 lines
4.4 KiB
Python
import time
|
||
import gradio as gr
|
||
import subprocess
|
||
|
||
# Global state: handle of the evalscope subprocess that is currently running.
# run_eval() assigns it and stop_eval() reads/clears it; None means no tracked process.
current_process = None
|
||
|
||
# Launch evalscope and stream its output (generator: yields the accumulated log)
|
||
def run_eval(inputs, native, other, outputs, api_url, api_token):
    """Run ``evalscope perf`` as a subprocess and stream its combined output.

    This is a generator. Every yielded value is the *whole* log accumulated so
    far (start banner plus every line the child has printed), so a UI can
    simply replace the text box content on each step.

    Args:
        inputs: Selected input sources from the UI (currently unused).
        native: Selected native modules from the UI (currently unused).
        other: Selected external backends from the UI (currently unused).
        outputs: Selected output forms from the UI (currently unused).
        api_url: Endpoint passed to ``--url``; surrounding whitespace is stripped.
        api_token: Key passed to ``--api-key``; surrounding whitespace is stripped.

    Yields:
        str: The accumulated log text. The final value ends with an
        ``[Eval Finished]`` line; failures to start or read the child are
        reported in-band as an ``[Error] ...`` line instead of being raised.
    """
    global current_process

    timestamp = time.strftime("%Y%m%d-%H%M%S")
    command = [
        "evalscope", "perf",
        "--url", (api_url or "").strip(),
        "--api", "openai",
        # The run timestamp is what gets passed as the model name.
        # NOTE(review): confirm this is intended rather than a real model id.
        "--model", timestamp,
        "--dataset", "openqa",
        "--max-tokens", "1024",
        "--min-tokens", "1024",
        "--parallel", "1",
        "--max-prompt-length", "15360",
        "--number", "100",
        "--api-key", (api_token or "").strip(),
    ]

    full_output = f"[Eval Started @ {timestamp}]\n"
    yield full_output

    # Keep a local handle: stop_eval() resets the global to None while we are
    # still reading, so the global must never be dereferenced after Popen().
    proc = None
    try:
        proc = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # interleave stderr into the same stream
            text=True,
            bufsize=1,  # line-buffered so output appears as it is produced
        )
        current_process = proc  # published so stop_eval() can terminate it
        with proc.stdout:
            for line in proc.stdout:
                full_output += line
                yield full_output
        proc.wait()
    except Exception as e:
        full_output += f"[Error] {str(e)}\n"
        yield full_output
    finally:
        # Reached on normal completion, on errors, and when the consumer closes
        # the generator early; make sure the child is neither left running nor
        # left unreaped.
        if proc is not None and proc.poll() is None:
            proc.terminate()
            try:
                proc.wait(timeout=5)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait()
        current_process = None

    full_output += "[Eval Finished]\n"
    yield full_output
|
||
|
||
# Stop the currently running evalscope subprocess
|
||
def stop_eval():
    """Terminate the tracked evalscope subprocess, if one is still running.

    Returns:
        str: ``"[Stopped by user]\\n"`` when a live process was sent a
        terminate request (the global handle is then cleared), otherwise
        ``"[No active process]\\n"`` and the global handle is left as it was.
    """
    global current_process

    proc = current_process
    # Nothing tracked, or the tracked child has already exited on its own.
    if not proc or proc.poll() is not None:
        return "[No active process]\n"

    proc.terminate()
    current_process = None
    return "[Stopped by user]\n"
|
||
|
||
# Button handler: toggles between Run and Stop
|
||
def toggle_run(inputs, native, other, outputs, api_url, api_token, is_running):
    """Click handler for the Run/Stop button.

    When no evaluation is marked as running, start one and stream its log;
    otherwise stop the running one.

    This handler is itself a generator function. Gradio only iterates a
    handler's results when the handler is a generator function; a generator
    object returned inside a tuple is treated as a plain value and never
    consumed, so the evaluation would never start. Streaming handlers need
    Gradio's queue to be enabled.

    NOTE(review): for a Stop click to be handled while a run is still
    streaming, the queue must allow this event to run concurrently; confirm
    the concurrency settings for the Gradio version in use.

    Args:
        inputs, native, other, outputs, api_url, api_token: Forwarded
            unchanged to ``run_eval``.
        is_running: Current value of the run-state flag.

    Yields:
        tuple: ``(log_text, new_is_running, button_update)`` matching the
        three output components wired to the click event.
    """
    if is_running:
        yield stop_eval(), False, gr.update(value="Run Evaluation")
        return

    text = ""
    for text in run_eval(inputs, native, other, outputs, api_url, api_token):
        yield text, True, gr.update(value="Stop Evaluation")

    # The run ended (finished, failed, or was stopped): show the final log and
    # put the button and state back into the "ready to run" position.
    yield text, False, gr.update(value="Run Evaluation")
|
||
|
||
# Enforce mutually exclusive input-source options and toggle the API fields
|
||
def enforce_input_exclusive_and_toggle_fields(selected):
    """Make the input-source checkboxes mutually exclusive within each group.

    There are two groups: ``{"API Models", "Local Models"}`` and
    ``{"Benchmarks", "Custom Datasets"}``. Within a group only the entry that
    appears last in ``selected`` is kept. Entries outside both groups are
    kept as they are. The surviving entries keep their order from
    ``selected`` (duplicates removed), so the value sent back to the checkbox
    group is deterministic.

    Args:
        selected: Currently checked values of the input checkbox group.
            ``None`` is treated as an empty selection.

    Returns:
        tuple: ``(checkbox_update, row_update)`` where ``checkbox_update``
        sets the filtered selection and ``row_update`` makes the API
        URL/token row visible only when "API Models" remains selected.
    """
    exclusive_groups = (
        {"API Models", "Local Models"},
        {"Benchmarks", "Custom Datasets"},
    )
    chosen = list(selected or [])

    # For every group, everything except the last-listed member loses.
    dropped = set()
    for group in exclusive_groups:
        members = [item for item in chosen if item in group]
        if members:
            dropped |= set(members) - {members[-1]}

    # dict.fromkeys de-duplicates while preserving first-seen order.
    final_selection = list(
        dict.fromkeys(item for item in chosen if item not in dropped)
    )
    show_api_fields = "API Models" in final_selection

    # gr.update is the generic, component-agnostic update helper; the
    # per-class ``gr.Row.update`` form is deprecated/removed in newer Gradio.
    return (
        gr.update(value=final_selection),
        gr.update(visible=show_api_fields),
    )
|
||
|
||
# Build the Gradio interface
|
||
with gr.Blocks(title="EvalScope 全功能界面") as demo:
    # NOTE(review): the nesting of the layout containers below was
    # reconstructed from a copy of this file whose indentation was lost;
    # confirm it against the intended layout.

    # Run-state flag; passed into and written back by the Run/Stop click handler.
    is_running = gr.State(value=False)  # current run state
    with gr.Group():
        with gr.Row():
            # Input sources; the change handler below keeps at most one entry
            # per exclusive group.
            input_choices = gr.CheckboxGroup(
                label="选择输入源",
                choices=["API Models", "Local Models", "Benchmarks", "Custom Datasets"],
                interactive=True
            )

        # API endpoint/token inputs: hidden initially, visibility is driven by
        # the input_choices change handler.
        with gr.Row(visible=False) as api_fields:
            api_url_input = gr.Textbox(label="API 地址", placeholder="https://api.example.com/v1/chat")
            api_token_input = gr.Textbox(label="Token 密钥", type="password", placeholder="sk-xxx")

        with gr.Row():
            with gr.Column():
                native_choices = gr.CheckboxGroup(
                    label="启用本地模块",
                    choices=["Model Adapter", "Data Adapter", "Evaluator", "Perf Monitor"]
                )

            with gr.Column():
                other_choices = gr.CheckboxGroup(
                    label="启用外部后端",
                    choices=["OpenCompass", "VLMEvalKit", "RAGAS", "MTEB/CMTEB"]
                )

        with gr.Row():
            output_choices = gr.CheckboxGroup(
                label="输出形式",
                choices=["Evaluation Report", "Gradio", "WandB", "Swanlab"]
            )

    # The same button is used for starting and stopping; its label is
    # rewritten by the click handler's third output.
    run_button = gr.Button("Run Evaluation")
    output_text = gr.TextArea(label="执行结果", lines=20, interactive=False, show_copy_button=True)

    # Re-filter the selection and show/hide the API row whenever the input
    # checkboxes change.
    input_choices.change(
        fn=enforce_input_exclusive_and_toggle_fields,
        inputs=input_choices,
        outputs=[input_choices, api_fields]
    )

    # The handler receives all selections plus the run-state flag and writes
    # back the log text, the new flag value and the button update.
    run_button.click(
        fn=toggle_run,
        inputs=[input_choices, native_choices, other_choices, output_choices, api_url_input, api_token_input, is_running],
        outputs=[output_text, is_running, run_button],
        show_progress=True
    )
|
||
|
||
if __name__ == '__main__':
    # Evaluations run for minutes. Enable Gradio's queue so long-running (and
    # generator/streaming) event handlers are served through it instead of a
    # single plain HTTP request; on Gradio versions where the queue is already
    # on by default this call is a no-op.
    demo.queue()
    # Listen on all interfaces so the UI is reachable from other machines.
    demo.launch(server_name="0.0.0.0", server_port=7900)
|