import time
import os
import glob
import threading
import subprocess
import gradio as gr

# Global handles: the running `evalscope perf` process and the (optional)
# `evalscope app` visualization server, so Stop / cleanup can reach them.
current_process = None
vis_process = None


def run_eval(inputs, native, other, output_choices, api_url, api_token):
    """Run an EvalScope benchmark and stream its output into the UI.

    1. Spawns ``evalscope perf …`` against *api_url* / *api_token* and yields
       each stdout line so Gradio updates the textbox live.
    2. If "Evaluation Report" is among *output_choices*, starts the
       ``evalscope app`` web visualization server afterwards and appends the
       access link to the output.

    Yields ``(text, is_running, button_update)`` triples; the final yield
    always resets the button to "Run Evaluation".
    """
    global current_process, vis_process

    timestamp = time.strftime("%Y%m%d-%H%M%S")
    command = [
        "evalscope", "perf",
        "--url", api_url.strip(),
        "--api", "openai",
        "--model", timestamp,  # timestamp as model name to avoid collisions
        "--dataset", "openqa",
        "--max-tokens", "1024",
        "--min-tokens", "1024",
        "--parallel", "1",
        "--max-prompt-length", "15360",
        "--number", "100",
        "--api-key", api_token.strip(),
    ]

    full_output = f"[Eval Started @ {timestamp}]\n"
    yield full_output, True, gr.update(value="Stop Evaluation")

    try:
        current_process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
            text=True, bufsize=1
        )
        # Stream output line by line so the UI updates in real time.
        for line in current_process.stdout:
            full_output += line
            yield full_output, True, gr.update(value="Stop Evaluation")
        current_process.stdout.close()
        current_process.wait()
    except Exception as e:
        full_output += f"[Error] {e}\n"
        yield full_output, False, gr.update(value="Run Evaluation")
    finally:
        current_process = None

    full_output += "[Eval Finished]\n"

    # ---------- optional visualization report ----------
    if "Evaluation Report" in output_choices:
        vis_port = 7861
        outputs_root = "./outputs"  # `evalscope perf` writes result dirs here
        vis_cmd = [
            "evalscope", "app",
            "--outputs", outputs_root,
            "--server-name", "0.0.0.0",
            "--server-port", str(vis_port),
        ]
        # Popen is already non-blocking — no thread wrapper needed.
        # Keep the handle so the server is not silently orphaned.
        vis_process = subprocess.Popen(
            vis_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT
        )
        full_output += f"[Visualization 👉] http://localhost:{vis_port}\n"

    # Always emitted, whether or not a report was requested, so the
    # Run/Stop button and `is_running` state are reliably reset.
    yield full_output, False, gr.update(value="Run Evaluation")


def stop_eval():
    """Terminate the running `evalscope perf` process, if any."""
    global current_process
    if current_process and current_process.poll() is None:
        current_process.terminate()
        current_process = None
        return "[Stopped by user]\n"
    return "[No active process]\n"


def toggle_run(inputs, native, other, output_choices,
               api_url, api_token, is_running):
    """Run/Stop controller bound to the single button (must be a generator)."""
    if not is_running:
        # Start a run: delegate streaming to run_eval.
        yield from run_eval(inputs, native, other,
                            output_choices, api_url, api_token)
    else:
        # User pressed Stop.
        msg = stop_eval()
        yield msg, False, gr.update(value="Run Evaluation")


def enforce_input_exclusive_and_toggle_fields(selected):
    """Keep at most one choice per mutually-exclusive group and toggle the
    API-credential row when "API Models" is selected.

    Group 1: {"API Models", "Local Models"}; Group 2: {"Benchmarks",
    "Custom Datasets"}. Within each group, only the most recently clicked
    option survives.
    """
    group1 = {"API Models", "Local Models"}
    group2 = {"Benchmarks", "Custom Datasets"}

    def keep_only_one(group):
        # `selected` preserves click order, so the last entry wins.
        filtered = [item for item in selected if item in group]
        return filtered[-1:]

    final_selection = set(selected)
    final_selection -= group1
    final_selection |= set(keep_only_one(group1))

    final_selection -= group2
    final_selection |= set(keep_only_one(group2))

    show_api_fields = "API Models" in final_selection
    return (
        gr.update(value=list(final_selection)),
        # gr.Row.update() was removed in Gradio 4.x; the generic
        # gr.update(...) works on both 3.x and 4.x.
        gr.update(visible=show_api_fields)
    )


# ------------- Build the Gradio UI -------------
with gr.Blocks(title="EvalScope 全功能界面") as demo:
    is_running = gr.State(value=False)

    with gr.Group():
        with gr.Row():
            input_choices = gr.CheckboxGroup(
                label="选择输入源",
                choices=["API Models", "Local Models",
                         "Benchmarks", "Custom Datasets"],
                interactive=True
            )

        with gr.Row(visible=False) as api_fields:
            api_url_input = gr.Textbox(
                label="API 地址",
                placeholder="https://api.example.com/v1/chat"
            )
            api_token_input = gr.Textbox(
                label="Token 密钥",
                type="password",
                placeholder="sk-xxx"
            )

        with gr.Row():
            with gr.Column():
                native_choices = gr.CheckboxGroup(
                    label="启用本地模块",
                    choices=["Model Adapter", "Data Adapter",
                             "Evaluator", "Perf Monitor"]
                )
            with gr.Column():
                other_choices = gr.CheckboxGroup(
                    label="启用外部后端",
                    choices=["OpenCompass", "VLMEvalKit",
                             "RAGAS", "MTEB/CMTEB"]
                )

        with gr.Row():
            output_choices = gr.CheckboxGroup(
                label="输出形式",
                choices=["Evaluation Report", "Gradio", "WandB", "Swanlab"]
            )

        run_button = gr.Button("Run Evaluation")
        output_text = gr.TextArea(
            label="执行结果",
            lines=20,
            interactive=False,
            show_copy_button=True
        )

    # Mutual-exclusion handling for the input-source checkboxes.
    input_choices.change(
        fn=enforce_input_exclusive_and_toggle_fields,
        inputs=input_choices,
        outputs=[input_choices, api_fields]
    )

    # Run/Stop binding.
    run_button.click(
        fn=toggle_run,
        inputs=[
            input_choices, native_choices, other_choices,
            output_choices, api_url_input, api_token_input, is_running
        ],
        outputs=[output_text, is_running, run_button],
        show_progress=True
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7900)