.

2025-07-08 16:45:45 +08:00 · 2025-07-08 16:45:45 +08:00 · 0fd04efbec
parent c89dd8dd77
commit 0fd04efbec
2 changed files with 236 additions and 22 deletions
--- a/gradio_ui.py
+++ b/gradio_ui.py
@ -1,19 +1,29 @@
 import time
-import gradio as gr
+import os
 import glob
 import threading
 import subprocess
 import gradio as gr
 # 全局变量：当前子进程
 current_process = None
-# 启动 evalscope 的逻辑（支持 yield 输出）
+
-def run_eval(inputs, native, other, outputs, api_url, api_token):
+# ⬇️⬇️⬇️ 运行 EvalScope 并（可选）启动可视化服务 ⬇️⬇️⬇️
 def run_eval(inputs, native, other, output_choices, api_url, api_token):
    """
    1. 调用 `evalscope perf …` 跑基准测试
    2. 若用户勾选 “Evaluation Report”，测试完成后后台启动
       `evalscope app` Web 可视化服务，并在文本框追加访问链接
    """
    global current_process
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    command = [
        "evalscope", "perf",
        "--url", api_url.strip(),
        "--api", "openai",
-        "--model", timestamp,
+        "--model", timestamp,       # 以时间戳当模型名，避免冲突
        "--dataset", "openqa",
        "--max-tokens", "1024",
        "--min-tokens", "1024",
@ -28,23 +38,63 @@ def run_eval(inputs, native, other, outputs, api_url, api_token):
    try:
        current_process = subprocess.Popen(
-            command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1
+            command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
            text=True, bufsize=1
        )
        # 实时流式输出
        for line in current_process.stdout:
            full_output += line
            yield full_output, True, gr.update(value="Stop Evaluation")
        current_process.stdout.close()
        current_process.wait()
    except Exception as e:
-        full_output += f"[Error] {str(e)}\n"
+        full_output += f"[Error] {e}\n"
        yield full_output, False, gr.update(value="Run Evaluation")
    finally:
        current_process = None
    full_output += "[Eval Finished]\n"
    # ========== 可视化报告 ==========
    if "Evaluation Report" in output_choices:
        vis_port = 7861
        outputs_root = "./outputs"
        # ⬇️ EvalScope perf 会在 outputs_root 下生成 timestamp 目录
        #    这里额外取最新目录备用（目前 UI 只需要根目录）
        try:
            latest_output = max(
                glob.glob(os.path.join(outputs_root, "*")),
                key=os.path.getmtime
            )
        except ValueError:
            latest_output = outputs_root  # 保险：若 outputs 还不存在
        vis_cmd = [
            "evalscope", "app",
            "--outputs", outputs_root,
            "--server-name", "0.0.0.0",
            "--server-port", str(vis_port),
        ]
        # 后台线程启动，不阻塞 UI
        threading.Thread(
            target=subprocess.Popen,
            args=(vis_cmd,),
            kwargs={"stdout": subprocess.DEVNULL,
                    "stderr": subprocess.STDOUT},
            daemon=True
        ).start()
        full_output += f"[Visualization 👉] http://localhost:{vis_port}\n"
    yield full_output, False, gr.update(value="Run Evaluation")
-# 停止当前 evalscope 子进程
+
 # ⬇️⬇️⬇️ 停止按钮逻辑 ⬇️⬇️⬇️
 def stop_eval():
    global current_process
    if current_process and current_process.poll() is None:
@ -53,15 +103,21 @@ def stop_eval():
        return "[Stopped by user]\n"
    return "[No active process]\n"
-# Run/Stop 按钮控制器（必须是 generator）
+
-def toggle_run(inputs, native, other, outputs, api_url, api_token, is_running):
+# ⬇️⬇️⬇️ Run/Stop 控制器（必须是 generator） ⬇️⬇️⬇️
 def toggle_run(inputs, native, other, output_choices,
               api_url, api_token, is_running):
    if not is_running:
-        yield from run_eval(inputs, native, other, outputs, api_url, api_token)
+        # 开始跑
        yield from run_eval(inputs, native, other,
                            output_choices, api_url, api_token)
    else:
        # 用户点 Stop
        msg = stop_eval()
        yield msg, False, gr.update(value="Run Evaluation")
-# 控制输入互斥逻辑
+
 # ⬇️⬇️⬇️ 互斥逻辑：同组保留最后一个选项 ⬇️⬇️⬇️
 def enforce_input_exclusive_and_toggle_fields(selected):
    group1 = {"API Models", "Local Models"}
    group2 = {"Benchmarks", "Custom Datasets"}
@ -78,39 +134,48 @@ def enforce_input_exclusive_and_toggle_fields(selected):
    final_selection |= set(keep_only_one(group2))
    show_api_fields = "API Models" in final_selection
    return (
        gr.update(value=list(final_selection)),
        gr.Row.update(visible=show_api_fields)
    )
-# 构建 Gradio UI
+
 # ------------- 构建 Gradio UI -------------
 with gr.Blocks(title="EvalScope 全功能界面") as demo:
-    is_running = gr.State(value=False)  # 当前运行状态
+    is_running = gr.State(value=False)
    with gr.Group():
        with gr.Row():
            input_choices = gr.CheckboxGroup(
                label="选择输入源",
-                choices=["API Models", "Local Models", "Benchmarks", "Custom Datasets"],
+                choices=["API Models", "Local Models",
                         "Benchmarks", "Custom Datasets"],
                interactive=True
            )
    with gr.Row(visible=False) as api_fields:
-        api_url_input = gr.Textbox(label="API 地址", placeholder="https://api.example.com/v1/chat")
+        api_url_input = gr.Textbox(
-        api_token_input = gr.Textbox(label="Token 密钥", type="password", placeholder="sk-xxx")
+            label="API 地址",
            placeholder="https://api.example.com/v1/chat"
        )
        api_token_input = gr.Textbox(
            label="Token 密钥",
            type="password",
            placeholder="sk-xxx"
        )
    with gr.Row():
        with gr.Column():
            native_choices = gr.CheckboxGroup(
                label="启用本地模块",
-                choices=["Model Adapter", "Data Adapter", "Evaluator", "Perf Monitor"]
+                choices=["Model Adapter", "Data Adapter",
                         "Evaluator", "Perf Monitor"]
            )
        with gr.Column():
            other_choices = gr.CheckboxGroup(
                label="启用外部后端",
-                choices=["OpenCompass", "VLMEvalKit", "RAGAS", "MTEB/CMTEB"]
+                choices=["OpenCompass", "VLMEvalKit",
                         "RAGAS", "MTEB/CMTEB"]
            )
    with gr.Row():
@ -120,14 +185,21 @@ with gr.Blocks(title="EvalScope 全功能界面") as demo:
        )
    run_button = gr.Button("Run Evaluation")
-    output_text = gr.TextArea(label="执行结果", lines=20, interactive=False, show_copy_button=True)
+    output_text = gr.TextArea(
        label="执行结果",
        lines=20,
        interactive=False,
        show_copy_button=True
    )
    # 绑定输入互斥
    input_choices.change(
        fn=enforce_input_exclusive_and_toggle_fields,
        inputs=input_choices,
        outputs=[input_choices, api_fields]
    )
    # 绑定 Run/Stop
    run_button.click(
        fn=toggle_run,
        inputs=[
@ -138,5 +210,5 @@ with gr.Blocks(title="EvalScope 全功能界面") as demo:
        show_progress=True
    )
-if __name__ == '__main__':
+if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7900)
--- a/gradio_ui.py.old
+++ b/gradio_ui.py.old
@ -0,0 +1,142 @@
 import time
 import gradio as gr
 import subprocess
 # Module-level handle to the evalscope subprocess started by run_eval();
 # None whenever no evaluation is running.
 current_process = None
 # Launch evalscope and stream its output (generator: every yield is a UI update)
 def run_eval(inputs, native, other, outputs, api_url, api_token):
    """
    Run ``evalscope perf`` as a subprocess and stream its combined output.

    This is a generator. Every yielded value is a 3-tuple
    ``(log_text, is_running, button_update)`` where ``log_text`` is the whole
    log accumulated so far, ``is_running`` is the new running flag and
    ``button_update`` is a ``gr.update`` that relabels the Run/Stop button.

    Parameters:
        inputs, native, other, outputs: accepted so the signature matches the
            UI wiring; they are not used by this function.
        api_url: endpoint URL; stripped and passed as ``--url``.
        api_token: API key; stripped and passed as ``--api-key``.
            NOTE(review): the key travels on the command line, which is
            usually visible to other local users in the process list.

    Errors raised while starting or reading the subprocess are not propagated:
    they are appended to the log as ``[Error] ...`` and yielded, after which
    the final ``[Eval Finished]`` update is still emitted.
    """
    global current_process
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    command = [
        "evalscope", "perf",
        "--url", api_url.strip(),
        "--api", "openai",
        "--model", timestamp,  # the start timestamp is passed as the model name
        "--dataset", "openqa",
        "--max-tokens", "1024",
        "--min-tokens", "1024",
        "--parallel", "1",
        "--max-prompt-length", "15360",
        "--number", "100",
        "--api-key", api_token.strip(),
    ]
    full_output = f"[Eval Started @ {timestamp}]\n"
    yield full_output, True, gr.update(value="Stop Evaluation")
    # Work through a local handle: stop_eval() may reset the module-level
    # ``current_process`` to None while this generator is still reading, and
    # dereferencing the global afterwards would fail with AttributeError.
    process = None
    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1
        )
        current_process = process
        # Line-buffered text pipe: forward each output line as soon as it arrives.
        for line in process.stdout:
            full_output += line
            yield full_output, True, gr.update(value="Stop Evaluation")
        process.stdout.close()
        process.wait()
    except Exception as e:
        full_output += f"[Error] {e}\n"
        yield full_output, False, gr.update(value="Run Evaluation")
    finally:
        # Also reached when the generator is closed early or reading failed:
        # make sure the child does not keep running unattended.
        if process is not None and process.poll() is None:
            process.terminate()
        # Only clear the global if it still refers to this run's process.
        if current_process is process:
            current_process = None
    full_output += "[Eval Finished]\n"
    yield full_output, False, gr.update(value="Run Evaluation")
 # Stop the evalscope subprocess that is currently running
 def stop_eval():
    """
    Ask the running evalscope subprocess, if any, to terminate.

    Returns:
        "[Stopped by user]\\n" when a live process was found and terminate()
        was sent to it, otherwise "[No active process]\\n".

    The module-level handle is deliberately left in place: run_eval() is
    still reading from that process and resets ``current_process`` itself in
    its ``finally`` block once the stream ends. Clearing it here would pull
    the handle out from under the streaming generator.
    """
    # Read the global exactly once so a concurrent reset to None cannot slip
    # in between the liveness check and the terminate() call.
    process = current_process
    if process is not None and process.poll() is None:
        process.terminate()
        return "[Stopped by user]\n"
    return "[No active process]\n"
 # Run/Stop button controller (has to be a generator so output can stream)
 def toggle_run(inputs, native, other, outputs, api_url, api_token, is_running):
    """
    Dispatch a click on the Run/Stop button.

    While an evaluation is running the click means "stop": stop_eval() is
    called and a single update carrying its message is yielded. Otherwise the
    click means "run" and every update produced by run_eval() is forwarded.
    Each yielded value is ``(text, is_running, button_update)``.
    """
    if is_running:
        # Stop requested: report the outcome and restore the button label.
        yield stop_eval(), False, gr.update(value="Run Evaluation")
        return
    yield from run_eval(inputs, native, other, outputs, api_url, api_token)
 # Input exclusivity: within each group only the last selected option is kept
 def enforce_input_exclusive_and_toggle_fields(selected):
    """
    Enforce mutually exclusive input choices and toggle the API fields row.

    Two groups are exclusive: {"API Models", "Local Models"} and
    {"Benchmarks", "Custom Datasets"}. When several members of one group are
    present in ``selected``, only the one appearing last is kept.

    Parameters:
        selected: list of currently checked option labels (None is treated
            as an empty selection).

    Returns:
        A 2-tuple of ``gr.update`` objects: the corrected selection (in the
        same order as ``selected``) and the visibility of the API fields row,
        which is shown only when "API Models" remains selected.
    """
    exclusive_groups = (
        {"API Models", "Local Models"},
        {"Benchmarks", "Custom Datasets"},
    )
    selected = list(selected or [])
    keep = set(selected)
    for group in exclusive_groups:
        members = [item for item in selected if item in group]
        keep -= group
        keep.update(members[-1:])  # empty slice when the group is unused
    # Preserve the incoming order (and drop duplicates) instead of relying on
    # arbitrary set iteration order.
    final_selection = [item for item in dict.fromkeys(selected) if item in keep]
    show_api_fields = "API Models" in keep
    # Use the generic gr.update(), as the rest of this file does; the
    # per-component ``gr.Row.update`` is not available in newer Gradio releases.
    return (
        gr.update(value=final_selection),
        gr.update(visible=show_api_fields)
    )
 # Build the Gradio UI
 with gr.Blocks(title="EvalScope 全功能界面") as demo:
    is_running = gr.State(value=False)  # whether an evaluation is currently running
    # Input source selection; exclusivity between options is enforced by the
    # change handler bound further down.
    with gr.Group():
        with gr.Row():
            input_choices = gr.CheckboxGroup(
                label="选择输入源",
                choices=["API Models", "Local Models", "Benchmarks", "Custom Datasets"],
                interactive=True
            )
    # API URL / token fields; hidden initially, visibility is driven by the
    # input_choices change handler.
    with gr.Row(visible=False) as api_fields:
        api_url_input = gr.Textbox(label="API 地址", placeholder="https://api.example.com/v1/chat")
        api_token_input = gr.Textbox(label="Token 密钥", type="password", placeholder="sk-xxx")
    # Two side-by-side columns: native modules and external backends.
    with gr.Row():
        with gr.Column():
            native_choices = gr.CheckboxGroup(
                label="启用本地模块",
                choices=["Model Adapter", "Data Adapter", "Evaluator", "Perf Monitor"]
            )
        with gr.Column():
            other_choices = gr.CheckboxGroup(
                label="启用外部后端",
                choices=["OpenCompass", "VLMEvalKit", "RAGAS", "MTEB/CMTEB"]
            )
    # Output format selection.
    with gr.Row():
        output_choices = gr.CheckboxGroup(
            label="输出形式",
            choices=["Evaluation Report", "Gradio", "WandB", "Swanlab"]
        )
    # One button serves as both Run and Stop; its label is changed by toggle_run's updates.
    run_button = gr.Button("Run Evaluation")
    output_text = gr.TextArea(label="执行结果", lines=20, interactive=False, show_copy_button=True)
    # Bind the input exclusivity handler: it rewrites the selection and
    # shows/hides the API fields row.
    input_choices.change(
        fn=enforce_input_exclusive_and_toggle_fields,
        inputs=input_choices,
        outputs=[input_choices, api_fields]
    )
    # Bind Run/Stop: toggle_run receives all form values plus the running flag
    # and updates the log text, the running flag and the button.
    run_button.click(
        fn=toggle_run,
        inputs=[
            input_choices, native_choices, other_choices,
            output_choices, api_url_input, api_token_input, is_running
        ],
        outputs=[output_text, is_running, run_button],
        show_progress=True
    )
 # Script entry point: serve the UI on port 7900, bound to 0.0.0.0
 if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7900,
    )