From 9f36a6d35312bfd437caec3a153dc95eafb56efc Mon Sep 17 00:00:00 2001
From: hailin <hailin@gdzx.xyz>
Date: Tue, 8 Jul 2025 18:27:05 +0800
Subject: [PATCH] Delete gradio_ui copy.py, gradio_ui.ok.py and gradio_ui.py.old

---
 gradio_ui copy.py | 286 ----------------------------------------------
 gradio_ui.ok.py   | 214 ----------------------------------
 gradio_ui.py.old  | 142 -----------------------
 3 files changed, 642 deletions(-)
 delete mode 100644 gradio_ui copy.py
 delete mode 100644 gradio_ui.ok.py
 delete mode 100644 gradio_ui.py.old

diff --git a/gradio_ui copy.py b/gradio_ui copy.py
deleted file mode 100644
index 79244c2..0000000
--- a/gradio_ui copy.py	
+++ /dev/null
@@ -1,286 +0,0 @@
-import time
-import os
-import glob
-import threading
-import subprocess
-import gradio as gr
-
-# ---------------- 全局进程句柄 ----------------
-current_process = None
-
-
-# ---------------- 核心运行函数 ----------------
-def run_eval(
-    inputs, native, other, output_choices,
-    api_url, api_token,
-    api_provider, dataset,
-    max_tokens, min_tokens, parallel_reqs,
-    max_prompt_len, num_requests,
-    model_override
-):
-    """
-    1. 动态拼装 evalscope perf 命令
-    2. 流式打印日志
-    3. （可选）启动可视化报告
-    """
-    global current_process
-
-    timestamp = time.strftime("%Y%m%d-%H%M%S")
-    model_name = model_override.strip() or timestamp
-
-    command = [
-        "evalscope", "perf",
-        "--url", api_url.strip(),
-        "--api", api_provider,
-        "--model", model_name,
-        "--dataset", dataset,
-        "--max-tokens", str(int(max_tokens)),
-        "--min-tokens", str(int(min_tokens)),
-        "--parallel", str(int(parallel_reqs)),
-        "--max-prompt-length", str(int(max_prompt_len)),
-        "--number", str(int(num_requests)),
-        "--api-key", api_token.strip(),
-    ]
-
-    full_output = f"[Eval Started @ {timestamp}]\nCmd: {' '.join(command)}\n"
-    yield full_output, True, gr.update(value="Stop Evaluation")
-
-    try:
-        current_process = subprocess.Popen(
-            command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-            text=True, bufsize=1
-        )
-
-        for line in current_process.stdout:
-            full_output += line
-            yield full_output, True, gr.update(value="Stop Evaluation")
-
-        current_process.stdout.close()
-        current_process.wait()
-
-    except Exception as e:
-        full_output += f"[Error] {e}\n"
-        yield full_output, False, gr.update(value="Run Evaluation")
-
-    finally:
-        current_process = None
-
-    full_output += "[Eval Finished]\n"
-
-    # ---------- 可视化报告 ----------
-    if "Evaluation Report" in output_choices:
-        vis_port = 7861
-        outputs_root = "./outputs"
-        try:
-            latest_output = max(
-                glob.glob(os.path.join(outputs_root, "*")),
-                key=os.path.getmtime
-            )
-        except ValueError:
-            latest_output = outputs_root
-
-        vis_cmd = [
-            "evalscope", "app",
-            "--outputs", outputs_root,
-            "--server-name", "0.0.0.0",
-            "--server-port", str(vis_port),
-        ]
-        threading.Thread(
-            target=subprocess.Popen,
-            args=(vis_cmd,),
-            kwargs={"stdout": subprocess.DEVNULL,
-                    "stderr": subprocess.STDOUT},
-            daemon=True
-        ).start()
-
-        full_output += f"[Visualization 👉] http://localhost:{vis_port}\n"
-
-    yield full_output, False, gr.update(value="Run Evaluation")
-
-
-# ---------------- 停止函数 ----------------
-def stop_eval():
-    global current_process
-    if current_process and current_process.poll() is None:
-        current_process.terminate()
-        current_process = None
-        return "[Stopped by user]\n"
-    return "[No active process]\n"
-
-
-# ---------------- Run/Stop 控制器 ----------------
-def toggle_run(
-    inputs, native, other, output_choices,
-    api_url, api_token,
-    api_provider, dataset,
-    max_tokens, min_tokens, parallel_reqs,
-    max_prompt_len, num_requests,
-    model_override,
-    is_running
-):
-    if not is_running:
-        yield from run_eval(
-            inputs, native, other, output_choices,
-            api_url, api_token,
-            api_provider, dataset,
-            max_tokens, min_tokens, parallel_reqs,
-            max_prompt_len, num_requests,
-            model_override
-        )
-    else:
-        msg = stop_eval()
-        yield msg, False, gr.update(value="Run Evaluation")
-
-
-# ---------------- 互斥逻辑 ----------------
-def enforce_input_exclusive_and_toggle_fields(selected):
-    order = ["API Models", "Local Models", "Benchmarks", "Custom Datasets"]
-    group1 = {"API Models", "Local Models"}
-    group2 = {"Benchmarks", "Custom Datasets"}
-
-    def keep_only_one(group):
-        filtered = [item for item in selected if item in group]
-        return filtered[-1:]
-
-    final_sel = set(selected)
-    final_sel -= group1
-    final_sel |= set(keep_only_one(group1))
-    final_sel -= group2
-    final_sel |= set(keep_only_one(group2))
-
-    final_list = [itm for itm in order if itm in final_sel]
-
-    input_update = gr.update() if list(selected) == final_list else gr.update(value=final_list)
-
-    show_api_fields = "API Models" in final_sel
-    api_row_update = gr.Row.update(visible=show_api_fields)
-
-    show_run_params = bool(final_sel & {"API Models", "Local Models"})
-    # 👇 修复：用通用 gr.update 而非 Column.update
-    run_params_update = gr.update(visible=show_run_params)
-
-    return input_update, api_row_update, run_params_update
-
-
-# ---------------- 构建 Gradio UI ----------------
-with gr.Blocks(title="EvalScope 全功能界面") as demo:
-    is_running = gr.State(value=False)
-
-    # ===== 输入源 =====
-    with gr.Group():
-        with gr.Row():
-            input_choices = gr.CheckboxGroup(
-                label="选择输入源",
-                choices=["API Models", "Local Models",
-                         "Benchmarks", "Custom Datasets"],
-                interactive=True
-            )
-
-    # ===== API 地址 & Token =====
-    with gr.Row(visible=False) as api_fields:
-        api_url_input = gr.Textbox(
-            label="API 地址",
-            placeholder="https://api.example.com/v1/chat"
-        )
-        api_token_input = gr.Textbox(
-            label="Token 密钥",
-            type="password",
-            placeholder="sk-xxx"
-        )
-
-    # ===== 本地/外部组件 =====
-    with gr.Row():
-        with gr.Column():
-            native_choices = gr.CheckboxGroup(
-                label="启用本地模块",
-                choices=["Model Adapter", "Data Adapter",
-                         "Evaluator", "Perf Monitor"]
-            )
-        with gr.Column():
-            other_choices = gr.CheckboxGroup(
-                label="启用外部后端",
-                choices=["OpenCompass", "VLMEvalKit",
-                         "RAGAS", "MTEB/CMTEB"]
-            )
-
-    # ===== 运行参数（可隐藏） =====
-    with gr.Column(visible=False) as run_params_section:
-        with gr.Accordion("运行参数（可选修改）", open=False):
-            with gr.Row():
-                api_provider_dropdown = gr.Dropdown(
-                    label="API Provider (--api)",
-                    choices=["openai", "azure", "ollama", "gemini"],
-                    value="openai"
-                )
-                dataset_dropdown = gr.Dropdown(
-                    label="评测数据集 (--dataset)",
-                    choices=["openqa", "gsm8k", "mmlu", "truthfulqa"],
-                    value="openqa"
-                )
-            model_override_input = gr.Textbox(
-                label="自定义模型名 (--model)，留空则使用时间戳",
-                placeholder="e.g. my-llm-7b"
-            )
-            with gr.Row():
-                max_tokens_slider = gr.Slider(
-                    label="Max Tokens (--max-tokens)",
-                    minimum=256, maximum=8192, step=256, value=1024
-                )
-                min_tokens_slider = gr.Slider(
-                    label="Min Tokens (--min-tokens)",
-                    minimum=0, maximum=4096, step=64, value=1024
-                )
-            with gr.Row():
-                parallel_slider = gr.Slider(
-                    label="并发请求数 (--parallel)",
-                    minimum=1, maximum=16, step=1, value=1
-                )
-                num_req_slider = gr.Slider(
-                    label="请求条数 (--number)",
-                    minimum=1, maximum=1000, step=1, value=100
-                )
-            max_prompt_len_slider = gr.Slider(
-                label="最大 Prompt 长度 (--max-prompt-length)",
-                minimum=2048, maximum=32768, step=512, value=15360
-            )
-
-    # ===== 输出形式 =====
-    output_choices = gr.CheckboxGroup(
-        label="输出形式",
-        choices=["Evaluation Report", "Gradio", "WandB", "Swanlab"]
-    )
-
-    # ===== 控制按钮 & 日志 =====
-    run_button = gr.Button("Run Evaluation")
-    output_text = gr.TextArea(
-        label="执行结果",
-        lines=20,
-        interactive=False,
-        show_copy_button=True
-    )
-
-    # ===== 绑定事件 =====
-    input_choices.change(
-        fn=enforce_input_exclusive_and_toggle_fields,
-        inputs=input_choices,
-        outputs=[input_choices, api_fields, run_params_section]
-    )
-
-    run_button.click(
-        fn=toggle_run,
-        inputs=[
-            input_choices, native_choices, other_choices,
-            output_choices,
-            api_url_input, api_token_input,
-            api_provider_dropdown, dataset_dropdown,
-            max_tokens_slider, min_tokens_slider, parallel_slider,
-            max_prompt_len_slider, num_req_slider,
-            model_override_input,
-            is_running
-        ],
-        outputs=[output_text, is_running, run_button],
-        show_progress=True
-    )
-
-if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7900)
diff --git a/gradio_ui.ok.py b/gradio_ui.ok.py
deleted file mode 100644
index 5b76082..0000000
--- a/gradio_ui.ok.py
+++ /dev/null
@@ -1,214 +0,0 @@
-import time
-import os
-import glob
-import threading
-import subprocess
-import gradio as gr
-
-# 全局变量：当前子进程
-current_process = None
-
-
-# ⬇️⬇️⬇️ 运行 EvalScope 并（可选）启动可视化服务 ⬇️⬇️⬇️
-def run_eval(inputs, native, other, output_choices, api_url, api_token):
-    """
-    1. 调用 `evalscope perf …` 跑基准测试
-    2. 若用户勾选 “Evaluation Report”，测试完成后后台启动
-       `evalscope app` Web 可视化服务，并在文本框追加访问链接
-    """
-    global current_process
-
-    timestamp = time.strftime("%Y%m%d-%H%M%S")
-    command = [
-        "evalscope", "perf",
-        "--url", api_url.strip(),
-        "--api", "openai",
-        "--model", timestamp,       # 以时间戳当模型名，避免冲突
-        "--dataset", "openqa",
-        "--max-tokens", "1024",
-        "--min-tokens", "1024",
-        "--parallel", "1",
-        "--max-prompt-length", "15360",
-        "--number", "100",
-        "--api-key", api_token.strip(),
-    ]
-
-    full_output = f"[Eval Started @ {timestamp}]\n"
-    yield full_output, True, gr.update(value="Stop Evaluation")
-
-    try:
-        current_process = subprocess.Popen(
-            command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-            text=True, bufsize=1
-        )
-
-        # 实时流式输出
-        for line in current_process.stdout:
-            full_output += line
-            yield full_output, True, gr.update(value="Stop Evaluation")
-
-        current_process.stdout.close()
-        current_process.wait()
-
-    except Exception as e:
-        full_output += f"[Error] {e}\n"
-        yield full_output, False, gr.update(value="Run Evaluation")
-
-    finally:
-        current_process = None
-
-    full_output += "[Eval Finished]\n"
-
-    # ========== 可视化报告 ==========
-    if "Evaluation Report" in output_choices:
-        vis_port = 7861
-        outputs_root = "./outputs"
-        # ⬇️ EvalScope perf 会在 outputs_root 下生成 timestamp 目录
-        #    这里额外取最新目录备用（目前 UI 只需要根目录）
-        try:
-            latest_output = max(
-                glob.glob(os.path.join(outputs_root, "*")),
-                key=os.path.getmtime
-            )
-        except ValueError:
-            latest_output = outputs_root  # 保险：若 outputs 还不存在
-
-        vis_cmd = [
-            "evalscope", "app",
-            "--outputs", outputs_root,
-            "--server-name", "0.0.0.0",
-            "--server-port", str(vis_port),
-        ]
-
-        # 后台线程启动，不阻塞 UI
-        threading.Thread(
-            target=subprocess.Popen,
-            args=(vis_cmd,),
-            kwargs={"stdout": subprocess.DEVNULL,
-                    "stderr": subprocess.STDOUT},
-            daemon=True
-        ).start()
-
-        full_output += f"[Visualization 👉] http://localhost:{vis_port}\n"
-
-    yield full_output, False, gr.update(value="Run Evaluation")
-
-
-# ⬇️⬇️⬇️ 停止按钮逻辑 ⬇️⬇️⬇️
-def stop_eval():
-    global current_process
-    if current_process and current_process.poll() is None:
-        current_process.terminate()
-        current_process = None
-        return "[Stopped by user]\n"
-    return "[No active process]\n"
-
-
-# ⬇️⬇️⬇️ Run/Stop 控制器（必须是 generator） ⬇️⬇️⬇️
-def toggle_run(inputs, native, other, output_choices,
-               api_url, api_token, is_running):
-    if not is_running:
-        # 开始跑
-        yield from run_eval(inputs, native, other,
-                            output_choices, api_url, api_token)
-    else:
-        # 用户点 Stop
-        msg = stop_eval()
-        yield msg, False, gr.update(value="Run Evaluation")
-
-
-# ⬇️⬇️⬇️ 互斥逻辑：同组保留最后一个选项 ⬇️⬇️⬇️
-def enforce_input_exclusive_and_toggle_fields(selected):
-    group1 = {"API Models", "Local Models"}
-    group2 = {"Benchmarks", "Custom Datasets"}
-
-    def keep_only_one(group):
-        filtered = [item for item in selected if item in group]
-        return filtered[-1:]
-
-    final_selection = set(selected)
-    final_selection -= group1
-    final_selection |= set(keep_only_one(group1))
-
-    final_selection -= group2
-    final_selection |= set(keep_only_one(group2))
-
-    show_api_fields = "API Models" in final_selection
-    return (
-        gr.update(value=list(final_selection)),
-        gr.Row.update(visible=show_api_fields)
-    )
-
-
-# ------------- 构建 Gradio UI -------------
-with gr.Blocks(title="EvalScope 全功能界面") as demo:
-    is_running = gr.State(value=False)
-
-    with gr.Group():
-        with gr.Row():
-            input_choices = gr.CheckboxGroup(
-                label="选择输入源",
-                choices=["API Models", "Local Models",
-                         "Benchmarks", "Custom Datasets"],
-                interactive=True
-            )
-
-    with gr.Row(visible=False) as api_fields:
-        api_url_input = gr.Textbox(
-            label="API 地址",
-            placeholder="https://api.example.com/v1/chat"
-        )
-        api_token_input = gr.Textbox(
-            label="Token 密钥",
-            type="password",
-            placeholder="sk-xxx"
-        )
-
-    with gr.Row():
-        with gr.Column():
-            native_choices = gr.CheckboxGroup(
-                label="启用本地模块",
-                choices=["Model Adapter", "Data Adapter",
-                         "Evaluator", "Perf Monitor"]
-            )
-        with gr.Column():
-            other_choices = gr.CheckboxGroup(
-                label="启用外部后端",
-                choices=["OpenCompass", "VLMEvalKit",
-                         "RAGAS", "MTEB/CMTEB"]
-            )
-
-    with gr.Row():
-        output_choices = gr.CheckboxGroup(
-            label="输出形式",
-            choices=["Evaluation Report", "Gradio", "WandB", "Swanlab"]
-        )
-
-    run_button = gr.Button("Run Evaluation")
-    output_text = gr.TextArea(
-        label="执行结果",
-        lines=20,
-        interactive=False,
-        show_copy_button=True
-    )
-
-    # 绑定输入互斥
-    input_choices.change(
-        fn=enforce_input_exclusive_and_toggle_fields,
-        inputs=input_choices,
-        outputs=[input_choices, api_fields]
-    )
-
-    # 绑定 Run/Stop
-    run_button.click(
-        fn=toggle_run,
-        inputs=[
-            input_choices, native_choices, other_choices,
-            output_choices, api_url_input, api_token_input, is_running
-        ],
-        outputs=[output_text, is_running, run_button],
-        show_progress=True
-    )
-
-if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7900)
diff --git a/gradio_ui.py.old b/gradio_ui.py.old
deleted file mode 100644
index 3a322c9..0000000
--- a/gradio_ui.py.old
+++ /dev/null
@@ -1,142 +0,0 @@
-import time
-import gradio as gr
-import subprocess
-
-# 全局变量：当前子进程
-current_process = None
-
-# 启动 evalscope 的逻辑（支持 yield 输出）
-def run_eval(inputs, native, other, outputs, api_url, api_token):
-    global current_process
-    timestamp = time.strftime("%Y%m%d-%H%M%S")
-    command = [
-        "evalscope", "perf",
-        "--url", api_url.strip(),
-        "--api", "openai",
-        "--model", timestamp,
-        "--dataset", "openqa",
-        "--max-tokens", "1024",
-        "--min-tokens", "1024",
-        "--parallel", "1",
-        "--max-prompt-length", "15360",
-        "--number", "100",
-        "--api-key", api_token.strip(),
-    ]
-
-    full_output = f"[Eval Started @ {timestamp}]\n"
-    yield full_output, True, gr.update(value="Stop Evaluation")
-
-    try:
-        current_process = subprocess.Popen(
-            command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1
-        )
-        for line in current_process.stdout:
-            full_output += line
-            yield full_output, True, gr.update(value="Stop Evaluation")
-        current_process.stdout.close()
-        current_process.wait()
-    except Exception as e:
-        full_output += f"[Error] {str(e)}\n"
-        yield full_output, False, gr.update(value="Run Evaluation")
-    finally:
-        current_process = None
-
-    full_output += "[Eval Finished]\n"
-    yield full_output, False, gr.update(value="Run Evaluation")
-
-# 停止当前 evalscope 子进程
-def stop_eval():
-    global current_process
-    if current_process and current_process.poll() is None:
-        current_process.terminate()
-        current_process = None
-        return "[Stopped by user]\n"
-    return "[No active process]\n"
-
-# Run/Stop 按钮控制器（必须是 generator）
-def toggle_run(inputs, native, other, outputs, api_url, api_token, is_running):
-    if not is_running:
-        yield from run_eval(inputs, native, other, outputs, api_url, api_token)
-    else:
-        msg = stop_eval()
-        yield msg, False, gr.update(value="Run Evaluation")
-
-# 控制输入互斥逻辑
-def enforce_input_exclusive_and_toggle_fields(selected):
-    group1 = {"API Models", "Local Models"}
-    group2 = {"Benchmarks", "Custom Datasets"}
-
-    def keep_only_one(group):
-        filtered = [item for item in selected if item in group]
-        return filtered[-1:]
-
-    final_selection = set(selected)
-    final_selection -= group1
-    final_selection |= set(keep_only_one(group1))
-
-    final_selection -= group2
-    final_selection |= set(keep_only_one(group2))
-
-    show_api_fields = "API Models" in final_selection
-
-    return (
-        gr.update(value=list(final_selection)),
-        gr.Row.update(visible=show_api_fields)
-    )
-
-# 构建 Gradio UI
-with gr.Blocks(title="EvalScope 全功能界面") as demo:
-    is_running = gr.State(value=False)  # 当前运行状态
-
-    with gr.Group():
-        with gr.Row():
-            input_choices = gr.CheckboxGroup(
-                label="选择输入源",
-                choices=["API Models", "Local Models", "Benchmarks", "Custom Datasets"],
-                interactive=True
-            )
-
-    with gr.Row(visible=False) as api_fields:
-        api_url_input = gr.Textbox(label="API 地址", placeholder="https://api.example.com/v1/chat")
-        api_token_input = gr.Textbox(label="Token 密钥", type="password", placeholder="sk-xxx")
-
-    with gr.Row():
-        with gr.Column():
-            native_choices = gr.CheckboxGroup(
-                label="启用本地模块",
-                choices=["Model Adapter", "Data Adapter", "Evaluator", "Perf Monitor"]
-            )
-
-        with gr.Column():
-            other_choices = gr.CheckboxGroup(
-                label="启用外部后端",
-                choices=["OpenCompass", "VLMEvalKit", "RAGAS", "MTEB/CMTEB"]
-            )
-
-    with gr.Row():
-        output_choices = gr.CheckboxGroup(
-            label="输出形式",
-            choices=["Evaluation Report", "Gradio", "WandB", "Swanlab"]
-        )
-
-    run_button = gr.Button("Run Evaluation")
-    output_text = gr.TextArea(label="执行结果", lines=20, interactive=False, show_copy_button=True)
-
-    input_choices.change(
-        fn=enforce_input_exclusive_and_toggle_fields,
-        inputs=input_choices,
-        outputs=[input_choices, api_fields]
-    )
-
-    run_button.click(
-        fn=toggle_run,
-        inputs=[
-            input_choices, native_choices, other_choices,
-            output_choices, api_url_input, api_token_input, is_running
-        ],
-        outputs=[output_text, is_running, run_button],
-        show_progress=True
-    )
-
-if __name__ == '__main__':
-    demo.launch(server_name="0.0.0.0", server_port=7900)