.

2025-07-08 17:37:19 +08:00 · 2025-07-08 17:37:19 +08:00 · c48ad6ca73
parent 0582fcd439
commit c48ad6ca73
1 changed files with 286 additions and 0 deletions
--- a/copy.py
+++ b/copy.py
@ -0,0 +1,286 @@
+import time
+import os
+import glob
+import threading
+import subprocess
+import gradio as gr
+
+# ---------------- Global process handle ----------------
+# Popen object of the evaluation currently running, or None when idle.
+# Written by run_eval and read/cleared by stop_eval.
+current_process = None
+
+
+# ---------------- 核心运行函数 ----------------
+def run_eval(
+    inputs, native, other, output_choices,
+    api_url, api_token,
+    api_provider, dataset,
+    max_tokens, min_tokens, parallel_reqs,
+    max_prompt_len, num_requests,
+    model_override
+):
+    """
+    1. 动态拼装 evalscope perf 命令
+    2. 流式打印日志
+    3. （可选）启动可视化报告
+    """
+    global current_process
+
+    timestamp = time.strftime("%Y%m%d-%H%M%S")
+    model_name = model_override.strip() or timestamp
+
+    command = [
+        "evalscope", "perf",
+        "--url", api_url.strip(),
+        "--api", api_provider,
+        "--model", model_name,
+        "--dataset", dataset,
+        "--max-tokens", str(int(max_tokens)),
+        "--min-tokens", str(int(min_tokens)),
+        "--parallel", str(int(parallel_reqs)),
+        "--max-prompt-length", str(int(max_prompt_len)),
+        "--number", str(int(num_requests)),
+        "--api-key", api_token.strip(),
+    ]
+
+    full_output = f"[Eval Started @ {timestamp}]\nCmd: {' '.join(command)}\n"
+    yield full_output, True, gr.update(value="Stop Evaluation")
+
+    try:
+        current_process = subprocess.Popen(
+            command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+            text=True, bufsize=1
+        )
+
+        for line in current_process.stdout:
+            full_output += line
+            yield full_output, True, gr.update(value="Stop Evaluation")
+
+        current_process.stdout.close()
+        current_process.wait()
+
+    except Exception as e:
+        full_output += f"[Error] {e}\n"
+        yield full_output, False, gr.update(value="Run Evaluation")
+
+    finally:
+        current_process = None
+
+    full_output += "[Eval Finished]\n"
+
+    # ---------- 可视化报告 ----------
+    if "Evaluation Report" in output_choices:
+        vis_port = 7861
+        outputs_root = "./outputs"
+        try:
+            latest_output = max(
+                glob.glob(os.path.join(outputs_root, "*")),
+                key=os.path.getmtime
+            )
+        except ValueError:
+            latest_output = outputs_root
+
+        vis_cmd = [
+            "evalscope", "app",
+            "--outputs", outputs_root,
+            "--server-name", "0.0.0.0",
+            "--server-port", str(vis_port),
+        ]
+        threading.Thread(
+            target=subprocess.Popen,
+            args=(vis_cmd,),
+            kwargs={"stdout": subprocess.DEVNULL,
+                    "stderr": subprocess.STDOUT},
+            daemon=True
+        ).start()
+
+        full_output += f"[Visualization 👉] http://localhost:{vis_port}\n"
+
+    yield full_output, False, gr.update(value="Run Evaluation")
+
+
+# ---------------- 停止函数 ----------------
+def stop_eval():
+    global current_process
+    if current_process and current_process.poll() is None:
+        current_process.terminate()
+        current_process = None
+        return "[Stopped by user]\n"
+    return "[No active process]\n"
+
+
+# ---------------- Run/Stop 控制器 ----------------
+def toggle_run(
+    inputs, native, other, output_choices,
+    api_url, api_token,
+    api_provider, dataset,
+    max_tokens, min_tokens, parallel_reqs,
+    max_prompt_len, num_requests,
+    model_override,
+    is_running
+):
+    if not is_running:
+        yield from run_eval(
+            inputs, native, other, output_choices,
+            api_url, api_token,
+            api_provider, dataset,
+            max_tokens, min_tokens, parallel_reqs,
+            max_prompt_len, num_requests,
+            model_override
+        )
+    else:
+        msg = stop_eval()
+        yield msg, False, gr.update(value="Run Evaluation")
+
+
+# ---------------- 互斥逻辑 ----------------
+def enforce_input_exclusive_and_toggle_fields(selected):
+    order = ["API Models", "Local Models", "Benchmarks", "Custom Datasets"]
+    group1 = {"API Models", "Local Models"}
+    group2 = {"Benchmarks", "Custom Datasets"}
+
+    def keep_only_one(group):
+        filtered = [item for item in selected if item in group]
+        return filtered[-1:]
+
+    final_sel = set(selected)
+    final_sel -= group1
+    final_sel |= set(keep_only_one(group1))
+    final_sel -= group2
+    final_sel |= set(keep_only_one(group2))
+
+    final_list = [itm for itm in order if itm in final_sel]
+
+    input_update = gr.update() if list(selected) == final_list else gr.update(value=final_list)
+
+    show_api_fields = "API Models" in final_sel
+    api_row_update = gr.Row.update(visible=show_api_fields)
+
+    show_run_params = bool(final_sel & {"API Models", "Local Models"})
+    # 👇 修复：用通用 gr.update 而非 Column.update
+    run_params_update = gr.update(visible=show_run_params)
+
+    return input_update, api_row_update, run_params_update
+
+
+# ---------------- Build the Gradio UI ----------------
+# Declares the whole page inside one Blocks context and wires the two
+# event handlers (selection change, run/stop click) at the end.
+with gr.Blocks(title="EvalScope 全功能界面") as demo:
+    # Per-session flag toggled by toggle_run: True while a run is active.
+    is_running = gr.State(value=False)
+
+    # ===== Input sources =====
+    with gr.Group():
+        with gr.Row():
+            input_choices = gr.CheckboxGroup(
+                label="选择输入源",
+                choices=["API Models", "Local Models",
+                         "Benchmarks", "Custom Datasets"],
+                interactive=True
+            )
+
+    # ===== API URL & token (hidden until "API Models" is selected) =====
+    with gr.Row(visible=False) as api_fields:
+        api_url_input = gr.Textbox(
+            label="API 地址",
+            placeholder="https://api.example.com/v1/chat"
+        )
+        api_token_input = gr.Textbox(
+            label="Token 密钥",
+            type="password",
+            placeholder="sk-xxx"
+        )
+
+    # ===== Native / external components =====
+    with gr.Row():
+        with gr.Column():
+            native_choices = gr.CheckboxGroup(
+                label="启用本地模块",
+                choices=["Model Adapter", "Data Adapter",
+                         "Evaluator", "Perf Monitor"]
+            )
+        with gr.Column():
+            other_choices = gr.CheckboxGroup(
+                label="启用外部后端",
+                choices=["OpenCompass", "VLMEvalKit",
+                         "RAGAS", "MTEB/CMTEB"]
+            )
+
+    # ===== Run parameters (hidden until a model source is selected) =====
+    with gr.Column(visible=False) as run_params_section:
+        with gr.Accordion("运行参数（可选修改）", open=False):
+            with gr.Row():
+                api_provider_dropdown = gr.Dropdown(
+                    label="API Provider (--api)",
+                    choices=["openai", "azure", "ollama", "gemini"],
+                    value="openai"
+                )
+                dataset_dropdown = gr.Dropdown(
+                    label="评测数据集 (--dataset)",
+                    choices=["openqa", "gsm8k", "mmlu", "truthfulqa"],
+                    value="openqa"
+                )
+            model_override_input = gr.Textbox(
+                label="自定义模型名 (--model)，留空则使用时间戳",
+                placeholder="e.g. my-llm-7b"
+            )
+            with gr.Row():
+                max_tokens_slider = gr.Slider(
+                    label="Max Tokens (--max-tokens)",
+                    minimum=256, maximum=8192, step=256, value=1024
+                )
+                min_tokens_slider = gr.Slider(
+                    label="Min Tokens (--min-tokens)",
+                    minimum=0, maximum=4096, step=64, value=1024
+                )
+            with gr.Row():
+                parallel_slider = gr.Slider(
+                    label="并发请求数 (--parallel)",
+                    minimum=1, maximum=16, step=1, value=1
+                )
+                num_req_slider = gr.Slider(
+                    label="请求条数 (--number)",
+                    minimum=1, maximum=1000, step=1, value=100
+                )
+            max_prompt_len_slider = gr.Slider(
+                label="最大 Prompt 长度 (--max-prompt-length)",
+                minimum=2048, maximum=32768, step=512, value=15360
+            )
+
+    # ===== Output formats =====
+    output_choices = gr.CheckboxGroup(
+        label="输出形式",
+        choices=["Evaluation Report", "Gradio", "WandB", "Swanlab"]
+    )
+
+    # ===== Control button & log =====
+    run_button = gr.Button("Run Evaluation")
+    output_text = gr.TextArea(
+        label="执行结果",
+        lines=20,
+        interactive=False,
+        show_copy_button=True
+    )
+
+    # ===== Event bindings =====
+    # Re-normalize the selection and toggle the dependent sections
+    # whenever the input-source checkboxes change.
+    input_choices.change(
+        fn=enforce_input_exclusive_and_toggle_fields,
+        inputs=input_choices,
+        outputs=[input_choices, api_fields, run_params_section]
+    )
+
+    # The input order below must match toggle_run's parameter order.
+    run_button.click(
+        fn=toggle_run,
+        inputs=[
+            input_choices, native_choices, other_choices,
+            output_choices,
+            api_url_input, api_token_input,
+            api_provider_dropdown, dataset_dropdown,
+            max_tokens_slider, min_tokens_slider, parallel_slider,
+            max_prompt_len_slider, num_req_slider,
+            model_override_input,
+            is_running
+        ],
+        outputs=[output_text, is_running, run_button],
+        show_progress=True
+    )
+
+# When executed as a script, serve the UI on all interfaces, port 7900.
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7900)