From 9f36a6d35312bfd437caec3a153dc95eafb56efc Mon Sep 17 00:00:00 2001 From: hailin Date: Tue, 8 Jul 2025 18:27:05 +0800 Subject: [PATCH] . --- gradio_ui copy.py | 286 ---------------------------------------------- gradio_ui.ok.py | 214 ---------------------------------- gradio_ui.py.old | 142 ----------------------- 3 files changed, 642 deletions(-) delete mode 100644 gradio_ui copy.py delete mode 100644 gradio_ui.ok.py delete mode 100644 gradio_ui.py.old diff --git a/gradio_ui copy.py b/gradio_ui copy.py deleted file mode 100644 index 79244c2..0000000 --- a/gradio_ui copy.py +++ /dev/null @@ -1,286 +0,0 @@ -import time -import os -import glob -import threading -import subprocess -import gradio as gr - -# ---------------- 全局进程句柄 ---------------- -current_process = None - - -# ---------------- 核心运行函数 ---------------- -def run_eval( - inputs, native, other, output_choices, - api_url, api_token, - api_provider, dataset, - max_tokens, min_tokens, parallel_reqs, - max_prompt_len, num_requests, - model_override -): - """ - 1. 动态拼装 evalscope perf 命令 - 2. 流式打印日志 - 3. (可选)启动可视化报告 - """ - global current_process - - timestamp = time.strftime("%Y%m%d-%H%M%S") - model_name = model_override.strip() or timestamp - - command = [ - "evalscope", "perf", - "--url", api_url.strip(), - "--api", api_provider, - "--model", model_name, - "--dataset", dataset, - "--max-tokens", str(int(max_tokens)), - "--min-tokens", str(int(min_tokens)), - "--parallel", str(int(parallel_reqs)), - "--max-prompt-length", str(int(max_prompt_len)), - "--number", str(int(num_requests)), - "--api-key", api_token.strip(), - ] - - full_output = f"[Eval Started @ {timestamp}]\nCmd: {' '.join(command)}\n" - yield full_output, True, gr.update(value="Stop Evaluation") - - try: - current_process = subprocess.Popen( - command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - text=True, bufsize=1 - ) - - for line in current_process.stdout: - full_output += line - yield full_output, True, gr.update(value="Stop Evaluation") - - current_process.stdout.close() - current_process.wait() - - except Exception as e: - full_output += f"[Error] {e}\n" - yield full_output, False, gr.update(value="Run Evaluation") - - finally: - current_process = None - - full_output += "[Eval Finished]\n" - - # ---------- 可视化报告 ---------- - if "Evaluation Report" in output_choices: - vis_port = 7861 - outputs_root = "./outputs" - try: - latest_output = max( - glob.glob(os.path.join(outputs_root, "*")), - key=os.path.getmtime - ) - except ValueError: - latest_output = outputs_root - - vis_cmd = [ - "evalscope", "app", - "--outputs", outputs_root, - "--server-name", "0.0.0.0", - "--server-port", str(vis_port), - ] - threading.Thread( - target=subprocess.Popen, - args=(vis_cmd,), - kwargs={"stdout": subprocess.DEVNULL, - "stderr": subprocess.STDOUT}, - daemon=True - ).start() - - full_output += f"[Visualization 👉] http://localhost:{vis_port}\n" - - yield full_output, False, gr.update(value="Run Evaluation") - - -# ---------------- 停止函数 ---------------- -def stop_eval(): - global current_process - if current_process and current_process.poll() is None: - current_process.terminate() - current_process = None - return "[Stopped by user]\n" - return "[No active process]\n" - - -# ---------------- Run/Stop 控制器 ---------------- -def toggle_run( - inputs, native, other, output_choices, - api_url, api_token, - api_provider, dataset, - max_tokens, min_tokens, parallel_reqs, - max_prompt_len, num_requests, - model_override, - is_running -): - if not is_running: - yield from run_eval( - inputs, native, other, output_choices, - api_url, api_token, - api_provider, dataset, - max_tokens, min_tokens, parallel_reqs, - max_prompt_len, num_requests, - model_override - ) - else: - msg = stop_eval() - yield msg, False, gr.update(value="Run Evaluation") - - -# ---------------- 互斥逻辑 ---------------- -def enforce_input_exclusive_and_toggle_fields(selected): - order = ["API Models", "Local Models", "Benchmarks", "Custom Datasets"] - group1 = {"API Models", "Local Models"} - group2 = {"Benchmarks", "Custom Datasets"} - - def keep_only_one(group): - filtered = [item for item in selected if item in group] - return filtered[-1:] - - final_sel = set(selected) - final_sel -= group1 - final_sel |= set(keep_only_one(group1)) - final_sel -= group2 - final_sel |= set(keep_only_one(group2)) - - final_list = [itm for itm in order if itm in final_sel] - - input_update = gr.update() if list(selected) == final_list else gr.update(value=final_list) - - show_api_fields = "API Models" in final_sel - api_row_update = gr.Row.update(visible=show_api_fields) - - show_run_params = bool(final_sel & {"API Models", "Local Models"}) - # 👇 修复:用通用 gr.update 而非 Column.update - run_params_update = gr.update(visible=show_run_params) - - return input_update, api_row_update, run_params_update - - -# ---------------- 构建 Gradio UI ---------------- -with gr.Blocks(title="EvalScope 全功能界面") as demo: - is_running = gr.State(value=False) - - # ===== 输入源 ===== - with gr.Group(): - with gr.Row(): - input_choices = gr.CheckboxGroup( - label="选择输入源", - choices=["API Models", "Local Models", - "Benchmarks", "Custom Datasets"], - interactive=True - ) - - # ===== API 地址 & Token ===== - with gr.Row(visible=False) as api_fields: - api_url_input = gr.Textbox( - label="API 地址", - placeholder="https://api.example.com/v1/chat" - ) - api_token_input = gr.Textbox( - label="Token 密钥", - type="password", - placeholder="sk-xxx" - ) - - # ===== 本地/外部组件 ===== - with gr.Row(): - with gr.Column(): - native_choices = gr.CheckboxGroup( - label="启用本地模块", - choices=["Model Adapter", "Data Adapter", - "Evaluator", "Perf Monitor"] - ) - with gr.Column(): - other_choices = gr.CheckboxGroup( - label="启用外部后端", - choices=["OpenCompass", "VLMEvalKit", - "RAGAS", "MTEB/CMTEB"] - ) - - # ===== 运行参数(可隐藏) ===== - with gr.Column(visible=False) as run_params_section: - with gr.Accordion("运行参数(可选修改)", open=False): - with gr.Row(): - api_provider_dropdown = gr.Dropdown( - label="API Provider (--api)", - choices=["openai", "azure", "ollama", "gemini"], - value="openai" - ) - dataset_dropdown = gr.Dropdown( - label="评测数据集 (--dataset)", - choices=["openqa", "gsm8k", "mmlu", "truthfulqa"], - value="openqa" - ) - model_override_input = gr.Textbox( - label="自定义模型名 (--model),留空则使用时间戳", - placeholder="e.g. my-llm-7b" - ) - with gr.Row(): - max_tokens_slider = gr.Slider( - label="Max Tokens (--max-tokens)", - minimum=256, maximum=8192, step=256, value=1024 - ) - min_tokens_slider = gr.Slider( - label="Min Tokens (--min-tokens)", - minimum=0, maximum=4096, step=64, value=1024 - ) - with gr.Row(): - parallel_slider = gr.Slider( - label="并发请求数 (--parallel)", - minimum=1, maximum=16, step=1, value=1 - ) - num_req_slider = gr.Slider( - label="请求条数 (--number)", - minimum=1, maximum=1000, step=1, value=100 - ) - max_prompt_len_slider = gr.Slider( - label="最大 Prompt 长度 (--max-prompt-length)", - minimum=2048, maximum=32768, step=512, value=15360 - ) - - # ===== 输出形式 ===== - output_choices = gr.CheckboxGroup( - label="输出形式", - choices=["Evaluation Report", "Gradio", "WandB", "Swanlab"] - ) - - # ===== 控制按钮 & 日志 ===== - run_button = gr.Button("Run Evaluation") - output_text = gr.TextArea( - label="执行结果", - lines=20, - interactive=False, - show_copy_button=True - ) - - # ===== 绑定事件 ===== - input_choices.change( - fn=enforce_input_exclusive_and_toggle_fields, - inputs=input_choices, - outputs=[input_choices, api_fields, run_params_section] - ) - - run_button.click( - fn=toggle_run, - inputs=[ - input_choices, native_choices, other_choices, - output_choices, - api_url_input, api_token_input, - api_provider_dropdown, dataset_dropdown, - max_tokens_slider, min_tokens_slider, parallel_slider, - max_prompt_len_slider, num_req_slider, - model_override_input, - is_running - ], - outputs=[output_text, is_running, run_button], - show_progress=True - ) - -if __name__ == "__main__": - demo.launch(server_name="0.0.0.0", server_port=7900) diff --git a/gradio_ui.ok.py b/gradio_ui.ok.py deleted file mode 100644 index 5b76082..0000000 --- a/gradio_ui.ok.py +++ /dev/null @@ -1,214 +0,0 @@ -import time -import os -import glob -import threading -import subprocess -import gradio as gr - -# 全局变量:当前子进程 -current_process = None - - -# ⬇️⬇️⬇️ 运行 EvalScope 并(可选)启动可视化服务 ⬇️⬇️⬇️ -def run_eval(inputs, native, other, output_choices, api_url, api_token): - """ - 1. 调用 `evalscope perf …` 跑基准测试 - 2. 若用户勾选 “Evaluation Report”,测试完成后后台启动 - `evalscope app` Web 可视化服务,并在文本框追加访问链接 - """ - global current_process - - timestamp = time.strftime("%Y%m%d-%H%M%S") - command = [ - "evalscope", "perf", - "--url", api_url.strip(), - "--api", "openai", - "--model", timestamp, # 以时间戳当模型名,避免冲突 - "--dataset", "openqa", - "--max-tokens", "1024", - "--min-tokens", "1024", - "--parallel", "1", - "--max-prompt-length", "15360", - "--number", "100", - "--api-key", api_token.strip(), - ] - - full_output = f"[Eval Started @ {timestamp}]\n" - yield full_output, True, gr.update(value="Stop Evaluation") - - try: - current_process = subprocess.Popen( - command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - text=True, bufsize=1 - ) - - # 实时流式输出 - for line in current_process.stdout: - full_output += line - yield full_output, True, gr.update(value="Stop Evaluation") - - current_process.stdout.close() - current_process.wait() - - except Exception as e: - full_output += f"[Error] {e}\n" - yield full_output, False, gr.update(value="Run Evaluation") - - finally: - current_process = None - - full_output += "[Eval Finished]\n" - - # ========== 可视化报告 ========== - if "Evaluation Report" in output_choices: - vis_port = 7861 - outputs_root = "./outputs" - # ⬇️ EvalScope perf 会在 outputs_root 下生成 timestamp 目录 - # 这里额外取最新目录备用(目前 UI 只需要根目录) - try: - latest_output = max( - glob.glob(os.path.join(outputs_root, "*")), - key=os.path.getmtime - ) - except ValueError: - latest_output = outputs_root # 保险:若 outputs 还不存在 - - vis_cmd = [ - "evalscope", "app", - "--outputs", outputs_root, - "--server-name", "0.0.0.0", - "--server-port", str(vis_port), - ] - - # 后台线程启动,不阻塞 UI - threading.Thread( - target=subprocess.Popen, - args=(vis_cmd,), - kwargs={"stdout": subprocess.DEVNULL, - "stderr": subprocess.STDOUT}, - daemon=True - ).start() - - full_output += f"[Visualization 👉] http://localhost:{vis_port}\n" - - yield full_output, False, gr.update(value="Run Evaluation") - - -# ⬇️⬇️⬇️ 停止按钮逻辑 ⬇️⬇️⬇️ -def stop_eval(): - global current_process - if current_process and current_process.poll() is None: - current_process.terminate() - current_process = None - return "[Stopped by user]\n" - return "[No active process]\n" - - -# ⬇️⬇️⬇️ Run/Stop 控制器(必须是 generator) ⬇️⬇️⬇️ -def toggle_run(inputs, native, other, output_choices, - api_url, api_token, is_running): - if not is_running: - # 开始跑 - yield from run_eval(inputs, native, other, - output_choices, api_url, api_token) - else: - # 用户点 Stop - msg = stop_eval() - yield msg, False, gr.update(value="Run Evaluation") - - -# ⬇️⬇️⬇️ 互斥逻辑:同组保留最后一个选项 ⬇️⬇️⬇️ -def enforce_input_exclusive_and_toggle_fields(selected): - group1 = {"API Models", "Local Models"} - group2 = {"Benchmarks", "Custom Datasets"} - - def keep_only_one(group): - filtered = [item for item in selected if item in group] - return filtered[-1:] - - final_selection = set(selected) - final_selection -= group1 - final_selection |= set(keep_only_one(group1)) - - final_selection -= group2 - final_selection |= set(keep_only_one(group2)) - - show_api_fields = "API Models" in final_selection - return ( - gr.update(value=list(final_selection)), - gr.Row.update(visible=show_api_fields) - ) - - -# ------------- 构建 Gradio UI ------------- -with gr.Blocks(title="EvalScope 全功能界面") as demo: - is_running = gr.State(value=False) - - with gr.Group(): - with gr.Row(): - input_choices = gr.CheckboxGroup( - label="选择输入源", - choices=["API Models", "Local Models", - "Benchmarks", "Custom Datasets"], - interactive=True - ) - - with gr.Row(visible=False) as api_fields: - api_url_input = gr.Textbox( - label="API 地址", - placeholder="https://api.example.com/v1/chat" - ) - api_token_input = gr.Textbox( - label="Token 密钥", - type="password", - placeholder="sk-xxx" - ) - - with gr.Row(): - with gr.Column(): - native_choices = gr.CheckboxGroup( - label="启用本地模块", - choices=["Model Adapter", "Data Adapter", - "Evaluator", "Perf Monitor"] - ) - with gr.Column(): - other_choices = gr.CheckboxGroup( - label="启用外部后端", - choices=["OpenCompass", "VLMEvalKit", - "RAGAS", "MTEB/CMTEB"] - ) - - with gr.Row(): - output_choices = gr.CheckboxGroup( - label="输出形式", - choices=["Evaluation Report", "Gradio", "WandB", "Swanlab"] - ) - - run_button = gr.Button("Run Evaluation") - output_text = gr.TextArea( - label="执行结果", - lines=20, - interactive=False, - show_copy_button=True - ) - - # 绑定输入互斥 - input_choices.change( - fn=enforce_input_exclusive_and_toggle_fields, - inputs=input_choices, - outputs=[input_choices, api_fields] - ) - - # 绑定 Run/Stop - run_button.click( - fn=toggle_run, - inputs=[ - input_choices, native_choices, other_choices, - output_choices, api_url_input, api_token_input, is_running - ], - outputs=[output_text, is_running, run_button], - show_progress=True - ) - -if __name__ == "__main__": - demo.launch(server_name="0.0.0.0", server_port=7900) diff --git a/gradio_ui.py.old b/gradio_ui.py.old deleted file mode 100644 index 3a322c9..0000000 --- a/gradio_ui.py.old +++ /dev/null @@ -1,142 +0,0 @@ -import time -import gradio as gr -import subprocess - -# 全局变量:当前子进程 -current_process = None - -# 启动 evalscope 的逻辑(支持 yield 输出) -def run_eval(inputs, native, other, outputs, api_url, api_token): - global current_process - timestamp = time.strftime("%Y%m%d-%H%M%S") - command = [ - "evalscope", "perf", - "--url", api_url.strip(), - "--api", "openai", - "--model", timestamp, - "--dataset", "openqa", - "--max-tokens", "1024", - "--min-tokens", "1024", - "--parallel", "1", - "--max-prompt-length", "15360", - "--number", "100", - "--api-key", api_token.strip(), - ] - - full_output = f"[Eval Started @ {timestamp}]\n" - yield full_output, True, gr.update(value="Stop Evaluation") - - try: - current_process = subprocess.Popen( - command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1 - ) - for line in current_process.stdout: - full_output += line - yield full_output, True, gr.update(value="Stop Evaluation") - current_process.stdout.close() - current_process.wait() - except Exception as e: - full_output += f"[Error] {str(e)}\n" - yield full_output, False, gr.update(value="Run Evaluation") - finally: - current_process = None - - full_output += "[Eval Finished]\n" - yield full_output, False, gr.update(value="Run Evaluation") - -# 停止当前 evalscope 子进程 -def stop_eval(): - global current_process - if current_process and current_process.poll() is None: - current_process.terminate() - current_process = None - return "[Stopped by user]\n" - return "[No active process]\n" - -# Run/Stop 按钮控制器(必须是 generator) -def toggle_run(inputs, native, other, outputs, api_url, api_token, is_running): - if not is_running: - yield from run_eval(inputs, native, other, outputs, api_url, api_token) - else: - msg = stop_eval() - yield msg, False, gr.update(value="Run Evaluation") - -# 控制输入互斥逻辑 -def enforce_input_exclusive_and_toggle_fields(selected): - group1 = {"API Models", "Local Models"} - group2 = {"Benchmarks", "Custom Datasets"} - - def keep_only_one(group): - filtered = [item for item in selected if item in group] - return filtered[-1:] - - final_selection = set(selected) - final_selection -= group1 - final_selection |= set(keep_only_one(group1)) - - final_selection -= group2 - final_selection |= set(keep_only_one(group2)) - - show_api_fields = "API Models" in final_selection - - return ( - gr.update(value=list(final_selection)), - gr.Row.update(visible=show_api_fields) - ) - -# 构建 Gradio UI -with gr.Blocks(title="EvalScope 全功能界面") as demo: - is_running = gr.State(value=False) # 当前运行状态 - - with gr.Group(): - with gr.Row(): - input_choices = gr.CheckboxGroup( - label="选择输入源", - choices=["API Models", "Local Models", "Benchmarks", "Custom Datasets"], - interactive=True - ) - - with gr.Row(visible=False) as api_fields: - api_url_input = gr.Textbox(label="API 地址", placeholder="https://api.example.com/v1/chat") - api_token_input = gr.Textbox(label="Token 密钥", type="password", placeholder="sk-xxx") - - with gr.Row(): - with gr.Column(): - native_choices = gr.CheckboxGroup( - label="启用本地模块", - choices=["Model Adapter", "Data Adapter", "Evaluator", "Perf Monitor"] - ) - - with gr.Column(): - other_choices = gr.CheckboxGroup( - label="启用外部后端", - choices=["OpenCompass", "VLMEvalKit", "RAGAS", "MTEB/CMTEB"] - ) - - with gr.Row(): - output_choices = gr.CheckboxGroup( - label="输出形式", - choices=["Evaluation Report", "Gradio", "WandB", "Swanlab"] - ) - - run_button = gr.Button("Run Evaluation") - output_text = gr.TextArea(label="执行结果", lines=20, interactive=False, show_copy_button=True) - - input_choices.change( - fn=enforce_input_exclusive_and_toggle_fields, - inputs=input_choices, - outputs=[input_choices, api_fields] - ) - - run_button.click( - fn=toggle_run, - inputs=[ - input_choices, native_choices, other_choices, - output_choices, api_url_input, api_token_input, is_running - ], - outputs=[output_text, is_running, run_button], - show_progress=True - ) - -if __name__ == '__main__': - demo.launch(server_name="0.0.0.0", server_port=7900)