diff --git a/gradio_ui.py b/gradio_ui.py
index 33c066d..a53b52e 100644
--- a/gradio_ui.py
+++ b/gradio_ui.py
@@ -1,3 +1,14 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Gradio UI + EvalScope 调度脚本(量产版)
+
+关键改动
+1. 独立 Stop Evaluation 按钮,queue=False,立即触发 `stop_eval()`
+2. `stop_eval()` 使用 psutil 递归杀进程树并 wait(),杜绝僵尸
+3. 所有生成器统一返回 4 个输出:output_text ‖ is_running ‖ run_button 更新 ‖ stop_button 更新
+"""
+
 import time
 import os
 import glob
@@ -17,8 +28,10 @@ EVAL_DATASETS = [
     "hellaswag", "humaneval", "mmlu", "mmlu_pro", "race",
     "trivia_qa", "truthful_qa"
 ]
-
-PERF_DATASETS = ["openqa", "flickr8k", "longalpaca", "random_dataset", "line_by_line", "custom", "speed_benchmark"]
+PERF_DATASETS = [
+    "openqa", "flickr8k", "longalpaca", "random_dataset",
+    "line_by_line", "custom", "speed_benchmark"
+]
 
 # ---------------- perf 模式运行 ----------------
 def run_perf(
@@ -49,32 +62,37 @@ def run_perf(
     ]
 
     full_output = f"[Eval Started @ {timestamp}]\nCmd: {' '.join(command)}\n"
-    yield full_output, True, gr.update(value="Stop Evaluation")
+    yield full_output, True, gr.update(interactive=False), gr.update(visible=True)
 
     try:
         current_process = subprocess.Popen(
-            command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-            text=True, bufsize=1, start_new_session=True
+            command,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+            bufsize=1,
+            start_new_session=True,  # 独立进程组,便于后续 killpg / psutil
         )
 
         for line in current_process.stdout:
             if should_stop:
                 break
             full_output += line
-            yield full_output, True, gr.update(value="Stop Evaluation")
+            yield full_output, True, gr.update(interactive=False), gr.update(visible=True)
 
         current_process.stdout.close()
         current_process.wait()
 
     except Exception as e:
         full_output += f"[Error] {e}\n"
-        yield full_output, False, gr.update(value="Run Evaluation")
+        yield full_output, False, gr.update(value="Run Evaluation", interactive=True), gr.update(visible=False)
 
     finally:
         current_process = None
 
     full_output += "[Eval Finished]\n"
 
+    # 自动启动可视化
     if "Evaluation Report" in output_choices:
         vis_port = 7901
         outputs_root = "./outputs"
@@ -101,7 +119,7 @@ def run_perf(
 
         full_output += f"[Visualization 👉] http://localhost:{vis_port}\n"
 
-    yield full_output, False, gr.update(value="Run Evaluation")
+    yield full_output, False, gr.update(value="Run Evaluation", interactive=True), gr.update(visible=False)
 
 # ---------------- eval 模式运行 ----------------
 def run_eval_tool(
@@ -132,26 +150,30 @@ def run_eval_tool(
         command += ["--limit", str(int(num_requests))]
 
     full_output = f"[Eval Started @ {timestamp}]\nCmd: {' '.join(command)}\n"
-    yield full_output, True, gr.update(value="Stop Evaluation")
+    yield full_output, True, gr.update(interactive=False), gr.update(visible=True)
 
     try:
         current_process = subprocess.Popen(
-            command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-            text=True, bufsize=1, start_new_session=True
+            command,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+            bufsize=1,
+            start_new_session=True
         )
 
         for line in current_process.stdout:
             if should_stop:
                 break
             full_output += line
-            yield full_output, True, gr.update(value="Stop Evaluation")
+            yield full_output, True, gr.update(interactive=False), gr.update(visible=True)
 
         current_process.stdout.close()
         current_process.wait()
 
     except Exception as e:
         full_output += f"[Error] {e}\n"
-        yield full_output, False, gr.update(value="Run Evaluation")
+        yield full_output, False, gr.update(value="Run Evaluation", interactive=True), gr.update(visible=False)
 
     finally:
         current_process = None
@@ -184,15 +206,15 @@ def run_eval_tool(
 
     full_output += f"[Visualization 👉] http://localhost:{vis_port}\n"
 
-    yield full_output, False, gr.update(value="Run Evaluation")
+    yield full_output, False, gr.update(value="Run Evaluation", interactive=True), gr.update(visible=False)
 
 # ---------------- 停止函数 ----------------
-def stop_eval():
+def stop_eval() -> str:
     """
     彻底终止 current_process 及其全部子孙进程:
-    1. 先发 SIGINT(Ctrl‑C)尝试优雅退出
-    2. 3 秒内仍存活的进程升级为 SIGKILL
-    3. 最后 wait() 主进程,防止僵尸
+    1. SIGINT(优雅退出,3 秒宽限)
+    2. 仍存活则 SIGKILL
+    3. wait() 主进程,防止僵尸
     """
     global current_process, should_stop
     should_stop = True
@@ -202,34 +224,29 @@ def stop_eval():
 
     try:
         parent = psutil.Process(current_process.pid)
-        family = parent.children(recursive=True) + [parent]   # 整棵进程树
+        family = parent.children(recursive=True) + [parent]
 
-        # ── 1) 尝试优雅终止 ──────────────────────
+        # 1) SIGINT
         for p in family:
             p.send_signal(signal.SIGINT)
+        _, alive = psutil.wait_procs(family, timeout=3)
 
-        # 给 10 秒宽限期
-        _, alive = psutil.wait_procs(family, timeout=10)
-
-        # ── 2) 强制 kill 仍存活的 ────────────────
+        # 2) SIGKILL
         for p in alive:
             p.kill()
-        psutil.wait_procs(alive, timeout=10)
-
-        # ── 3) 回收僵尸,确保句柄关闭 ────────────
-        current_process.wait(timeout=10)
+        psutil.wait_procs(alive, timeout=3)
 
+        # 3) reap
+        current_process.wait(timeout=3)
         return "[✅ 已终止进程树 (SIGINT ➜ SIGKILL fallback)]\n"
 
-    except Exception as e:
-        return f"[❌ 终止失败: {e}]\n"
+    except Exception as exc:
+        return f"[❌ 终止失败: {exc}]\n"
 
     finally:
         current_process = None
-
-
-# ---------------- 控制器 ----------------
+# ---------------- 控制器(仅负责启动) ----------------
 def toggle_run(
     inputs, native, other, output_choices,
     api_url, api_token,
@@ -244,35 +261,31 @@ def toggle_run(
 
     if not inputs:
         msg = "[❌ 错误] 必须至少选择一个输入源(API、本地、基准或自定义)才能开始运行。\n"
-        yield msg, False, gr.update(value="Run Evaluation")
+        yield msg, False, gr.update(value="Run Evaluation", interactive=True), gr.update(visible=False)
         return
 
-    if not is_running:
-        should_stop = False
-        if run_mode == "perf":
-            yield from run_perf(
-                inputs, native, other, output_choices,
-                api_url, api_token,
-                api_provider, dataset,
-                max_tokens, min_tokens, parallel_reqs,
-                max_prompt_len, num_requests,
-                model_override
-            )
-        elif run_mode == "eval":
-            yield from run_eval_tool(
-                inputs, native, other, output_choices,
-                api_url, api_token,
-                api_provider, dataset,
-                max_tokens, min_tokens, parallel_reqs,
-                max_prompt_len, num_requests,
-                model_override
-            )
-        elif run_mode == "app":
-            yield "[⚠️ 当前为 app 模式,请手动打开 http://localhost:7901 查看报告]", False, gr.update(value="Run Evaluation")
-    else:
-        msg = stop_eval()
-        yield msg, False, gr.update(value="Run Evaluation")
-
+    should_stop = False
+    if run_mode == "perf":
+        yield from run_perf(
+            inputs, native, other, output_choices,
+            api_url, api_token,
+            api_provider, dataset,
+            max_tokens, min_tokens, parallel_reqs,
+            max_prompt_len, num_requests,
+            model_override
+        )
+    elif run_mode == "eval":
+        yield from run_eval_tool(
+            inputs, native, other, output_choices,
+            api_url, api_token,
+            api_provider, dataset,
+            max_tokens, min_tokens, parallel_reqs,
+            max_prompt_len, num_requests,
+            model_override
+        )
+    elif run_mode == "app":
+        info = "[⚠️ 当前为 app 模式,请手动打开 http://localhost:7901 查看报告]\n"
+        yield info, False, gr.update(value="Run Evaluation", interactive=True), gr.update(visible=False)
 
 # ---------------- 输入源互斥逻辑 ----------------
 def enforce_input_exclusive_and_toggle_fields(selected):
@@ -299,6 +312,7 @@ def enforce_input_exclusive_and_toggle_fields(selected):
 with gr.Blocks(title="EvalScope 全功能界面") as demo:
     is_running = gr.State(value=False)
 
+    # ── 顶栏:模式选择 ─────────────────────────────
     with gr.Group():
         with gr.Row():
             mode_dropdown = gr.Dropdown(
@@ -308,6 +322,7 @@ with gr.Blocks(title="EvalScope 全功能界面") as demo:
                 info="eval: 智力评测;perf: 性能评测;app: 可视化"
             )
 
+    # ── 输入源选择 ────────────────────────────────
     with gr.Group():
         with gr.Row():
             input_choices = gr.CheckboxGroup(
@@ -316,32 +331,62 @@ with gr.Blocks(title="EvalScope 全功能界面") as demo:
                 interactive=True
             )
 
+    # ── API 参数 ─────────────────────────────────
     with gr.Column(visible=False) as api_fields:
-        api_url_input = gr.Textbox(label="API 地址", placeholder="https://.../v1/chat/completions")
+        api_url_input = gr.Textbox(label="API 地址", placeholder="https://.../v1/chat/completions")
         api_token_input = gr.Textbox(label="Token 密钥", type="password", placeholder="sk-xxx")
         with gr.Accordion("运行参数(可选修改)", open=False):
             with gr.Row():
-                api_provider_dropdown = gr.Dropdown(label="API Provider", choices=["openai", "azure", "ollama", "gemini"], value="openai")
-                dataset_dropdown = gr.Dropdown(label="评测数据集 (--dataset)", choices=PERF_DATASETS, value=PERF_DATASETS[0])
+                api_provider_dropdown = gr.Dropdown(
+                    label="API Provider",
+                    choices=["openai", "azure", "ollama", "gemini"],
+                    value="openai"
+                )
+                dataset_dropdown = gr.Dropdown(
+                    label="评测数据集 (--dataset)",
+                    choices=PERF_DATASETS,
+                    value=PERF_DATASETS[0]
+                )
             model_override_input = gr.Textbox(label="自定义模型名 (--model)", placeholder="llm-name")
             with gr.Row():
                 max_tokens_slider = gr.Slider(label="Max Tokens", minimum=256, maximum=8192, step=256, value=1024)
                 min_tokens_slider = gr.Slider(label="Min Tokens", minimum=0, maximum=4096, step=64, value=1024)
             with gr.Row():
-                parallel_slider = gr.Slider(label="并发请求数", minimum=1, maximum=100, step=1, value=1)
-                num_req_slider = gr.Slider(label="请求条数", minimum=1, maximum=1000, step=1, value=100)
-                max_prompt_len_slider = gr.Slider(label="最大 Prompt 长度", minimum=2048, maximum=262144, step=512, value=15360)
+                parallel_slider = gr.Slider(label="并发请求数", minimum=1, maximum=100, step=1, value=1)
+                num_req_slider = gr.Slider(label="请求条数", minimum=1, maximum=1000, step=1, value=100)
+                max_prompt_len_slider = gr.Slider(
+                    label="最大 Prompt 长度", minimum=2048, maximum=262144, step=512, value=15360
+                )
 
+    # ── 本地/外部模块勾选 ──────────────────────────
     with gr.Row():
         with gr.Column():
-            native_choices = gr.CheckboxGroup(label="启用本地模块", choices=["Model Adapter", "Data Adapter", "Evaluator", "Perf Monitor"])
+            native_choices = gr.CheckboxGroup(
+                label="启用本地模块",
+                choices=["Model Adapter", "Data Adapter", "Evaluator", "Perf Monitor"]
+            )
         with gr.Column():
-            other_choices = gr.CheckboxGroup(label="启用外部后端", choices=["OpenCompass", "VLMEvalKit", "RAGAS", "MTEB/CMTEB"])
+            other_choices = gr.CheckboxGroup(
+                label="启用外部后端",
+                choices=["OpenCompass", "VLMEvalKit", "RAGAS", "MTEB/CMTEB"]
+            )
 
-    output_choices = gr.CheckboxGroup(label="输出形式", choices=["Evaluation Report", "Gradio", "WandB", "Swanlab"])
-    run_button = gr.Button("Run Evaluation")
-    output_text = gr.TextArea(label="执行结果", lines=20, interactive=False, show_copy_button=True)
+    # ── 输出开关 ─────────────────────────────────
+    output_choices = gr.CheckboxGroup(
+        label="输出形式",
+        choices=["Evaluation Report", "Gradio", "WandB", "Swanlab"]
+    )
+    # ── Run & Stop 按钮 ─────────────────────────
+    run_button = gr.Button("Run Evaluation", variant="primary")
+    stop_button = gr.Button("Stop Evaluation", variant="stop", visible=False)
+
+    # ── 输出区域 ─────────────────────────────────
+    output_text = gr.TextArea(
+        label="执行结果", lines=20, interactive=False, show_copy_button=True
+    )
+
+    # ── 逻辑绑定 ─────────────────────────────────
 
     input_choices.change(
         fn=enforce_input_exclusive_and_toggle_fields,
         inputs=input_choices,
@@ -357,6 +402,7 @@ with gr.Blocks(title="EvalScope 全功能界面") as demo:
         outputs=dataset_dropdown
     )
 
+    # ---- Run 按钮(queue=True)----
     run_button.click(
         fn=toggle_run,
         inputs=[
@@ -370,9 +416,23 @@ with gr.Blocks(title="EvalScope 全功能界面") as demo:
             is_running,
             mode_dropdown
         ],
-        outputs=[output_text, is_running, run_button],
-        show_progress=True
+        outputs=[output_text, is_running, run_button, stop_button],
+        show_progress=True,
+        queue=True
     )
 
+    # ---- Stop 按钮(queue=False)----
+    def stop_action():
+        msg = stop_eval()
+        return msg, False, gr.update(value="Run Evaluation", interactive=True), gr.update(visible=False)
+
+    stop_button.click(
+        fn=stop_action,
+        inputs=None,
+        outputs=[output_text, is_running, run_button, stop_button],
+        queue=False
+    )
+
+# ---------------- 入口 ----------------
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7900)
diff --git a/gradio_ui_2025_7_18.py b/gradio_ui_2025_7_18.py
new file mode 100644
index 0000000..65aeb8f
--- /dev/null
+++ b/gradio_ui_2025_7_18.py
@@ -0,0 +1,378 @@
+import time
+import os
+import glob
+import threading
+import subprocess
+import gradio as gr
+import psutil
+import signal
+
+# ---------------- 全局进程句柄 ----------------
+current_process = None
+should_stop = False
+
+# ---------------- 可选数据集 ----------------
+EVAL_DATASETS = [
+    "arc", "bbh", "ceval", "cmmlu", "competition_math", "gsm8k",
+    "hellaswag", "humaneval", "mmlu", "mmlu_pro", "race",
+    "trivia_qa", "truthful_qa"
+]
+
+PERF_DATASETS = ["openqa", "flickr8k", "longalpaca", "random_dataset", "line_by_line", "custom", "speed_benchmark"]
+
+# ---------------- perf 模式运行 ----------------
+def run_perf(
+    inputs, native, other, output_choices,
+    api_url, api_token,
+    api_provider, dataset,
+    max_tokens, min_tokens, parallel_reqs,
+    max_prompt_len, num_requests,
+    model_override
+):
+    global current_process
+
+    timestamp = time.strftime("%Y%m%d-%H%M%S")
+    model_name = model_override.strip() or timestamp
+
+    command = [
+        "evalscope", "perf",
+        "--url", api_url.strip(),
+        "--api", api_provider,
+        "--model", model_name,
+        "--dataset", dataset,
+        "--max-tokens", str(int(max_tokens)),
+        "--min-tokens", str(int(min_tokens)),
+        "--parallel", str(int(parallel_reqs)),
+        "--max-prompt-length", str(int(max_prompt_len)),
+        "--number", str(int(num_requests)),
+        "--api-key", api_token.strip(),
+    ]
+
+    full_output = f"[Eval Started @ {timestamp}]\nCmd: {' '.join(command)}\n"
+    yield full_output, True, gr.update(value="Stop Evaluation")
+
+    try:
+        current_process = subprocess.Popen(
+            command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+            text=True, bufsize=1, start_new_session=True
+        )
+
+        for line in current_process.stdout:
+            if should_stop:
+                break
+            full_output += line
+            yield full_output, True, gr.update(value="Stop Evaluation")
+
+        current_process.stdout.close()
+        current_process.wait()
+
+    except Exception as e:
+        full_output += f"[Error] {e}\n"
+        yield full_output, False, gr.update(value="Run Evaluation")
+
+    finally:
+        current_process = None
+
+    full_output += "[Eval Finished]\n"
+
+    if "Evaluation Report" in output_choices:
+        vis_port = 7901
+        outputs_root = "./outputs"
+        try:
+            latest_output = max(
+                glob.glob(os.path.join(outputs_root, "*")),
+                key=os.path.getmtime
+            )
+        except ValueError:
+            latest_output = outputs_root
+
+        vis_cmd = [
+            "evalscope", "app",
+            "--outputs", outputs_root,
+            "--server-name", "0.0.0.0",
+            "--server-port", str(vis_port),
+        ]
+        threading.Thread(
+            target=subprocess.Popen,
+            args=(vis_cmd,),
+            kwargs={"stdout": subprocess.DEVNULL, "stderr": subprocess.STDOUT},
+            daemon=True
+        ).start()
+
+        full_output += f"[Visualization 👉] http://localhost:{vis_port}\n"
+
+    yield full_output, False, gr.update(value="Run Evaluation")
+
+# ---------------- eval 模式运行 ----------------
+def run_eval_tool(
+    inputs, native, other, output_choices,
+    api_url, api_token,
+    api_provider, dataset,
+    max_tokens, min_tokens, parallel_reqs,
+    max_prompt_len, num_requests,
+    model_override
+):
+    global current_process
+
+    timestamp = time.strftime("%Y%m%d-%H%M%S")
+    model_name = model_override.strip() or timestamp
+
+    command = [
+        "evalscope", "eval",
+        "--model", model_name,
+        "--datasets", dataset
+    ]
+    if api_url.strip():
+        command += [
+            "--eval-type", "service",
+            "--api-url", api_url.strip(),
+            "--api-key", api_token.strip()
+        ]
+    if num_requests:
+        command += ["--limit", str(int(num_requests))]
+
+    full_output = f"[Eval Started @ {timestamp}]\nCmd: {' '.join(command)}\n"
+    yield full_output, True, gr.update(value="Stop Evaluation")
+
+    try:
+        current_process = subprocess.Popen(
+            command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+            text=True, bufsize=1, start_new_session=True
+        )
+
+        for line in current_process.stdout:
+            if should_stop:
+                break
+            full_output += line
+            yield full_output, True, gr.update(value="Stop Evaluation")
+
+        current_process.stdout.close()
+        current_process.wait()
+
+    except Exception as e:
+        full_output += f"[Error] {e}\n"
+        yield full_output, False, gr.update(value="Run Evaluation")
+
+    finally:
+        current_process = None
+
+    full_output += "[Eval Finished]\n"
+
+    if "Evaluation Report" in output_choices:
+        vis_port = 7901
+        outputs_root = "./outputs"
+        try:
+            latest_output = max(
+                glob.glob(os.path.join(outputs_root, "*")),
+                key=os.path.getmtime
+            )
+        except ValueError:
+            latest_output = outputs_root
+
+        vis_cmd = [
+            "evalscope", "app",
+            "--outputs", outputs_root,
+            "--server-name", "0.0.0.0",
+            "--server-port", str(vis_port),
+        ]
+        threading.Thread(
+            target=subprocess.Popen,
+            args=(vis_cmd,),
+            kwargs={"stdout": subprocess.DEVNULL, "stderr": subprocess.STDOUT},
+            daemon=True
+        ).start()
+
+        full_output += f"[Visualization 👉] http://localhost:{vis_port}\n"
+
+    yield full_output, False, gr.update(value="Run Evaluation")
+
+# ---------------- 停止函数 ----------------
+def stop_eval():
+    """
+    彻底终止 current_process 及其全部子孙进程:
+    1. 先发 SIGINT(Ctrl‑C)尝试优雅退出
+    2. 3 秒内仍存活的进程升级为 SIGKILL
+    3. 最后 wait() 主进程,防止僵尸
+    """
+    global current_process, should_stop
+    should_stop = True
+
+    if not (current_process and current_process.poll() is None):
+        return "[⚠️ 无活动 evalscope 进程]\n"
+
+    try:
+        parent = psutil.Process(current_process.pid)
+        family = parent.children(recursive=True) + [parent]   # 整棵进程树
+
+        # ── 1) 尝试优雅终止 ──────────────────────
+        for p in family:
+            p.send_signal(signal.SIGINT)
+
+        # 给 3 秒宽限期
+        _, alive = psutil.wait_procs(family, timeout=3)
+
+        # ── 2) 强制 kill 仍存活的 ────────────────
+        for p in alive:
+            p.kill()
+        psutil.wait_procs(alive, timeout=3)
+
+        # ── 3) 回收僵尸,确保句柄关闭 ────────────
+        current_process.wait(timeout=3)
+
+        return "[✅ 已终止进程树 (SIGINT ➜ SIGKILL fallback)]\n"
+
+    except Exception as e:
+        return f"[❌ 终止失败: {e}]\n"
+
+    finally:
+        current_process = None
+
+
+
+# ---------------- 控制器 ----------------
+def toggle_run(
+    inputs, native, other, output_choices,
+    api_url, api_token,
+    api_provider, dataset,
+    max_tokens, min_tokens, parallel_reqs,
+    max_prompt_len, num_requests,
+    model_override,
+    is_running,
+    run_mode
+):
+    global should_stop
+
+    if not inputs:
+        msg = "[❌ 错误] 必须至少选择一个输入源(API、本地、基准或自定义)才能开始运行。\n"
+        yield msg, False, gr.update(value="Run Evaluation")
+        return
+
+    if not is_running:
+        should_stop = False
+        if run_mode == "perf":
+            yield from run_perf(
+                inputs, native, other, output_choices,
+                api_url, api_token,
+                api_provider, dataset,
+                max_tokens, min_tokens, parallel_reqs,
+                max_prompt_len, num_requests,
+                model_override
+            )
+        elif run_mode == "eval":
+            yield from run_eval_tool(
+                inputs, native, other, output_choices,
+                api_url, api_token,
+                api_provider, dataset,
+                max_tokens, min_tokens, parallel_reqs,
+                max_prompt_len, num_requests,
+                model_override
+            )
+        elif run_mode == "app":
+            yield "[⚠️ 当前为 app 模式,请手动打开 http://localhost:7901 查看报告]", False, gr.update(value="Run Evaluation")
+    else:
+        msg = stop_eval()
+        yield msg, False, gr.update(value="Run Evaluation")
+
+
+# ---------------- 输入源互斥逻辑 ----------------
+def enforce_input_exclusive_and_toggle_fields(selected):
+    order = ["API Models", "Local Models", "Benchmarks", "Custom Datasets"]
+    group1 = {"API Models", "Local Models"}
+    group2 = {"Benchmarks", "Custom Datasets"}
+
+    def keep_only_one(group):
+        filtered = [item for item in selected if item in group]
+        return filtered[-1:]
+
+    final_sel = set(selected)
+    final_sel -= group1
+    final_sel |= set(keep_only_one(group1))
+    final_sel -= group2
+    final_sel |= set(keep_only_one(group2))
+
+    final_list = [itm for itm in order if itm in final_sel]
+    input_update = gr.update() if list(selected) == final_list else gr.update(value=final_list)
+    api_field_update = gr.update(visible="API Models" in final_sel)
+    return input_update, api_field_update
+
+# ---------------- UI 构建 ----------------
+with gr.Blocks(title="EvalScope 全功能界面") as demo:
+    is_running = gr.State(value=False)
+
+    with gr.Group():
+        with gr.Row():
+            mode_dropdown = gr.Dropdown(
+                label="评测类型",
+                choices=["eval", "perf", "app"],
+                value="perf",
+                info="eval: 智力评测;perf: 性能评测;app: 可视化"
+            )
+
+    with gr.Group():
+        with gr.Row():
+            input_choices = gr.CheckboxGroup(
+                label="选择输入源",
+                choices=["API Models", "Local Models", "Benchmarks", "Custom Datasets"],
+                interactive=True
+            )
+
+    with gr.Column(visible=False) as api_fields:
+        api_url_input = gr.Textbox(label="API 地址", placeholder="https://.../v1/chat/completions")
+        api_token_input = gr.Textbox(label="Token 密钥", type="password", placeholder="sk-xxx")
+        with gr.Accordion("运行参数(可选修改)", open=False):
+            with gr.Row():
+                api_provider_dropdown = gr.Dropdown(label="API Provider", choices=["openai", "azure", "ollama", "gemini"], value="openai")
+                dataset_dropdown = gr.Dropdown(label="评测数据集 (--dataset)", choices=PERF_DATASETS, value=PERF_DATASETS[0])
+            model_override_input = gr.Textbox(label="自定义模型名 (--model)", placeholder="llm-name")
+            with gr.Row():
+                max_tokens_slider = gr.Slider(label="Max Tokens", minimum=256, maximum=8192, step=256, value=1024)
+                min_tokens_slider = gr.Slider(label="Min Tokens", minimum=0, maximum=4096, step=64, value=1024)
+            with gr.Row():
+                parallel_slider = gr.Slider(label="并发请求数", minimum=1, maximum=100, step=1, value=1)
+                num_req_slider = gr.Slider(label="请求条数", minimum=1, maximum=1000, step=1, value=100)
+                max_prompt_len_slider = gr.Slider(label="最大 Prompt 长度", minimum=2048, maximum=262144, step=512, value=15360)
+
+    with gr.Row():
+        with gr.Column():
+            native_choices = gr.CheckboxGroup(label="启用本地模块", choices=["Model Adapter", "Data Adapter", "Evaluator", "Perf Monitor"])
+        with gr.Column():
+            other_choices = gr.CheckboxGroup(label="启用外部后端", choices=["OpenCompass", "VLMEvalKit", "RAGAS", "MTEB/CMTEB"])
+
+    output_choices = gr.CheckboxGroup(label="输出形式", choices=["Evaluation Report", "Gradio", "WandB", "Swanlab"])
+    run_button = gr.Button("Run Evaluation")
+    output_text = gr.TextArea(label="执行结果", lines=20, interactive=False, show_copy_button=True)
+
+    input_choices.change(
+        fn=enforce_input_exclusive_and_toggle_fields,
+        inputs=input_choices,
+        outputs=[input_choices, api_fields]
+    )
+
+    mode_dropdown.change(
+        lambda mode: gr.update(
+            choices=EVAL_DATASETS if mode == "eval" else PERF_DATASETS,
+            value=EVAL_DATASETS[0] if mode == "eval" else PERF_DATASETS[0]
+        ),
+        inputs=mode_dropdown,
+        outputs=dataset_dropdown
+    )
+
+    run_button.click(
+        fn=toggle_run,
+        inputs=[
+            input_choices, native_choices, other_choices,
+            output_choices,
+            api_url_input, api_token_input,
+            api_provider_dropdown, dataset_dropdown,
+            max_tokens_slider, min_tokens_slider, parallel_slider,
+            max_prompt_len_slider, num_req_slider,
+            model_override_input,
+            is_running,
+            mode_dropdown
+        ],
+        outputs=[output_text, is_running, run_button],
+        show_progress=True
+    )
+
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7900)
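
Below is a minimal, self-contained sketch (not part of the patch) of the Run/Stop pattern the patch wires up: a generator event streams into a TextArea through the queue, while a separate Stop button registered with queue=False flips a flag that the generator checks between yields. Names such as run_task and stop_flag are illustrative only, and this assumes a recent Gradio 4.x API; the real stop_eval() additionally tears down the evalscope process tree with psutil.

import time
import gradio as gr

stop_flag = {"value": False}          # stands in for the module-level should_stop

def run_task():
    # Generator event: each yield updates (output_text, run_button, stop_button).
    stop_flag["value"] = False
    log = ""
    for i in range(50):
        if stop_flag["value"]:        # checked between yields, like the Popen stdout loop
            yield log + "[stopped]\n", gr.update(interactive=True), gr.update(visible=False)
            return
        log += f"step {i}\n"
        yield log, gr.update(interactive=False), gr.update(visible=True)
        time.sleep(0.1)
    yield log + "[done]\n", gr.update(interactive=True), gr.update(visible=False)

def stop_task():
    # Runs outside the queue, so it fires immediately while run_task is streaming.
    stop_flag["value"] = True
    return gr.update(interactive=True), gr.update(visible=False)

with gr.Blocks() as demo:
    run_btn = gr.Button("Run", variant="primary")
    stop_btn = gr.Button("Stop", variant="stop", visible=False)
    out = gr.TextArea(lines=10, interactive=False)
    run_btn.click(run_task, outputs=[out, run_btn, stop_btn], queue=True)
    stop_btn.click(stop_task, outputs=[run_btn, stop_btn], queue=False)

if __name__ == "__main__":
    demo.launch()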