diff --git a/gradio_ui.py b/gradio_ui.py index f7f1e96..be854c7 100644 --- a/gradio_ui.py +++ b/gradio_ui.py @@ -18,6 +18,7 @@ import gradio as gr import psutil import signal import shlex +import pathlib # ---------------- 全局进程句柄 ---------------- current_process = None @@ -34,11 +35,15 @@ PERF_DATASETS = [ "line_by_line", "custom", "speed_benchmark" ] +def toggle_dataset_file_visibility(ds): + return gr.update(visible=(ds == "line_by_line")) + # ---------------- perf 模式运行 ---------------- def run_perf( inputs, native, other, output_choices, api_url, api_token, api_provider, dataset, + dataset_path, max_tokens, min_tokens, parallel_reqs, max_prompt_len, num_requests, model_override, @@ -46,6 +51,11 @@ def run_perf( ): global current_process + if dataset == "line_by_line" and dataset_path is None: + msg = "[❌] 请选择 line_by_line 数据集文件 (.txt)" + yield msg, False, gr.update(value="Run Evaluation", interactive=True), gr.update(visible=False) + return + timestamp = time.strftime("%Y%m%d-%H%M%S") model_name = model_override.strip() or timestamp @@ -63,6 +73,8 @@ def run_perf( "--api-key", api_token.strip(), ] + if dataset == "line_by_line" and dataset_path: + command += ["--dataset-path", dataset_path] if extra_args.strip(): command += shlex.split(extra_args.strip()) @@ -95,6 +107,8 @@ def run_perf( finally: current_process = None + if dataset_path: + pathlib.Path(dataset_path).unlink(missing_ok=True) full_output += "[Eval Finished]\n" @@ -132,6 +146,7 @@ def run_eval_tool( inputs, native, other, output_choices, api_url, api_token, api_provider, dataset, + dataset_path, max_tokens, min_tokens, parallel_reqs, max_prompt_len, num_requests, model_override, extra_args @@ -139,6 +154,11 @@ def run_eval_tool( ): global current_process + if dataset == "line_by_line" and dataset_path is None: + msg = "[❌] 请选择 line_by_line 数据集文件 (.txt)" + yield msg, False, gr.update(value="Run Evaluation", interactive=True), gr.update(visible=False) + return + timestamp = time.strftime("%Y%m%d-%H%M%S") model_name = model_override.strip() or timestamp @@ -156,10 +176,12 @@ def run_eval_tool( if num_requests: command += ["--limit", str(int(num_requests))] - if extra_args.strip(): command += shlex.split(extra_args.strip()) + if dataset == "line_by_line" and dataset_path: + command += ["--dataset-path", dataset_path] + full_output = f"[Eval Started @ {timestamp}]\nCmd: {' '.join(command)}\n" yield full_output, True, gr.update(interactive=False), gr.update(visible=True) @@ -188,6 +210,8 @@ def run_eval_tool( finally: current_process = None + if dataset_path: + pathlib.Path(dataset_path).unlink(missing_ok=True) full_output += "[Eval Finished]\n" @@ -261,7 +285,7 @@ def stop_eval() -> str: def toggle_run( inputs, native, other, output_choices, api_url, api_token, - api_provider, dataset, + api_provider, dataset, dataset_file, max_tokens, min_tokens, parallel_reqs, max_prompt_len, num_requests, model_override, @@ -271,6 +295,8 @@ def toggle_run( ): global should_stop + dataset_path = dataset_file.name if dataset_file else None + if not inputs: msg = "[❌ 错误] 必须至少选择一个输入源(API、本地、基准或自定义)才能开始运行。\n" yield msg, False, gr.update(value="Run Evaluation", interactive=True), gr.update(visible=False) @@ -282,6 +308,7 @@ def toggle_run( inputs, native, other, output_choices, api_url, api_token, api_provider, dataset, + dataset_path, max_tokens, min_tokens, parallel_reqs, max_prompt_len, num_requests, model_override, @@ -292,6 +319,7 @@ def toggle_run( inputs, native, other, output_choices, api_url, api_token, api_provider, dataset, + dataset_path, max_tokens, min_tokens, parallel_reqs, max_prompt_len, num_requests, model_override, @@ -361,6 +389,11 @@ with gr.Blocks(title="EvalScope 全功能界面") as demo: choices=PERF_DATASETS, value=PERF_DATASETS[0] ) + dataset_file_input = gr.File( + label="Line‑by‑line 数据集文件(txt)", + file_types=[".txt"], # 可改为 ["text/plain"] + visible=False # 默认隐藏,选了 line_by_line 时再显示 + ) model_override_input = gr.Textbox(label="自定义模型名 (--model)", placeholder="llm-name") extra_args_input = gr.Textbox(label="额外 EvalScope 参数", placeholder="例如: --disable-cache --temperature 0.7") with gr.Row(): @@ -417,6 +450,12 @@ with gr.Blocks(title="EvalScope 全功能界面") as demo: outputs=dataset_dropdown ) + dataset_dropdown.change( + toggle_dataset_file_visibility, + inputs=dataset_dropdown, + outputs=dataset_file_input + ) + # ---- Run 按钮(queue=True)---- run_button.click( fn=toggle_run, @@ -424,7 +463,7 @@ with gr.Blocks(title="EvalScope 全功能界面") as demo: input_choices, native_choices, other_choices, output_choices, api_url_input, api_token_input, - api_provider_dropdown, dataset_dropdown, + api_provider_dropdown, dataset_dropdown, dataset_file_input, max_tokens_slider, min_tokens_slider, parallel_slider, max_prompt_len_slider, num_req_slider, model_override_input,