This commit is contained in:
parent
fcc5990439
commit
206d4e4370
45
gradio_ui.py
45
gradio_ui.py
|
|
@ -18,6 +18,7 @@ import gradio as gr
|
||||||
import psutil
|
import psutil
|
||||||
import signal
|
import signal
|
||||||
import shlex
|
import shlex
|
||||||
|
import pathlib
|
||||||
|
|
||||||
# ---------------- 全局进程句柄 ----------------
|
# ---------------- 全局进程句柄 ----------------
|
||||||
current_process = None
|
current_process = None
|
||||||
|
|
@ -34,11 +35,15 @@ PERF_DATASETS = [
|
||||||
"line_by_line", "custom", "speed_benchmark"
|
"line_by_line", "custom", "speed_benchmark"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def toggle_dataset_file_visibility(ds):
    """Decide whether the dataset file picker should be shown.

    Args:
        ds: The dataset name currently selected in the dataset dropdown.

    Returns:
        A ``gr.update`` payload whose ``visible`` flag is True only when
        ``ds`` equals ``"line_by_line"``.
    """
    # Only the line_by_line dataset is paired with an uploaded file here.
    needs_file = ds == "line_by_line"
    return gr.update(visible=needs_file)
|
||||||
|
|
||||||
# ---------------- perf 模式运行 ----------------
|
# ---------------- perf 模式运行 ----------------
|
||||||
def run_perf(
|
def run_perf(
|
||||||
inputs, native, other, output_choices,
|
inputs, native, other, output_choices,
|
||||||
api_url, api_token,
|
api_url, api_token,
|
||||||
api_provider, dataset,
|
api_provider, dataset,
|
||||||
|
dataset_path,
|
||||||
max_tokens, min_tokens, parallel_reqs,
|
max_tokens, min_tokens, parallel_reqs,
|
||||||
max_prompt_len, num_requests,
|
max_prompt_len, num_requests,
|
||||||
model_override,
|
model_override,
|
||||||
|
|
@ -46,6 +51,11 @@ def run_perf(
|
||||||
):
|
):
|
||||||
global current_process
|
global current_process
|
||||||
|
|
||||||
|
if dataset == "line_by_line" and dataset_path is None:
|
||||||
|
msg = "[❌] 请选择 line_by_line 数据集文件 (.txt)"
|
||||||
|
yield msg, False, gr.update(value="Run Evaluation", interactive=True), gr.update(visible=False)
|
||||||
|
return
|
||||||
|
|
||||||
timestamp = time.strftime("%Y%m%d-%H%M%S")
|
timestamp = time.strftime("%Y%m%d-%H%M%S")
|
||||||
model_name = model_override.strip() or timestamp
|
model_name = model_override.strip() or timestamp
|
||||||
|
|
||||||
|
|
@ -63,6 +73,8 @@ def run_perf(
|
||||||
"--api-key", api_token.strip(),
|
"--api-key", api_token.strip(),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if dataset == "line_by_line" and dataset_path:
|
||||||
|
command += ["--dataset-path", dataset_path]
|
||||||
|
|
||||||
if extra_args.strip():
|
if extra_args.strip():
|
||||||
command += shlex.split(extra_args.strip())
|
command += shlex.split(extra_args.strip())
|
||||||
|
|
@ -95,6 +107,8 @@ def run_perf(
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
current_process = None
|
current_process = None
|
||||||
|
if dataset_path:
|
||||||
|
pathlib.Path(dataset_path).unlink(missing_ok=True)
|
||||||
|
|
||||||
full_output += "[Eval Finished]\n"
|
full_output += "[Eval Finished]\n"
|
||||||
|
|
||||||
|
|
@ -132,6 +146,7 @@ def run_eval_tool(
|
||||||
inputs, native, other, output_choices,
|
inputs, native, other, output_choices,
|
||||||
api_url, api_token,
|
api_url, api_token,
|
||||||
api_provider, dataset,
|
api_provider, dataset,
|
||||||
|
dataset_path,
|
||||||
max_tokens, min_tokens, parallel_reqs,
|
max_tokens, min_tokens, parallel_reqs,
|
||||||
max_prompt_len, num_requests,
|
max_prompt_len, num_requests,
|
||||||
model_override, extra_args
|
model_override, extra_args
|
||||||
|
|
@ -139,6 +154,11 @@ def run_eval_tool(
|
||||||
):
|
):
|
||||||
global current_process
|
global current_process
|
||||||
|
|
||||||
|
if dataset == "line_by_line" and dataset_path is None:
|
||||||
|
msg = "[❌] 请选择 line_by_line 数据集文件 (.txt)"
|
||||||
|
yield msg, False, gr.update(value="Run Evaluation", interactive=True), gr.update(visible=False)
|
||||||
|
return
|
||||||
|
|
||||||
timestamp = time.strftime("%Y%m%d-%H%M%S")
|
timestamp = time.strftime("%Y%m%d-%H%M%S")
|
||||||
model_name = model_override.strip() or timestamp
|
model_name = model_override.strip() or timestamp
|
||||||
|
|
||||||
|
|
@ -156,10 +176,12 @@ def run_eval_tool(
|
||||||
if num_requests:
|
if num_requests:
|
||||||
command += ["--limit", str(int(num_requests))]
|
command += ["--limit", str(int(num_requests))]
|
||||||
|
|
||||||
|
|
||||||
if extra_args.strip():
|
if extra_args.strip():
|
||||||
command += shlex.split(extra_args.strip())
|
command += shlex.split(extra_args.strip())
|
||||||
|
|
||||||
|
if dataset == "line_by_line" and dataset_path:
|
||||||
|
command += ["--dataset-path", dataset_path]
|
||||||
|
|
||||||
full_output = f"[Eval Started @ {timestamp}]\nCmd: {' '.join(command)}\n"
|
full_output = f"[Eval Started @ {timestamp}]\nCmd: {' '.join(command)}\n"
|
||||||
yield full_output, True, gr.update(interactive=False), gr.update(visible=True)
|
yield full_output, True, gr.update(interactive=False), gr.update(visible=True)
|
||||||
|
|
||||||
|
|
@ -188,6 +210,8 @@ def run_eval_tool(
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
current_process = None
|
current_process = None
|
||||||
|
if dataset_path:
|
||||||
|
pathlib.Path(dataset_path).unlink(missing_ok=True)
|
||||||
|
|
||||||
full_output += "[Eval Finished]\n"
|
full_output += "[Eval Finished]\n"
|
||||||
|
|
||||||
|
|
@ -261,7 +285,7 @@ def stop_eval() -> str:
|
||||||
def toggle_run(
|
def toggle_run(
|
||||||
inputs, native, other, output_choices,
|
inputs, native, other, output_choices,
|
||||||
api_url, api_token,
|
api_url, api_token,
|
||||||
api_provider, dataset,
|
api_provider, dataset, dataset_file,
|
||||||
max_tokens, min_tokens, parallel_reqs,
|
max_tokens, min_tokens, parallel_reqs,
|
||||||
max_prompt_len, num_requests,
|
max_prompt_len, num_requests,
|
||||||
model_override,
|
model_override,
|
||||||
|
|
@ -271,6 +295,8 @@ def toggle_run(
|
||||||
):
|
):
|
||||||
global should_stop
|
global should_stop
|
||||||
|
|
||||||
|
dataset_path = dataset_file.name if dataset_file else None
|
||||||
|
|
||||||
if not inputs:
|
if not inputs:
|
||||||
msg = "[❌ 错误] 必须至少选择一个输入源(API、本地、基准或自定义)才能开始运行。\n"
|
msg = "[❌ 错误] 必须至少选择一个输入源(API、本地、基准或自定义)才能开始运行。\n"
|
||||||
yield msg, False, gr.update(value="Run Evaluation", interactive=True), gr.update(visible=False)
|
yield msg, False, gr.update(value="Run Evaluation", interactive=True), gr.update(visible=False)
|
||||||
|
|
@ -282,6 +308,7 @@ def toggle_run(
|
||||||
inputs, native, other, output_choices,
|
inputs, native, other, output_choices,
|
||||||
api_url, api_token,
|
api_url, api_token,
|
||||||
api_provider, dataset,
|
api_provider, dataset,
|
||||||
|
dataset_path,
|
||||||
max_tokens, min_tokens, parallel_reqs,
|
max_tokens, min_tokens, parallel_reqs,
|
||||||
max_prompt_len, num_requests,
|
max_prompt_len, num_requests,
|
||||||
model_override,
|
model_override,
|
||||||
|
|
@ -292,6 +319,7 @@ def toggle_run(
|
||||||
inputs, native, other, output_choices,
|
inputs, native, other, output_choices,
|
||||||
api_url, api_token,
|
api_url, api_token,
|
||||||
api_provider, dataset,
|
api_provider, dataset,
|
||||||
|
dataset_path,
|
||||||
max_tokens, min_tokens, parallel_reqs,
|
max_tokens, min_tokens, parallel_reqs,
|
||||||
max_prompt_len, num_requests,
|
max_prompt_len, num_requests,
|
||||||
model_override,
|
model_override,
|
||||||
|
|
@ -361,6 +389,11 @@ with gr.Blocks(title="EvalScope 全功能界面") as demo:
|
||||||
choices=PERF_DATASETS,
|
choices=PERF_DATASETS,
|
||||||
value=PERF_DATASETS[0]
|
value=PERF_DATASETS[0]
|
||||||
)
|
)
|
||||||
|
dataset_file_input = gr.File(
|
||||||
|
label="Line‑by‑line 数据集文件(txt)",
|
||||||
|
file_types=[".txt"], # 可改为 ["text/plain"]
|
||||||
|
visible=False # 默认隐藏,选了 line_by_line 时再显示
|
||||||
|
)
|
||||||
model_override_input = gr.Textbox(label="自定义模型名 (--model)", placeholder="llm-name")
|
model_override_input = gr.Textbox(label="自定义模型名 (--model)", placeholder="llm-name")
|
||||||
extra_args_input = gr.Textbox(label="额外 EvalScope 参数", placeholder="例如: --disable-cache --temperature 0.7")
|
extra_args_input = gr.Textbox(label="额外 EvalScope 参数", placeholder="例如: --disable-cache --temperature 0.7")
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
|
|
@ -417,6 +450,12 @@ with gr.Blocks(title="EvalScope 全功能界面") as demo:
|
||||||
outputs=dataset_dropdown
|
outputs=dataset_dropdown
|
||||||
)
|
)
|
||||||
|
|
||||||
|
dataset_dropdown.change(
|
||||||
|
toggle_dataset_file_visibility,
|
||||||
|
inputs=dataset_dropdown,
|
||||||
|
outputs=dataset_file_input
|
||||||
|
)
|
||||||
|
|
||||||
# ---- Run 按钮(queue=True)----
|
# ---- Run 按钮(queue=True)----
|
||||||
run_button.click(
|
run_button.click(
|
||||||
fn=toggle_run,
|
fn=toggle_run,
|
||||||
|
|
@ -424,7 +463,7 @@ with gr.Blocks(title="EvalScope 全功能界面") as demo:
|
||||||
input_choices, native_choices, other_choices,
|
input_choices, native_choices, other_choices,
|
||||||
output_choices,
|
output_choices,
|
||||||
api_url_input, api_token_input,
|
api_url_input, api_token_input,
|
||||||
api_provider_dropdown, dataset_dropdown,
|
api_provider_dropdown, dataset_dropdown, dataset_file_input,
|
||||||
max_tokens_slider, min_tokens_slider, parallel_slider,
|
max_tokens_slider, min_tokens_slider, parallel_slider,
|
||||||
max_prompt_len_slider, num_req_slider,
|
max_prompt_len_slider, num_req_slider,
|
||||||
model_override_input,
|
model_override_input,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue