This commit is contained in:
parent
c89dd8dd77
commit
0fd04efbec
116
gradio_ui.py
116
gradio_ui.py
|
|
@ -1,19 +1,29 @@
|
||||||
import time
|
import time
|
||||||
import gradio as gr
|
import os
|
||||||
|
import glob
|
||||||
|
import threading
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import gradio as gr
|
||||||
|
|
||||||
# 全局变量:当前子进程
|
# 全局变量:当前子进程
|
||||||
current_process = None
|
current_process = None
|
||||||
|
|
||||||
# 启动 evalscope 的逻辑(支持 yield 输出)
|
|
||||||
def run_eval(inputs, native, other, outputs, api_url, api_token):
|
# ⬇️⬇️⬇️ 运行 EvalScope 并(可选)启动可视化服务 ⬇️⬇️⬇️
|
||||||
|
def run_eval(inputs, native, other, output_choices, api_url, api_token):
|
||||||
|
"""
|
||||||
|
1. 调用 `evalscope perf …` 跑基准测试
|
||||||
|
2. 若用户勾选 “Evaluation Report”,测试完成后后台启动
|
||||||
|
`evalscope app` Web 可视化服务,并在文本框追加访问链接
|
||||||
|
"""
|
||||||
global current_process
|
global current_process
|
||||||
|
|
||||||
timestamp = time.strftime("%Y%m%d-%H%M%S")
|
timestamp = time.strftime("%Y%m%d-%H%M%S")
|
||||||
command = [
|
command = [
|
||||||
"evalscope", "perf",
|
"evalscope", "perf",
|
||||||
"--url", api_url.strip(),
|
"--url", api_url.strip(),
|
||||||
"--api", "openai",
|
"--api", "openai",
|
||||||
"--model", timestamp,
|
"--model", timestamp, # 以时间戳当模型名,避免冲突
|
||||||
"--dataset", "openqa",
|
"--dataset", "openqa",
|
||||||
"--max-tokens", "1024",
|
"--max-tokens", "1024",
|
||||||
"--min-tokens", "1024",
|
"--min-tokens", "1024",
|
||||||
|
|
@ -28,23 +38,63 @@ def run_eval(inputs, native, other, outputs, api_url, api_token):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
current_process = subprocess.Popen(
|
current_process = subprocess.Popen(
|
||||||
command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1
|
command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||||
|
text=True, bufsize=1
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# 实时流式输出
|
||||||
for line in current_process.stdout:
|
for line in current_process.stdout:
|
||||||
full_output += line
|
full_output += line
|
||||||
yield full_output, True, gr.update(value="Stop Evaluation")
|
yield full_output, True, gr.update(value="Stop Evaluation")
|
||||||
|
|
||||||
current_process.stdout.close()
|
current_process.stdout.close()
|
||||||
current_process.wait()
|
current_process.wait()
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
full_output += f"[Error] {str(e)}\n"
|
full_output += f"[Error] {e}\n"
|
||||||
yield full_output, False, gr.update(value="Run Evaluation")
|
yield full_output, False, gr.update(value="Run Evaluation")
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
current_process = None
|
current_process = None
|
||||||
|
|
||||||
full_output += "[Eval Finished]\n"
|
full_output += "[Eval Finished]\n"
|
||||||
|
|
||||||
|
# ========== 可视化报告 ==========
|
||||||
|
if "Evaluation Report" in output_choices:
|
||||||
|
vis_port = 7861
|
||||||
|
outputs_root = "./outputs"
|
||||||
|
# ⬇️ EvalScope perf 会在 outputs_root 下生成 timestamp 目录
|
||||||
|
# 这里额外取最新目录备用(目前 UI 只需要根目录)
|
||||||
|
try:
|
||||||
|
latest_output = max(
|
||||||
|
glob.glob(os.path.join(outputs_root, "*")),
|
||||||
|
key=os.path.getmtime
|
||||||
|
)
|
||||||
|
except ValueError:
|
||||||
|
latest_output = outputs_root # 保险:若 outputs 还不存在
|
||||||
|
|
||||||
|
vis_cmd = [
|
||||||
|
"evalscope", "app",
|
||||||
|
"--outputs", outputs_root,
|
||||||
|
"--server-name", "0.0.0.0",
|
||||||
|
"--server-port", str(vis_port),
|
||||||
|
]
|
||||||
|
|
||||||
|
# 后台线程启动,不阻塞 UI
|
||||||
|
threading.Thread(
|
||||||
|
target=subprocess.Popen,
|
||||||
|
args=(vis_cmd,),
|
||||||
|
kwargs={"stdout": subprocess.DEVNULL,
|
||||||
|
"stderr": subprocess.STDOUT},
|
||||||
|
daemon=True
|
||||||
|
).start()
|
||||||
|
|
||||||
|
full_output += f"[Visualization 👉] http://localhost:{vis_port}\n"
|
||||||
|
|
||||||
yield full_output, False, gr.update(value="Run Evaluation")
|
yield full_output, False, gr.update(value="Run Evaluation")
|
||||||
|
|
||||||
# 停止当前 evalscope 子进程
|
|
||||||
|
# ⬇️⬇️⬇️ 停止按钮逻辑 ⬇️⬇️⬇️
|
||||||
def stop_eval():
|
def stop_eval():
|
||||||
global current_process
|
global current_process
|
||||||
if current_process and current_process.poll() is None:
|
if current_process and current_process.poll() is None:
|
||||||
|
|
@ -53,15 +103,21 @@ def stop_eval():
|
||||||
return "[Stopped by user]\n"
|
return "[Stopped by user]\n"
|
||||||
return "[No active process]\n"
|
return "[No active process]\n"
|
||||||
|
|
||||||
# Run/Stop 按钮控制器(必须是 generator)
|
|
||||||
def toggle_run(inputs, native, other, outputs, api_url, api_token, is_running):
|
# ⬇️⬇️⬇️ Run/Stop 控制器(必须是 generator) ⬇️⬇️⬇️
|
||||||
|
def toggle_run(inputs, native, other, output_choices,
|
||||||
|
api_url, api_token, is_running):
|
||||||
if not is_running:
|
if not is_running:
|
||||||
yield from run_eval(inputs, native, other, outputs, api_url, api_token)
|
# 开始跑
|
||||||
|
yield from run_eval(inputs, native, other,
|
||||||
|
output_choices, api_url, api_token)
|
||||||
else:
|
else:
|
||||||
|
# 用户点 Stop
|
||||||
msg = stop_eval()
|
msg = stop_eval()
|
||||||
yield msg, False, gr.update(value="Run Evaluation")
|
yield msg, False, gr.update(value="Run Evaluation")
|
||||||
|
|
||||||
# 控制输入互斥逻辑
|
|
||||||
|
# ⬇️⬇️⬇️ 互斥逻辑:同组保留最后一个选项 ⬇️⬇️⬇️
|
||||||
def enforce_input_exclusive_and_toggle_fields(selected):
|
def enforce_input_exclusive_and_toggle_fields(selected):
|
||||||
group1 = {"API Models", "Local Models"}
|
group1 = {"API Models", "Local Models"}
|
||||||
group2 = {"Benchmarks", "Custom Datasets"}
|
group2 = {"Benchmarks", "Custom Datasets"}
|
||||||
|
|
@ -78,39 +134,48 @@ def enforce_input_exclusive_and_toggle_fields(selected):
|
||||||
final_selection |= set(keep_only_one(group2))
|
final_selection |= set(keep_only_one(group2))
|
||||||
|
|
||||||
show_api_fields = "API Models" in final_selection
|
show_api_fields = "API Models" in final_selection
|
||||||
|
|
||||||
return (
|
return (
|
||||||
gr.update(value=list(final_selection)),
|
gr.update(value=list(final_selection)),
|
||||||
gr.Row.update(visible=show_api_fields)
|
gr.Row.update(visible=show_api_fields)
|
||||||
)
|
)
|
||||||
|
|
||||||
# 构建 Gradio UI
|
|
||||||
|
# ------------- 构建 Gradio UI -------------
|
||||||
with gr.Blocks(title="EvalScope 全功能界面") as demo:
|
with gr.Blocks(title="EvalScope 全功能界面") as demo:
|
||||||
is_running = gr.State(value=False) # 当前运行状态
|
is_running = gr.State(value=False)
|
||||||
|
|
||||||
with gr.Group():
|
with gr.Group():
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
input_choices = gr.CheckboxGroup(
|
input_choices = gr.CheckboxGroup(
|
||||||
label="选择输入源",
|
label="选择输入源",
|
||||||
choices=["API Models", "Local Models", "Benchmarks", "Custom Datasets"],
|
choices=["API Models", "Local Models",
|
||||||
|
"Benchmarks", "Custom Datasets"],
|
||||||
interactive=True
|
interactive=True
|
||||||
)
|
)
|
||||||
|
|
||||||
with gr.Row(visible=False) as api_fields:
|
with gr.Row(visible=False) as api_fields:
|
||||||
api_url_input = gr.Textbox(label="API 地址", placeholder="https://api.example.com/v1/chat")
|
api_url_input = gr.Textbox(
|
||||||
api_token_input = gr.Textbox(label="Token 密钥", type="password", placeholder="sk-xxx")
|
label="API 地址",
|
||||||
|
placeholder="https://api.example.com/v1/chat"
|
||||||
|
)
|
||||||
|
api_token_input = gr.Textbox(
|
||||||
|
label="Token 密钥",
|
||||||
|
type="password",
|
||||||
|
placeholder="sk-xxx"
|
||||||
|
)
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
native_choices = gr.CheckboxGroup(
|
native_choices = gr.CheckboxGroup(
|
||||||
label="启用本地模块",
|
label="启用本地模块",
|
||||||
choices=["Model Adapter", "Data Adapter", "Evaluator", "Perf Monitor"]
|
choices=["Model Adapter", "Data Adapter",
|
||||||
|
"Evaluator", "Perf Monitor"]
|
||||||
)
|
)
|
||||||
|
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
other_choices = gr.CheckboxGroup(
|
other_choices = gr.CheckboxGroup(
|
||||||
label="启用外部后端",
|
label="启用外部后端",
|
||||||
choices=["OpenCompass", "VLMEvalKit", "RAGAS", "MTEB/CMTEB"]
|
choices=["OpenCompass", "VLMEvalKit",
|
||||||
|
"RAGAS", "MTEB/CMTEB"]
|
||||||
)
|
)
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
|
|
@ -120,14 +185,21 @@ with gr.Blocks(title="EvalScope 全功能界面") as demo:
|
||||||
)
|
)
|
||||||
|
|
||||||
run_button = gr.Button("Run Evaluation")
|
run_button = gr.Button("Run Evaluation")
|
||||||
output_text = gr.TextArea(label="执行结果", lines=20, interactive=False, show_copy_button=True)
|
output_text = gr.TextArea(
|
||||||
|
label="执行结果",
|
||||||
|
lines=20,
|
||||||
|
interactive=False,
|
||||||
|
show_copy_button=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# 绑定输入互斥
|
||||||
input_choices.change(
|
input_choices.change(
|
||||||
fn=enforce_input_exclusive_and_toggle_fields,
|
fn=enforce_input_exclusive_and_toggle_fields,
|
||||||
inputs=input_choices,
|
inputs=input_choices,
|
||||||
outputs=[input_choices, api_fields]
|
outputs=[input_choices, api_fields]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# 绑定 Run/Stop
|
||||||
run_button.click(
|
run_button.click(
|
||||||
fn=toggle_run,
|
fn=toggle_run,
|
||||||
inputs=[
|
inputs=[
|
||||||
|
|
@ -138,5 +210,5 @@ with gr.Blocks(title="EvalScope 全功能界面") as demo:
|
||||||
show_progress=True
|
show_progress=True
|
||||||
)
|
)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == "__main__":
|
||||||
demo.launch(server_name="0.0.0.0", server_port=7900)
|
demo.launch(server_name="0.0.0.0", server_port=7900)
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,142 @@
|
||||||
|
import time
|
||||||
|
import gradio as gr
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
# Global: the subprocess currently in use (None when there is none).
# run_eval stores its Popen handle here; stop_eval reads it to terminate the process.
current_process = None
|
||||||
|
|
||||||
|
# Launch evalscope and stream its output (generator, yields UI updates)
def run_eval(inputs, native, other, outputs, api_url, api_token):
    """Run ``evalscope perf`` as a subprocess and stream its console output.

    Generator used as a Gradio event handler. Every yielded item is a
    3-tuple ``(text, is_running, button_update)``: the log accumulated so
    far, the new value of the running flag, and a ``gr.update`` that sets
    the label of the Run/Stop button.

    Args:
        inputs: Unused; accepted to match the UI event signature.
        native: Unused; accepted to match the UI event signature.
        other: Unused; accepted to match the UI event signature.
        outputs: Unused; accepted to match the UI event signature.
        api_url: Value passed to ``--url`` (surrounding whitespace removed).
        api_token: Value passed to ``--api-key`` (whitespace removed).

    Yields:
        tuple: ``(full_output, is_running, button_update)`` as described above.
    """
    global current_process

    timestamp = time.strftime("%Y%m%d-%H%M%S")
    command = [
        "evalscope", "perf",
        # Treat a missing field as empty instead of crashing on None.strip()
        "--url", (api_url or "").strip(),
        "--api", "openai",
        "--model", timestamp,
        "--dataset", "openqa",
        "--max-tokens", "1024",
        "--min-tokens", "1024",
        "--parallel", "1",
        "--max-prompt-length", "15360",
        "--number", "100",
        "--api-key", (api_token or "").strip(),
    ]

    full_output = f"[Eval Started @ {timestamp}]\n"
    yield full_output, True, gr.update(value="Stop Evaluation")

    # Work through a local handle: stop_eval may reset the global
    # `current_process` to None while this generator is still reading,
    # which previously surfaced as a bogus "[Error] 'NoneType' ..." line.
    proc = None
    try:
        proc = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
        )
        current_process = proc  # publish the handle so stop_eval can terminate it

        for line in proc.stdout:
            full_output += line
            yield full_output, True, gr.update(value="Stop Evaluation")

        proc.wait()
    except Exception as e:
        full_output += f"[Error] {e}\n"
        yield full_output, False, gr.update(value="Run Evaluation")
    finally:
        if proc is not None:
            # Reached with a live child only when streaming was interrupted
            # (exception or generator closed early): do not leave it orphaned.
            if proc.poll() is None:
                proc.terminate()
                try:
                    proc.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    proc.kill()
                    proc.wait()
            if proc.stdout is not None:
                proc.stdout.close()
        # Only clear the global if it still refers to our process.
        if current_process is proc:
            current_process = None

    full_output += "[Eval Finished]\n"
    yield full_output, False, gr.update(value="Run Evaluation")
|
||||||
|
|
||||||
|
# Stop the evalscope subprocess that is currently running
def stop_eval():
    """Ask the running evalscope subprocess to terminate.

    Returns:
        str: The "[Stopped by user]" message line when a live process was
        signalled, otherwise the "[No active process]" message line.
    """
    # Read the global exactly once so the check and the terminate() call act
    # on the same object even if run_eval resets the global in between.
    proc = current_process
    if proc is not None and proc.poll() is None:
        proc.terminate()
        # The global is intentionally left set: run_eval still uses the handle
        # to close stdout and wait for the child, and clears it in its own
        # `finally`. Clearing it here made run_eval fail on a None handle.
        return "[Stopped by user]\n"
    return "[No active process]\n"
|
||||||
|
|
||||||
|
# Run/Stop button controller (must be a generator)
def toggle_run(inputs, native, other, outputs, api_url, api_token, is_running):
    """Dispatch a click on the Run/Stop button.

    If an evaluation is flagged as running, stop it and yield one update
    that resets the button; otherwise delegate to ``run_eval`` and relay
    every update it yields.
    """
    if is_running:
        # The click is a stop request.
        stop_message = stop_eval()
        yield stop_message, False, gr.update(value="Run Evaluation")
        return

    # Nothing is running yet: start the evaluation and stream its updates.
    yield from run_eval(inputs, native, other, outputs, api_url, api_token)
|
||||||
|
|
||||||
|
# Mutual-exclusion logic for the input-source checkboxes
def enforce_input_exclusive_and_toggle_fields(selected):
    """Keep at most one option per exclusive group and toggle the API fields.

    Two groups are mutually exclusive: {"API Models", "Local Models"} and
    {"Benchmarks", "Custom Datasets"}. Within each group only the entry that
    appears last in ``selected`` is kept. The order of the remaining entries
    is preserved.

    Args:
        selected: Currently checked option labels (``None`` is treated as
            an empty selection).

    Returns:
        tuple: ``(checkbox_update, row_update)`` where the first sets the
        filtered selection and the second makes the API row visible only
        when "API Models" is still selected.
    """
    exclusive_groups = (
        {"API Models", "Local Models"},
        {"Benchmarks", "Custom Datasets"},
    )

    # De-duplicate while keeping order (the previous set round-trip also
    # removed duplicates but returned the options in arbitrary order).
    remaining = list(dict.fromkeys(selected or []))

    for group in exclusive_groups:
        members = [item for item in remaining if item in group]
        if len(members) > 1:
            winner = members[-1]  # the last entry of the group wins
            remaining = [
                item for item in remaining
                if item not in group or item == winner
            ]

    show_api_fields = "API Models" in remaining

    return (
        gr.update(value=remaining),
        # Generic gr.update instead of the per-component gr.Row.update,
        # matching the other handlers in this file.
        gr.update(visible=show_api_fields),
    )
|
||||||
|
|
||||||
|
# Build the Gradio UI
# NOTE(review): indentation was not recoverable from this view of the file; the
# nesting of the layout containers below is reconstructed — confirm against the
# real source before relying on it.
with gr.Blocks(title="EvalScope 全功能界面") as demo:
    is_running = gr.State(value=False)  # current running state

    with gr.Group():
        with gr.Row():
            # Input-source selector; its change event is bound to
            # enforce_input_exclusive_and_toggle_fields further down.
            input_choices = gr.CheckboxGroup(
                label="选择输入源",
                choices=["API Models", "Local Models", "Benchmarks", "Custom Datasets"],
                interactive=True
            )

        # API URL / token fields. Created hidden (visible=False); the row is an
        # output of the input_choices change handler, which sets its visibility.
        with gr.Row(visible=False) as api_fields:
            api_url_input = gr.Textbox(label="API 地址", placeholder="https://api.example.com/v1/chat")
            api_token_input = gr.Textbox(label="Token 密钥", type="password", placeholder="sk-xxx")

    with gr.Row():
        with gr.Column():
            native_choices = gr.CheckboxGroup(
                label="启用本地模块",
                choices=["Model Adapter", "Data Adapter", "Evaluator", "Perf Monitor"]
            )

        with gr.Column():
            other_choices = gr.CheckboxGroup(
                label="启用外部后端",
                choices=["OpenCompass", "VLMEvalKit", "RAGAS", "MTEB/CMTEB"]
            )

    with gr.Row():
        output_choices = gr.CheckboxGroup(
            label="输出形式",
            choices=["Evaluation Report", "Gradio", "WandB", "Swanlab"]
        )

    # Single button used for both Run and Stop; toggle_run's third output
    # updates its label.
    run_button = gr.Button("Run Evaluation")
    output_text = gr.TextArea(label="执行结果", lines=20, interactive=False, show_copy_button=True)

    # Re-filter the selection and show/hide the API row whenever the
    # input-source checkboxes change.
    input_choices.change(
        fn=enforce_input_exclusive_and_toggle_fields,
        inputs=input_choices,
        outputs=[input_choices, api_fields]
    )

    # toggle_run receives the checkbox selections, the API fields and the
    # running flag, and writes back the log text, the running flag and the
    # button update.
    run_button.click(
        fn=toggle_run,
        inputs=[
            input_choices, native_choices, other_choices,
            output_choices, api_url_input, api_token_input, is_running
        ],
        outputs=[output_text, is_running, run_button],
        show_progress=True
    )
|
||||||
|
|
||||||
|
# Script entry point: launch the UI bound to 0.0.0.0 on port 7900.
if __name__ == '__main__':
    demo.launch(server_name="0.0.0.0", server_port=7900)
|
||||||
Loading…
Reference in New Issue