# evalscope_v0.17.0/gradio_ui.py

import time
import gradio as gr
import subprocess

def run_eval(inputs, native, other, outputs, api_url, api_token):
    """Run ``evalscope perf`` against an endpoint and stream its log output.

    This is a generator intended to be used as a Gradio event handler.
    Gradio replaces the output component's value on every ``yield``, so each
    yielded string is the complete log accumulated so far, not just the
    newest line.

    Args:
        inputs: Selected input sources from the UI. Accepted for interface
            compatibility; not used when building the command.
        native: Selected native modules. Not used when building the command.
        other: Selected external backends. Not used when building the command.
        outputs: Selected output forms. Not used when building the command.
        api_url: Endpoint URL passed to ``--url``; surrounding whitespace is
            stripped. ``None`` or a blank value is reported as an error.
        api_token: Value passed to ``--api-key``; surrounding whitespace is
            stripped. ``None`` is treated as an empty string.

    Yields:
        str: The accumulated log text, starting with an ``[Eval Started @ ...]``
        line and ending with ``[Eval Finished]``. Failures are appended as
        ``[Error] ...`` lines instead of being raised.
    """
    # The current timestamp doubles as the value passed to --model.
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    url = (api_url or "").strip()
    token = (api_token or "").strip()

    command = [
        "evalscope", "perf",
        "--url", url,
        "--api", "openai",
        "--model", timestamp,
        "--dataset", "openqa",
        "--max-tokens", "1024",
        "--min-tokens", "1024",
        "--parallel", "1",
        "--max-prompt-length", "15360",
        "--number", "100",
        "--api-key", token,
    ]

    log = f"[Eval Started @ {timestamp}]\n"
    yield log

    process = None
    try:
        if not url:
            raise ValueError("API URL is required")
        process = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # interleave stderr into the same stream
            text=True,
            bufsize=1,  # line-buffered so output is streamed as it is produced
        )
        for line in process.stdout:
            log += line
            yield log
        return_code = process.wait()
        if return_code != 0:
            log += f"[Error] evalscope exited with code {return_code}\n"
            yield log
    except Exception as e:
        # Top-level boundary for the UI: surface the failure in the log.
        log += f"[Error] {str(e)}\n"
        yield log
    finally:
        if process is not None:
            if process.poll() is None:
                # Still running: the consumer closed the generator early or
                # reading failed. Do not leave the child process behind.
                process.terminate()
                try:
                    process.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
            if process.stdout is not None:
                process.stdout.close()

    log += "[Eval Finished]\n"
    yield log
def enforce_input_exclusive_and_toggle_fields(selected):
    """Keep at most one choice per exclusive group and toggle the API fields.

    Two groups are mutually exclusive internally: ``{"API Models",
    "Local Models"}`` and ``{"Benchmarks", "Custom Datasets"}``. When several
    members of one group appear in ``selected``, only the last one is kept.
    Items outside both groups are passed through unchanged, and the original
    ordering of the surviving items is preserved.

    Args:
        selected: Currently selected checkbox values. ``None`` is treated as
            an empty selection.

    Returns:
        tuple: Two Gradio updates - the corrected selection for the checkbox
        group, and a visibility update that is visible only when
        ``"API Models"`` remains selected.
    """
    exclusive_groups = (
        {"API Models", "Local Models"},
        {"Benchmarks", "Custom Datasets"},
    )

    # De-duplicate while preserving order so "last selected" is well defined.
    ordered = list(dict.fromkeys(selected or []))

    superseded = set()
    for group in exclusive_groups:
        members = [item for item in ordered if item in group]
        # Everything except the last member of the group is dropped.
        superseded.update(members[:-1])

    final_selection = [item for item in ordered if item not in superseded]
    show_api_fields = "API Models" in final_selection

    # gr.update works for any component; the per-component ``.update``
    # classmethods are not available in newer Gradio releases.
    return (
        gr.update(value=final_selection),
        gr.update(visible=show_api_fields),
    )

# Build the UI. Components are laid out in the order they are created inside
# the nested context managers, so statement order here is the visual order.
with gr.Blocks(title="EvalScope 全功能界面") as demo:
    # Input-source selector; its change handler is wired up further below.
    with gr.Group():
        with gr.Row():
            input_choices = gr.CheckboxGroup(
                label="选择输入源",
                choices=["API Models", "Local Models", "Benchmarks", "Custom Datasets"],
                interactive=True
            )

    # API endpoint and token fields. Hidden initially; visibility is driven by
    # the input_choices change handler, which lists this row as an output.
    with gr.Row(visible=False) as api_fields:
        api_url_input = gr.Textbox(label="API 地址", placeholder="https://api.example.com/v1/chat")
        api_token_input = gr.Textbox(label="Token 密钥", type="password", placeholder="sk-xxx")

    # Two side-by-side columns: native modules and external backends.
    with gr.Row():
        with gr.Column():
            native_choices = gr.CheckboxGroup(
                label="启用本地模块",
                choices=["Model Adapter", "Data Adapter", "Evaluator", "Perf Monitor"]
            )

        with gr.Column():
            other_choices = gr.CheckboxGroup(
                label="启用外部后端",
                choices=["OpenCompass", "VLMEvalKit", "RAGAS", "MTEB/CMTEB"]
            )

    # Output-form selector.
    with gr.Row():
        output_choices = gr.CheckboxGroup(
            label="输出形式",
            choices=["Evaluation Report", "Gradio", "WandB", "Swanlab"]
        )

    # Trigger button and the read-only text area that receives run_eval output.
    run_button = gr.Button("Run Evaluation")
    output_text = gr.TextArea(label="执行结果", lines=20, interactive=False, show_copy_button=True)

    # On every selection change, rewrite the selection itself and update the
    # visibility of the API fields row.
    input_choices.change(
        fn=enforce_input_exclusive_and_toggle_fields,
        inputs=input_choices,
        outputs=[input_choices, api_fields]
    )

    # run_eval receives all four checkbox groups plus the API URL and token,
    # in the same order as its positional parameters.
    run_button.click(
        fn=run_eval,
        inputs=[input_choices, native_choices, other_choices, output_choices, api_url_input, api_token_input],
        outputs=output_text,
        show_progress=True
    )

# When executed as a script, serve the UI on port 7900, bound to 0.0.0.0.
# NOTE(review): 0.0.0.0 exposes the app beyond localhost - confirm intended.
if __name__ == '__main__':
    demo.launch(server_name="0.0.0.0", server_port=7900)