"""Gradio front end that runs an ``evalscope perf`` benchmark and streams its log."""

import subprocess
import time

import gradio as gr

# Choices of the "input source" checkbox group that are mutually exclusive:
# at most one entry of each group may stay selected at a time.
_MODEL_SOURCES = frozenset({"API Models", "Local Models"})
_DATA_SOURCES = frozenset({"Benchmarks", "Custom Datasets"})
_EXCLUSIVE_GROUPS = (_MODEL_SOURCES, _DATA_SOURCES)


def _build_perf_command(api_url, api_token, model_name):
    """Return the ``evalscope perf`` argument list for one benchmark run."""
    # NOTE(review): the API key is passed on the command line, so it is
    # visible in the process list of the host while the benchmark runs.
    return [
        "evalscope", "perf",
        "--url", api_url,
        "--api", "openai",
        "--model", model_name,  # the run timestamp doubles as the --model value
        "--dataset", "openqa",
        "--max-tokens", "1024",
        "--min-tokens", "1024",
        "--parallel", "1",
        "--max-prompt-length", "15360",
        "--number", "100",
        "--api-key", api_token,
    ]


def run_eval(inputs, native, other, outputs, api_url, api_token):
    """Run ``evalscope perf`` and stream its combined stdout/stderr.

    This is a generator used as a Gradio event handler. Gradio replaces the
    output component's value on every ``yield``, so each yielded string is
    the *whole* log accumulated so far rather than just the newest line.

    Args:
        inputs: Selected input sources (passed by the click handler, unused).
        native: Selected native modules (passed by the click handler, unused).
        other: Selected external backends (passed by the click handler, unused).
        outputs: Selected output forms (passed by the click handler, unused).
        api_url: Endpoint URL forwarded to ``--url``; surrounding whitespace
            is stripped.
        api_token: Token forwarded to ``--api-key``; surrounding whitespace
            is stripped.

    Yields:
        str: The log text accumulated so far.
    """
    url = (api_url or "").strip()
    token = (api_token or "").strip()
    if not url:
        # Without an endpoint the tool can only fail; report it up front.
        yield "[Error] API URL is required\n"
        return

    # The current timestamp is used as the model name for this run.
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    command = _build_perf_command(url, token, timestamp)

    log = f"[Eval Started @ {timestamp}]\n"
    yield log

    process = None
    try:
        process = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # interleave stderr into the same stream
            text=True,
            errors="replace",  # undecodable bytes must not abort streaming
            bufsize=1,  # line-buffered so lines show up as they are written
        )
        for line in process.stdout:
            log += line
            yield log
        return_code = process.wait()
        if return_code != 0:
            log += f"[Error] evalscope exited with code {return_code}\n"
    except Exception as e:
        # UI boundary: report any failure (e.g. the executable is missing)
        # in the output box instead of crashing the handler.
        log += f"[Error] {e}\n"
    finally:
        # Also reached when the generator is closed early (e.g. the client
        # disconnects); make sure the child does not outlive the request.
        if process is not None:
            if process.poll() is None:
                process.kill()
                process.wait()
            if process.stdout is not None:
                process.stdout.close()

    log += "[Eval Finished]\n"
    yield log


def _resolve_exclusive_selection(selected):
    """Return ``selected`` with only the most recent pick of each exclusive group.

    The relative order of the surviving entries is preserved and duplicates
    are dropped, so the result is deterministic.
    """
    picks = list(selected or [])
    dropped = set()
    for group in _EXCLUSIVE_GROUPS:
        members = [item for item in picks if item in group]
        if members:
            # The last member in the list wins; every other member of the
            # group is removed.
            dropped |= group - {members[-1]}
    return [item for item in dict.fromkeys(picks) if item not in dropped]


def enforce_input_exclusive_and_toggle_fields(selected):
    """Keep the input-source checkboxes exclusive and toggle the API fields.

    Args:
        selected: Current value of the input-source checkbox group.

    Returns:
        tuple: An update for the checkbox group carrying the corrected
        selection, and an update that shows the API row only while
        "API Models" is selected.
    """
    final_selection = _resolve_exclusive_selection(selected)
    show_api_fields = "API Models" in final_selection
    return (
        gr.update(value=final_selection),
        # gr.update works across Gradio versions; the per-component
        # ``gr.Row.update`` helper was removed in Gradio 4.
        gr.update(visible=show_api_fields),
    )


# NOTE(review): the original nesting of the layout was lost in the collapsed
# source; the grouping below is a best guess and should be confirmed.
with gr.Blocks(title="EvalScope 全功能界面") as demo:
    with gr.Group():
        with gr.Row():
            input_choices = gr.CheckboxGroup(
                label="选择输入源",
                choices=["API Models", "Local Models", "Benchmarks", "Custom Datasets"],
                interactive=True,
            )
        # Hidden until "API Models" is selected.
        with gr.Row(visible=False) as api_fields:
            api_url_input = gr.Textbox(
                label="API 地址",
                placeholder="https://api.example.com/v1/chat",
            )
            api_token_input = gr.Textbox(
                label="Token 密钥",
                type="password",
                placeholder="sk-xxx",
            )

    with gr.Row():
        with gr.Column():
            native_choices = gr.CheckboxGroup(
                label="启用本地模块",
                choices=["Model Adapter", "Data Adapter", "Evaluator", "Perf Monitor"],
            )
        with gr.Column():
            other_choices = gr.CheckboxGroup(
                label="启用外部后端",
                choices=["OpenCompass", "VLMEvalKit", "RAGAS", "MTEB/CMTEB"],
            )

    with gr.Row():
        output_choices = gr.CheckboxGroup(
            label="输出形式",
            choices=["Evaluation Report", "Gradio", "WandB", "Swanlab"],
        )

    run_button = gr.Button("Run Evaluation")
    output_text = gr.TextArea(
        label="执行结果",
        lines=20,
        interactive=False,
        show_copy_button=True,
    )

    input_choices.change(
        fn=enforce_input_exclusive_and_toggle_fields,
        inputs=input_choices,
        outputs=[input_choices, api_fields],
    )
    run_button.click(
        fn=run_eval,
        inputs=[
            input_choices,
            native_choices,
            other_choices,
            output_choices,
            api_url_input,
            api_token_input,
        ],
        outputs=output_text,
        show_progress=True,
    )

# run_eval is a generator; Gradio 3.x only streams generator output when the
# queue is enabled (it is already on by default in later versions).
demo.queue()

if __name__ == '__main__':
    demo.launch(server_name="0.0.0.0", server_port=7900)