diff --git a/gradio_ui.py b/gradio_ui.py
index f7f1e96..be854c7 100644
--- a/gradio_ui.py
+++ b/gradio_ui.py
@@ -18,6 +18,7 @@ import gradio as gr
 import psutil
 import signal
 import shlex
+import pathlib
 
 # ---------------- 全局进程句柄 ----------------
 current_process = None
@@ -34,11 +35,15 @@ PERF_DATASETS = [
     "line_by_line", "custom", "speed_benchmark"
 ]
 
+def toggle_dataset_file_visibility(ds):  # ds: the value handed in by the event that calls this helper
+    return gr.update(visible=(ds == "line_by_line"))  # visible only when ds is exactly "line_by_line"
+
 # ---------------- perf 模式运行 ----------------
 def run_perf(
     inputs, native, other, output_choices,
     api_url, api_token,
     api_provider, dataset,
+    dataset_path, 
     max_tokens, min_tokens, parallel_reqs,
     max_prompt_len, num_requests,
     model_override,
@@ -46,6 +51,11 @@ def run_perf(
 ):
     global current_process
 
+    if dataset == "line_by_line" and dataset_path is None:
+        msg = "[❌] 请选择 line_by_line 数据集文件 (.txt)"
+        yield msg, False, gr.update(value="Run Evaluation", interactive=True), gr.update(visible=False)
+        return
+
     timestamp = time.strftime("%Y%m%d-%H%M%S")
     model_name = model_override.strip() or timestamp
 
@@ -63,6 +73,8 @@ def run_perf(
         "--api-key", api_token.strip(),
     ]
 
+    if dataset == "line_by_line" and dataset_path:
+        command += ["--dataset-path", dataset_path]
     
     if extra_args.strip():
         command += shlex.split(extra_args.strip())
@@ -95,6 +107,8 @@ def run_perf(
 
     finally:
         current_process = None
+        if dataset_path:
+            pathlib.Path(dataset_path).unlink(missing_ok=True)
 
     full_output += "[Eval Finished]\n"
 
@@ -132,6 +146,7 @@ def run_eval_tool(
     inputs, native, other, output_choices,
     api_url, api_token,
     api_provider, dataset,
+    dataset_path,
     max_tokens, min_tokens, parallel_reqs,
     max_prompt_len, num_requests,
     model_override, extra_args
@@ -139,6 +154,11 @@ def run_eval_tool(
 ):
     global current_process
 
+    if dataset == "line_by_line" and dataset_path is None:
+        msg = "[❌] 请选择 line_by_line 数据集文件 (.txt)"
+        yield msg, False, gr.update(value="Run Evaluation", interactive=True), gr.update(visible=False)
+        return
+    
     timestamp = time.strftime("%Y%m%d-%H%M%S")
     model_name = model_override.strip() or timestamp
 
@@ -156,10 +176,12 @@ def run_eval_tool(
     if num_requests:
         command += ["--limit", str(int(num_requests))]
 
-
     if extra_args.strip():
         command += shlex.split(extra_args.strip())
 
+    if dataset == "line_by_line" and dataset_path:
+        command += ["--dataset-path", dataset_path]
+
     full_output = f"[Eval Started @ {timestamp}]\nCmd: {' '.join(command)}\n"
     yield full_output, True, gr.update(interactive=False), gr.update(visible=True)
 
@@ -188,6 +210,8 @@ def run_eval_tool(
 
     finally:
         current_process = None
+        if dataset_path:
+            pathlib.Path(dataset_path).unlink(missing_ok=True)
 
     full_output += "[Eval Finished]\n"
 
@@ -261,7 +285,7 @@ def stop_eval() -> str:
 def toggle_run(
     inputs, native, other, output_choices,
     api_url, api_token,
-    api_provider, dataset,
+    api_provider, dataset,  dataset_file,
     max_tokens, min_tokens, parallel_reqs,
     max_prompt_len, num_requests,
     model_override,
@@ -271,6 +295,8 @@ def toggle_run(
 ):
     global should_stop
 
+    dataset_path = dataset_file.name if dataset_file else None
+
     if not inputs:
         msg = "[❌ 错误] 必须至少选择一个输入源（API、本地、基准或自定义）才能开始运行。\n"
         yield msg, False, gr.update(value="Run Evaluation", interactive=True), gr.update(visible=False)
@@ -282,6 +308,7 @@ def toggle_run(
             inputs, native, other, output_choices,
             api_url, api_token,
             api_provider, dataset,
+            dataset_path,
             max_tokens, min_tokens, parallel_reqs,
             max_prompt_len, num_requests,
             model_override,
@@ -292,6 +319,7 @@ def toggle_run(
             inputs, native, other, output_choices,
             api_url, api_token,
             api_provider, dataset,
+            dataset_path,
             max_tokens, min_tokens, parallel_reqs,
             max_prompt_len, num_requests,
             model_override,
@@ -361,6 +389,11 @@ with gr.Blocks(title="EvalScope 全功能界面") as demo:
                     choices=PERF_DATASETS,
                     value=PERF_DATASETS[0]
                 )
+                dataset_file_input = gr.File(  # file picker for the line_by_line dataset
+                    label="Line‑by‑line 数据集文件（txt）",
+                    file_types=[".txt"],           # could be changed to ["text/plain"]
+                    visible=False                  # hidden by default; shown once line_by_line is selected
+                )
             model_override_input = gr.Textbox(label="自定义模型名 (--model)", placeholder="llm-name")
             extra_args_input = gr.Textbox(label="额外 EvalScope 参数", placeholder="例如: --disable-cache --temperature 0.7")
             with gr.Row():
@@ -417,6 +450,12 @@ with gr.Blocks(title="EvalScope 全功能界面") as demo:
         outputs=dataset_dropdown
     )
 
+    dataset_dropdown.change(  # on each dropdown change, recompute whether the dataset file picker is visible
+        toggle_dataset_file_visibility,
+        inputs=dataset_dropdown,
+        outputs=dataset_file_input
+    )
+
     # ---- Run 按钮（queue=True）----
     run_button.click(
         fn=toggle_run,
@@ -424,7 +463,7 @@ with gr.Blocks(title="EvalScope 全功能界面") as demo:
             input_choices, native_choices, other_choices,
             output_choices,
             api_url_input, api_token_input,
-            api_provider_dropdown, dataset_dropdown,
+            api_provider_dropdown, dataset_dropdown, dataset_file_input,
             max_tokens_slider, min_tokens_slider, parallel_slider,
             max_prompt_len_slider, num_req_slider,
             model_override_input,