From ba8955b82b2808f80d212356a7836d4fe6cd9894 Mon Sep 17 00:00:00 2001 From: hailin Date: Fri, 29 Aug 2025 09:54:36 +0800 Subject: [PATCH] . --- train_sft_ds.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train_sft_ds.py b/train_sft_ds.py index 3d5f1f0..e6bd4cb 100644 --- a/train_sft_ds.py +++ b/train_sft_ds.py @@ -487,7 +487,7 @@ def main(): torch_dtype=dtype, low_cpu_mem_usage=True, trust_remote_code=True, - attn_implementation="eager" + attn_implementation="sdpa" ) print(f"GC enabled? {getattr(model, 'is_gradient_checkpointing', False)}", flush=True)