From fc42feee4fc5ac3a77b2e5707b2385c38b404cb3 Mon Sep 17 00:00:00 2001 From: hailin Date: Thu, 28 Aug 2025 23:19:34 +0800 Subject: [PATCH] Switch attn_implementation from "sdpa" to "eager" in train_sft_ds.py --- train_sft_ds.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train_sft_ds.py b/train_sft_ds.py index e6bd4cb..3d5f1f0 100644 --- a/train_sft_ds.py +++ b/train_sft_ds.py @@ -487,7 +487,7 @@ def main(): torch_dtype=dtype, low_cpu_mem_usage=True, trust_remote_code=True, - attn_implementation="sdpa" + attn_implementation="eager" ) print(f"GC enabled? {getattr(model, 'is_gradient_checkpointing', False)}", flush=True)