From 1e989784d37903ae449e4000bb5085927fbf282a Mon Sep 17 00:00:00 2001 From: hailin Date: Thu, 28 Aug 2025 22:54:46 +0800 Subject: [PATCH] Switch attn_implementation from flash_attention_2 to sdpa --- train_sft_ds.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train_sft_ds.py b/train_sft_ds.py index c255748..e6bd4cb 100644 --- a/train_sft_ds.py +++ b/train_sft_ds.py @@ -487,7 +487,7 @@ def main(): torch_dtype=dtype, low_cpu_mem_usage=True, trust_remote_code=True, - attn_implementation="flash_attention_2" + attn_implementation="sdpa" ) print(f"GC enabled? {getattr(model, 'is_gradient_checkpointing', False)}", flush=True)