Remove the explicit flash attention setting (attn_implementation='flash_attention_2'), since flash attention is already implemented in BetterTransformer

2026-05-25 01:58:21 -05:00 · 2025-03-31 10:40:32 -04:00
parent fa09dc37b5
commit 72ebc032e5
1 changed file with 0 additions and 1 deletion
@@ -18,7 +18,6 @@ class LLM:
            model_name,
            torch_dtype=torch.bfloat16,
            device_map=device,
-            attn_implementation='flash_attention_2',
        ).to(device)

        self.kv_cache = None