Remove the explicit `attn_implementation='flash_attention_2'` argument, since flash attention is already implemented in BetterTransformer

This commit is contained in:
apun
2025-03-31 10:40:32 -04:00
parent fa09dc37b5
commit 72ebc032e5
-1
View File
@@ -18,7 +18,6 @@ class LLM:
model_name,
torch_dtype=torch.bfloat16,
device_map=device,
-attn_implementation='flash_attention_2',
).to(device)
self.kv_cache = None