diff --git a/backend/python/vllm/requirements-cublas12-after.txt b/backend/python/vllm/requirements-cublas12-after.txt
index 7bfe8efeb..9251ba608 100644
--- a/backend/python/vllm/requirements-cublas12-after.txt
+++ b/backend/python/vllm/requirements-cublas12-after.txt
@@ -1 +1 @@
-flash-attn
\ No newline at end of file
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.7cxx11abiTRUE-cp310-cp310-linux_x86_64.whl