From 9ead7c2129d7811a44e3be32b0eef2a12c062e65 Mon Sep 17 00:00:00 2001
From: Vito Gamberini
Date: Thu, 20 Feb 2025 13:03:37 -0500
Subject: [PATCH] FindBLAS: Add support for NVPL

Teach FindBLAS and FindLAPACK to locate the NVIDIA Performance Libraries
(NVPL).  When BLA_VENDOR is "NVPL" or "All", the modules call
find_package(nvpl) and pick the first existing imported target named
nvpl::blas<int><thread> (resp. nvpl::lapack<int><thread>), where <int> is
"_lp64" or "_ilp64" and <thread> is "_omp" or "_seq".  Candidates are
tried thread-model first (OMP before SEQ), then integer size (lp64 before
ilp64), restricted by BLA_SIZEOF_INTEGER and the new BLA_THREAD variable.

BLA_THREAD accepts SEQ, OMP, or ANY (the default when unset); any other
value is a fatal error.

The nvpl package is probed with QUIET so that the default
BLA_VENDOR=All search does not warn on machines without NVPL, and both
modules compare the vendor name with STREQUAL.

The BLAS/LAPACK test helper learns a "thread=" variant key that is
forwarded as -DBLA_THREAD=<value>, and the debian12 aarch64 CI job
exercises the NVPL lp64/ilp64 and SEQ/OMP/unset combinations.

---
 .../ci/configure_debian12_aarch64_ninja.cmake | 13 +++-
 Help/release/dev/nvpl-support.rst             |  5 ++
 Modules/FindBLAS.cmake                        | 76 +++++++++++++++++++
 Modules/FindLAPACK.cmake                      | 70 +++++++++++++++++
 Tests/FindBLAS/add_BLAS_LAPACK_tests.cmake    |  7 +-
 5 files changed, 168 insertions(+), 3 deletions(-)
 create mode 100644 Help/release/dev/nvpl-support.rst

diff --git a/.gitlab/ci/configure_debian12_aarch64_ninja.cmake b/.gitlab/ci/configure_debian12_aarch64_ninja.cmake
index d2a401e9af..00843d5742 100644
--- a/.gitlab/ci/configure_debian12_aarch64_ninja.cmake
+++ b/.gitlab/ci/configure_debian12_aarch64_ninja.cmake
@@ -1,13 +1,22 @@
 set(CMake_TEST_C_STANDARDS "90;99;11;17;23" CACHE STRING "")
 set(CMake_TEST_CXX_STANDARDS "98;11;14;17;20;23" CACHE STRING "")
 
+set(blas_lapack_cases
+  All
+  static=1 Generic
+  model=lp64
+  static=0 thread=SEQ NVPL thread=OMP NVPL thread= NVPL
+  model=ilp64
+  static=0 thread=SEQ NVPL thread=OMP NVPL thread= NVPL
+  )
+
 set(CMake_TEST_CTestUpdate_BZR "ON" CACHE BOOL "")
 set(CMake_TEST_CTestUpdate_CVS "ON" CACHE BOOL "")
 set(CMake_TEST_CTestUpdate_GIT "ON" CACHE BOOL "")
 set(CMake_TEST_CTestUpdate_HG "ON" CACHE BOOL "")
 set(CMake_TEST_CTestUpdate_SVN "ON" CACHE BOOL "")
 set(CMake_TEST_FindALSA "ON" CACHE BOOL "")
-set(CMake_TEST_FindBLAS "All;static=1;Generic" CACHE STRING "")
+set(CMake_TEST_FindBLAS "${blas_lapack_cases}" CACHE STRING "")
 set(CMake_TEST_FindBoost "ON" CACHE BOOL "")
 set(CMake_TEST_FindBoost_Python "ON" CACHE BOOL "")
 set(CMake_TEST_FindBZip2 "ON" CACHE BOOL "")
@@ -51,7 +60,7 @@ set(CMake_TEST_FindIntl "ON" CACHE BOOL "")
 set(CMake_TEST_FindJNI "ON" CACHE BOOL "")
 set(CMake_TEST_FindJPEG "ON" CACHE BOOL "")
 set(CMake_TEST_FindJsonCpp "ON" CACHE BOOL "")
-set(CMake_TEST_FindLAPACK "All;static=1;Generic" CACHE STRING "")
+set(CMake_TEST_FindLAPACK "${blas_lapack_cases}" CACHE STRING "")
 set(CMake_TEST_FindLibArchive "ON" CACHE BOOL "")
 set(CMake_TEST_FindLibinput "ON" CACHE BOOL "")
 set(CMake_TEST_FindLibLZMA "ON" CACHE BOOL "")
diff --git a/Help/release/dev/nvpl-support.rst b/Help/release/dev/nvpl-support.rst
new file mode 100644
index 0000000000..e8e5791245
--- /dev/null
+++ b/Help/release/dev/nvpl-support.rst
@@ -0,0 +1,5 @@
+NVPL Support
+------------
+
+* The :module:`FindBLAS` and :module:`FindLAPACK` modules now support the
+  NVIDIA Performance Libraries (NVPL).
diff --git a/Modules/FindBLAS.cmake b/Modules/FindBLAS.cmake
index 6d60c9d86a..b655abe378 100644
--- a/Modules/FindBLAS.cmake
+++ b/Modules/FindBLAS.cmake
@@ -54,6 +54,21 @@ The following variables may be set to influence this module's behavior:
     Search for any BLAS/LAPACK.
     Most likely, a BLAS/LAPACK with 32-bit integer interfaces will be found.
 
+``BLA_THREAD``
+  .. versionadded:: 4.1
+
+  Specify the BLAS/LAPACK threading model:
+
+  ``SEQ``
+    Sequential model
+  ``OMP``
+    OpenMP model
+  ``ANY``
+    Search for any BLAS/LAPACK, if both are available most likely ``OMP`` will
+    be found.
+
+  This is currently only supported by NVIDIA NVPL.
+
 Imported Targets
 ^^^^^^^^^^^^^^^^
 
@@ -169,6 +184,11 @@ BLAS/LAPACK Vendors
 
   A BLAS/LAPACK demuxing library using PLT trampolines
 
+``NVPL``
+  .. versionadded:: 4.1
+
+  NVIDIA Performance Libraries
+
 ``NVHPC``
   .. versionadded:: 3.21
 
@@ -417,6 +437,16 @@ else()
   message(FATAL_ERROR "BLA_SIZEOF_INTEGER can have only , ANY, 4, or 8 values")
 endif()
 
+if(NOT BLA_THREAD)
+  set(_blas_thread "ANY")
+elseif((BLA_THREAD STREQUAL "ANY") OR
+       (BLA_THREAD STREQUAL "SEQ") OR
+       (BLA_THREAD STREQUAL "OMP"))
+  set(_blas_thread ${BLA_THREAD})
+else()
+  message(FATAL_ERROR "BLA_THREAD can have only , ANY, SEQ, or OMP values")
+endif()
+
 # Implicitly linked BLAS libraries?
 if(BLA_VENDOR STREQUAL "All")
   if(NOT BLAS_LIBRARIES)
@@ -1315,6 +1345,52 @@ if(NOT BLAS_LIBRARIES
   unset(_blas_fjlapack_lib)
 endif()
 
+# nVidia NVPL? (https://developer.nvidia.com/nvpl)
+if(BLA_VENDOR STREQUAL "NVPL" OR BLA_VENDOR STREQUAL "All")
+  # Prefer lp64 unless ilp64 is requested.
+  if((_blas_sizeof_integer EQUAL 4) OR (_blas_sizeof_integer STREQUAL "ANY"))
+    list(APPEND _blas_nvpl_ints "_lp64")
+  endif()
+  if((_blas_sizeof_integer EQUAL 8) OR (_blas_sizeof_integer STREQUAL "ANY"))
+    list(APPEND _blas_nvpl_ints "_ilp64")
+  endif()
+
+  # Prefer OMP if available
+  if((_blas_thread STREQUAL "OMP") OR (_blas_thread STREQUAL "ANY"))
+    list(APPEND _blas_nvpl_threads "_omp")
+  endif()
+  if((_blas_thread STREQUAL "SEQ") OR (_blas_thread STREQUAL "ANY"))
+    list(APPEND _blas_nvpl_threads "_seq")
+  endif()
+
+  if(NOT BLAS_LIBRARIES)
+    find_package(nvpl QUIET)
+    if(nvpl_FOUND)
+      foreach(_nvpl_thread IN LISTS _blas_nvpl_threads)
+        foreach(_nvpl_int IN LISTS _blas_nvpl_ints)
+
+          set(_blas_lib "nvpl::blas${_nvpl_int}${_nvpl_thread}")
+
+          if(TARGET ${_blas_lib})
+            set(BLAS_LIBRARIES ${_blas_lib})
+            break()
+          endif()
+
+        endforeach()
+
+        if(BLAS_LIBRARIES)
+          break()
+        endif()
+
+      endforeach()
+    endif()
+  endif()
+
+  unset(_blas_lib)
+  unset(_blas_nvpl_ints)
+  unset(_blas_nvpl_threads)
+endif()
+
 # BLAS in nVidia HPC SDK? (https://developer.nvidia.com/hpc-sdk)
 if(BLA_VENDOR STREQUAL "NVHPC" OR BLA_VENDOR STREQUAL "All")
   set(_blas_nvhpc_lib "blas")
diff --git a/Modules/FindLAPACK.cmake b/Modules/FindLAPACK.cmake
index 8c9d0b4773..92c6295079 100644
--- a/Modules/FindLAPACK.cmake
+++ b/Modules/FindLAPACK.cmake
@@ -55,6 +55,21 @@ The following variables may be set to influence this module's behavior:
     Search for any BLAS/LAPACK.
     Most likely, a BLAS/LAPACK with 32-bit integer interfaces will be found.
 
+``BLA_THREAD``
+  .. versionadded:: 4.1
+
+  Specify the BLAS/LAPACK threading model:
+
+  ``SEQ``
+    Sequential model
+  ``OMP``
+    OpenMP model
+  ``ANY``
+    Search for any BLAS/LAPACK, if both are available most likely ``OMP`` will
+    be found.
+
+  This is currently only supported by NVIDIA NVPL.
+
 Imported Targets
 ^^^^^^^^^^^^^^^^
 
@@ -278,6 +293,16 @@ else()
   message(FATAL_ERROR "BLA_SIZEOF_INTEGER can have only , ANY, 4, or 8 values")
 endif()
 
+if(NOT BLA_THREAD)
+  set(_lapack_thread "ANY")
+elseif((BLA_THREAD STREQUAL "ANY") OR
+       (BLA_THREAD STREQUAL "SEQ") OR
+       (BLA_THREAD STREQUAL "OMP"))
+  set(_lapack_thread ${BLA_THREAD})
+else()
+  message(FATAL_ERROR "BLA_THREAD can have only , ANY, SEQ, or OMP values")
+endif()
+
 # Load BLAS
 if(NOT LAPACK_NOT_FOUND_MESSAGE)
   _lapack_find_dependency(BLAS)
@@ -693,6 +718,51 @@ if(NOT LAPACK_NOT_FOUND_MESSAGE)
     endif()
   endif()
 
+  # nVidia NVPL? (https://developer.nvidia.com/nvpl)
+  if(NOT LAPACK_LIBRARIES
+      AND (BLA_VENDOR STREQUAL "NVPL" OR BLA_VENDOR STREQUAL "All"))
+    # Prefer lp64 unless ilp64 is requested.
+    if((_lapack_sizeof_integer EQUAL 4) OR (_lapack_sizeof_integer STREQUAL "ANY"))
+      list(APPEND _lapack_nvpl_ints "_lp64")
+    endif()
+    if((_lapack_sizeof_integer EQUAL 8) OR (_lapack_sizeof_integer STREQUAL "ANY"))
+      list(APPEND _lapack_nvpl_ints "_ilp64")
+    endif()
+
+    # Prefer OMP if available
+    if((_lapack_thread STREQUAL "OMP") OR (_lapack_thread STREQUAL "ANY"))
+      list(APPEND _lapack_nvpl_threads "_omp")
+    endif()
+    if((_lapack_thread STREQUAL "SEQ") OR (_lapack_thread STREQUAL "ANY"))
+      list(APPEND _lapack_nvpl_threads "_seq")
+    endif()
+
+    find_package(nvpl QUIET)
+    if(nvpl_FOUND)
+      foreach(_nvpl_thread IN LISTS _lapack_nvpl_threads)
+        foreach(_nvpl_int IN LISTS _lapack_nvpl_ints)
+
+          set(_lapack_lib "nvpl::lapack${_nvpl_int}${_nvpl_thread}")
+
+          if(TARGET ${_lapack_lib})
+            set(LAPACK_LIBRARIES ${_lapack_lib})
+            break()
+          endif()
+
+        endforeach()
+
+        if(LAPACK_LIBRARIES)
+          break()
+        endif()
+
+      endforeach()
+    endif()
+
+    unset(_lapack_lib)
+    unset(_lapack_nvpl_ints)
+    unset(_lapack_nvpl_threads)
+  endif()
+
   # NVHPC Library?
 
   if(NOT LAPACK_LIBRARIES
diff --git a/Tests/FindBLAS/add_BLAS_LAPACK_tests.cmake b/Tests/FindBLAS/add_BLAS_LAPACK_tests.cmake
index 42fe386f9e..4fe0426e89 100644
--- a/Tests/FindBLAS/add_BLAS_LAPACK_tests.cmake
+++ b/Tests/FindBLAS/add_BLAS_LAPACK_tests.cmake
@@ -9,12 +9,13 @@ function(add_BLAS_LAPACK_tests var)
   set(compiler "")
   set(model "")
   set(static "")
+  set(thread "")
 
   set(sizeof_int_lp64 4)
   set(sizeof_int_ilp64 8)
 
   foreach(variant IN LISTS ${var})
-    if(variant MATCHES "^(all|compiler|model|static)=(.*)$")
+    if(variant MATCHES "^(all|compiler|model|static|thread)=(.*)$")
       set("${CMAKE_MATCH_1}" "${CMAKE_MATCH_2}")
       continue()
     elseif(variant MATCHES "^([^=]+)=(.*)$")
@@ -39,6 +40,10 @@ function(add_BLAS_LAPACK_tests var)
         string(APPEND variant_name "_Static")
         list(APPEND variant_options "-DBLA_STATIC=ON")
       endif()
+      if(thread)
+        string(APPEND variant_name "_${thread}")
+        list(APPEND variant_options "-DBLA_THREAD=${thread}")
+      endif()
       add_test(NAME Find${package}.Test_${variant_name} COMMAND
         ${CMAKE_CTEST_COMMAND} -C $
         --build-and-test