CUDA/NVIDIA: fix LTO with explicit architectures

This commit is contained in:
Rafael Stahl
2023-05-26 03:23:15 -04:00
committed by Brad King
parent a645a80040
commit 45584508fa
2 changed files with 10 additions and 3 deletions

View File

@@ -3504,7 +3504,7 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(cmBuildStep compileOrLink,
if (architecture.virtual_) {
flags += "compute_" + architecture.name;
if (architecture.real) {
if (ipoEnabled || architecture.real) {
flags += ",";
}
}

View File

@@ -9,16 +9,23 @@ project(DeviceLTO CUDA)
# Targets for the DeviceLTO test project: a static CUDA library and an
# executable that links against it.
add_library(CUDA_dlto STATIC file1.cu file2.cu file3.cu)
add_executable(CudaOnlyDeviceLTO main.cu)

# Architecture list shared by both targets.  Start from the full list held in
# CMAKE_CUDA_ARCHITECTURES_ALL.
set(archs_to_test "${CMAKE_CUDA_ARCHITECTURES_ALL}")
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA"
   AND NOT "${CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR}" STREQUAL "")
  # Also test with at least one virtual architecture.
  # Read the last entry with GET instead of POP_BACK so the CMake-provided
  # list is left unmodified; the emptiness guard above avoids appending a
  # malformed "-virtual" entry when no major architectures are listed.
  list(GET CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR -1 latest_arch)
  list(APPEND archs_to_test "${latest_arch}-virtual")
endif()

# CUDA_ARCHITECTURES is given exactly once per target; a repeated key in the
# same call would simply be overridden by the later value.
set_target_properties(CUDA_dlto
  PROPERTIES
    CUDA_ARCHITECTURES "${archs_to_test}"
    CUDA_SEPARABLE_COMPILATION ON
    POSITION_INDEPENDENT_CODE ON
)
set_target_properties(CudaOnlyDeviceLTO
  PROPERTIES
    CUDA_SEPARABLE_COMPILATION ON
    CUDA_ARCHITECTURES "${archs_to_test}"
)
target_link_libraries(CudaOnlyDeviceLTO PRIVATE CUDA_dlto)