CUDA: Add Device LTO support for nvcc

Fixes #22200
This commit is contained in:
Robert Maynard
2022-04-22 12:51:26 -04:00
parent 1527d48cd0
commit 96bc59b1ca
28 changed files with 412 additions and 58 deletions
@@ -0,0 +1,32 @@
# Test project: builds a static CUDA library, a shared CUDA library that links
# it, and an executable that links the shared library, enabling
# interprocedural optimization when check_ipo_supported() reports it works.
cmake_minimum_required(VERSION 3.8)
project(CheckIPOSupported-CUDA LANGUAGES CUDA)
# Opt in to the NEW behavior of policy CMP0069 before using CheckIPOSupported.
cmake_policy(SET CMP0069 NEW)
include(CheckIPOSupported)
# ipo_supported receives the yes/no result; ipo_output receives the diagnostic
# text (it is not used further in this script).
check_ipo_supported(RESULT ipo_supported OUTPUT ipo_output)
if(ipo_supported)
# Turn IPO on by default for every target created below.
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
endif()
# Sanity check on the detection itself: an NVIDIA CUDA compiler at version
# 11.2 or newer is expected to be reported as IPO-capable, so a negative
# result there is treated as a failure of check_ipo_supported().
if(NOT ipo_supported AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA"
AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2)
message(FATAL_ERROR "CheckIPOSupported failed to correctly identify NVIDIA CUDA IPO support")
endif()
# Enable CUDA separable compilation by default for the targets created below.
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
# Static library built from foo.cu; position independent because it is linked
# into the shared library `bar` below.
add_library(foo STATIC foo.cu)
set_target_properties(foo PROPERTIES
WINDOWS_EXPORT_ALL_SYMBOLS ON
POSITION_INDEPENDENT_CODE ON)
# Shared library built from bar.cu; it consumes `foo` privately.
add_library(bar SHARED bar.cu)
set_target_properties(bar PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON)
target_link_libraries(bar PRIVATE foo)
# Executable under test; it links only against the shared library `bar`.
add_executable(CheckIPOSupported-CUDA main.cu)
target_link_libraries(CheckIPOSupported-CUDA PUBLIC bar)
# Register the executable as a CTest test of the same name.
enable_testing()
add_test(NAME CheckIPOSupported-CUDA COMMAND CheckIPOSupported-CUDA)
@@ -0,0 +1,12 @@
// Forward declaration only: the body of foo_func is not part of this file and
// has to be supplied by another translation unit.
__device__ int foo_func(int);

// Kernel that calls foo_func(x) once per thread and discards the result.
// It reads and writes no memory of its own and has no launch-shape
// requirements.
__global__ void bar_kernel(int x)
{
  (void)foo_func(x);
}
// Host entry point: launches bar_kernel with a single block containing a
// single thread and hands `x` back to the caller unchanged.
// No error checking is done here; launch errors are left for the caller to
// query.
int launch_kernel(int x)
{
  const dim3 gridShape(1);
  const dim3 blockShape(1);
  bar_kernel<<<gridShape, blockShape>>>(x);
  return x;
}
@@ -0,0 +1,4 @@
// Device function returning 42 * a + 9.
extern __device__ int foo_func(int a)
{
  const int scaled = a * 42;
  return scaled + 9;
}
@@ -0,0 +1,62 @@
#include <iostream>
#include "cuda.h"
// IMPORT decorates declarations of functions that come from a shared library:
// on Windows it expands to __declspec(dllimport), elsewhere to nothing.
#ifdef _WIN32
# define IMPORT __declspec(dllimport)
#else
# define IMPORT
#endif
// Declaration only; launch_kernel is not defined in this file.
IMPORT int launch_kernel(int x);
// Walks the CUDA devices in index order and makes the first one that
// cudaSetDevice() accepts the current device.
//
// Returns 0 once a device has been selected. Returns 1 when the device count
// or a device's properties cannot be queried, or when no device could be
// selected (including the case of zero devices).
int choose_cuda_device()
{
  int deviceCount = 0;
  if (cudaGetDeviceCount(&deviceCount) != cudaSuccess) {
    std::cerr << "Failed to retrieve the number of CUDA enabled devices"
              << std::endl;
    return 1;
  }

  for (int dev = 0; dev < deviceCount; ++dev) {
    cudaDeviceProp props;
    if (cudaGetDeviceProperties(&props, dev) != cudaSuccess) {
      std::cerr << "Could not retrieve properties from CUDA device " << dev
                << std::endl;
      return 1;
    }
    std::cout << "prop.major: " << props.major << std::endl;

    // First device that can be made current wins.
    if (cudaSetDevice(dev) == cudaSuccess) {
      return 0;
    }
    // Selection failed: report it and move on to the next device.
    std::cout << "Could not select CUDA device " << dev << std::endl;
  }

  std::cout << "Could not find a CUDA enabled card" << std::endl;
  return 1;
}
// Test driver.
//
// Selects a CUDA device, launches the kernel through launch_kernel(), and
// reports failure (exit code 1) if either the launch or the kernel execution
// produced a CUDA error. Exit code 0 means success or "no usable device".
int main()
{
  // choose_cuda_device() returns non-zero when no device could be selected.
  // In that case nothing is launched and the program still exits with 0
  // (presumably so machines without a GPU do not fail the test).
  int ret = choose_cuda_device();
  if (ret) {
    return 0;
  }

  launch_kernel(1);

  // Kernel launches do not return a status; launch-time errors are read back
  // here.
  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess) {
    std::cerr << "launch_kernel: kernel launch should have passed.\n "
                 "Error message: "
              << cudaGetErrorString(err) << std::endl;
    return 1;
  }

  // The launch is asynchronous: wait for the kernel to finish so that errors
  // raised while it runs are observed before the process exits.
  err = cudaDeviceSynchronize();
  if (err != cudaSuccess) {
    std::cerr << "launch_kernel: kernel execution should have passed.\n "
                 "Error message: "
              << cudaGetErrorString(err) << std::endl;
    return 1;
  }
  return 0;
}