CUDA: static lib device linking computes required static libs

Previously, CMake did not compute the set of libraries required to
properly device link a static library when
CUDA_RESOLVE_DEVICE_SYMBOLS was enabled.
This commit is contained in:
Robert Maynard
2019-08-27 13:52:55 -04:00
parent 09032f09f8
commit 2d7bb13da7
10 changed files with 138 additions and 56 deletions
@@ -16,21 +16,29 @@ else()
endif()
#Goal for this example:
# Build a static library that defines multiple methods and kernels that
# use each other.
# Resolve the device symbols into that static library
# Verify that we can't use those device symbols from anything that links
# 1. Build two static libraries that define multiple methods and kernels
# 2. Resolve the device symbols into the second static library, therefore
# confirming that the first static library is on the device link line
# 3. Verify that we can't use those device symbols from anything that links
# to the static library
string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=[compute_30] -gencode arch=compute_50,code=\\\"compute_50\\\"")
string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=[sm_30] -gencode arch=compute_50,code=\\\"compute_50\\\"")
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CUDA_STANDARD 11)
add_library(CUDAResolveDeviceLib STATIC file1.cu file2.cu)
add_library(CUDAResolveDeviceDepsA STATIC file1.cu)
add_library(CUDAResolveDeviceDepsB STATIC file2.cu)
set_target_properties(CUDAResolveDeviceDepsA CUDAResolveDeviceDepsB
PROPERTIES
CUDA_SEPARABLE_COMPILATION ON
POSITION_INDEPENDENT_CODE ON)
add_library(CUDAResolveDeviceLib STATIC file2_launch.cu)
set_target_properties(CUDAResolveDeviceLib
PROPERTIES
CUDA_SEPARABLE_COMPILATION ON
CUDA_RESOLVE_DEVICE_SYMBOLS ON
POSITION_INDEPENDENT_CODE ON)
target_link_libraries(CUDAResolveDeviceLib PRIVATE CUDAResolveDeviceDepsA CUDAResolveDeviceDepsB)
if(dump_command)
add_custom_command(TARGET CUDAResolveDeviceLib POST_BUILD
@@ -45,7 +53,8 @@ endif()
add_executable(CudaOnlyResolveDeviceSymbols main.cu)
set_target_properties(CudaOnlyResolveDeviceSymbols
PROPERTIES
CUDA_SEPARABLE_COMPILATION ON)
CUDA_SEPARABLE_COMPILATION OFF
CUDA_RESOLVE_DEVICE_SYMBOLS OFF)
target_link_libraries(CudaOnlyResolveDeviceSymbols PRIVATE CUDAResolveDeviceLib)
@@ -1,7 +1,10 @@
#pragma once
// Plain result record; file1_func (declared right after this struct) returns
// it by value.
struct result_type
{
int input; // NOTE(review): presumably the argument passed to file1_func - confirm
int sum; // NOTE(review): semantics are set by file1_func's implementation, not visible here
};
result_type __device__ file1_func(int x);
@@ -1,25 +1,9 @@
#include "file2.h"
result_type __device__ file1_func(int x);
// Device function: forwards x to file1_func and repackages the answer as a
// result_type_dynamic whose trailing boolean member is set to true.
result_type_dynamic __device__ file2_func(int x)
{
  const result_type base = file1_func(x);
  // Same aggregate initialization as before, returned without a named copy.
  return result_type_dynamic{ base.input, base.sum, true };
}
static __global__ void file2_kernel(result_type_dynamic& r, int x)
{
// call static_func which is a method that is defined in the
// static library that is always out of date
r = file2_func(x);
}
int file2_launch_kernel(int x)
{
result_type_dynamic r;
file2_kernel<<<1, 1>>>(r, x);
return r.sum;
}
@@ -8,3 +8,5 @@ struct result_type_dynamic
int sum;
bool from_static;
};
result_type_dynamic __device__ file2_func(int x);
@@ -0,0 +1,18 @@
#include "file2.h"
static __global__ void file2_kernel(result_type_dynamic& r, int x)
{
// call static_func which is a method that is defined in the
// static library that is always out of date
r = file2_func(x);
}
static __global__ void file2_kernel(result_type_dynamic& r, int x);
int file2_launch_kernel(int x)
{
result_type_dynamic r;
file2_kernel<<<1, 1>>>(r, x);
return r.sum;
}
+3 -21
View File
@@ -1,26 +1,10 @@
#include <iostream>
#include "file1.h"
#include "file2.h"
int file2_launch_kernel(int x);
result_type_dynamic __device__ file2_func(int x);
static __global__ void main_kernel(result_type_dynamic& r, int x)
{
// call function that was not device linked to us, this will cause
// a runtime failure of "invalid device function"
r = file2_func(x);
}
int main_launch_kernel(int x)
{
result_type_dynamic r;
main_kernel<<<1, 1>>>(r, x);
return r.sum;
}
int choose_cuda_device()
{
int nDevices = 0;
@@ -62,12 +46,10 @@ int main(int argc, char** argv)
return 0;
}
main_launch_kernel(1);
file2_launch_kernel(1);
cudaError_t err = cudaGetLastError();
if (err == cudaSuccess) {
// This kernel launch should fail as the file2_func was device linked
// into the static library and is not usable by the executable
std::cerr << "main_launch_kernel: kernel launch should have failed"
if (err != cudaSuccess) {
std::cerr << "file2_launch_kernel: kernel launch should have passed"
<< std::endl;
return 1;
}