CUDA: static lib device linking computes required static libs

Previously, CMake did not compute the set of libraries required to properly device link a static library when CUDA_RESOLVE_DEVICE_SYMBOLS was enabled.
2026-04-23 22:58:37 -05:00 · 2019-08-27 13:52:55 -04:00
parent 09032f09f8
commit 2d7bb13da7
10 changed files with 138 additions and 56 deletions
@@ -16,21 +16,29 @@ else()
 endif()

 #Goal for this example:
-# Build a static library that defines multiple methods and kernels that
-# use each other.
-# Resolve the device symbols into that static library
-# Verify that we can't use those device symbols from anything that links
+# 1. Build two static libraries that define multiple methods and kernels
+# 2. Resolve the device symbols into the second static library, therefore
+# confirming that the first static library is on the device link line
+# 3. Verify that we can't use those device symbols from anything that links
 # to the static library
-string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=[compute_30] -gencode arch=compute_50,code=\\\"compute_50\\\"")
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=[sm_30] -gencode arch=compute_50,code=\\\"compute_50\\\"")
 set(CMAKE_CXX_STANDARD 11)
 set(CMAKE_CUDA_STANDARD 11)

-add_library(CUDAResolveDeviceLib STATIC file1.cu file2.cu)
+add_library(CUDAResolveDeviceDepsA STATIC file1.cu)
+add_library(CUDAResolveDeviceDepsB STATIC file2.cu)
+set_target_properties(CUDAResolveDeviceDepsA CUDAResolveDeviceDepsB
+                      PROPERTIES
+                      CUDA_SEPARABLE_COMPILATION ON
+                      POSITION_INDEPENDENT_CODE ON)
+
+add_library(CUDAResolveDeviceLib STATIC file2_launch.cu)
 set_target_properties(CUDAResolveDeviceLib
                      PROPERTIES
                      CUDA_SEPARABLE_COMPILATION ON
                      CUDA_RESOLVE_DEVICE_SYMBOLS ON
                      POSITION_INDEPENDENT_CODE ON)
+target_link_libraries(CUDAResolveDeviceLib PRIVATE CUDAResolveDeviceDepsA CUDAResolveDeviceDepsB)

 if(dump_command)
 add_custom_command(TARGET CUDAResolveDeviceLib POST_BUILD
@@ -45,7 +53,8 @@ endif()
 add_executable(CudaOnlyResolveDeviceSymbols main.cu)
 set_target_properties(CudaOnlyResolveDeviceSymbols
                      PROPERTIES
-                      CUDA_SEPARABLE_COMPILATION ON)
+                      CUDA_SEPARABLE_COMPILATION OFF
+                      CUDA_RESOLVE_DEVICE_SYMBOLS OFF)

 target_link_libraries(CudaOnlyResolveDeviceSymbols PRIVATE CUDAResolveDeviceLib)

@@ -1,7 +1,10 @@

 #pragma once
+
 struct result_type
 {
  int input;
  int sum;
 };
+
+result_type __device__ file1_func(int x);
@@ -1,25 +1,9 @@

 #include "file2.h"

-result_type __device__ file1_func(int x);
-
 result_type_dynamic __device__ file2_func(int x)
 {
  const result_type r = file1_func(x);
  const result_type_dynamic rd{ r.input, r.sum, true };
  return rd;
 }
-
-static __global__ void file2_kernel(result_type_dynamic& r, int x)
-{
-  // call static_func which is a method that is defined in the
-  // static library that is always out of date
-  r = file2_func(x);
-}
-
-int file2_launch_kernel(int x)
-{
-  result_type_dynamic r;
-  file2_kernel<<<1, 1>>>(r, x);
-  return r.sum;
-}
@@ -8,3 +8,5 @@ struct result_type_dynamic
  int sum;
  bool from_static;
 };
+
+result_type_dynamic __device__ file2_func(int x);
@@ -0,0 +1,18 @@
+
+#include "file2.h"
+
+static __global__ void file2_kernel(result_type_dynamic& r, int x)
+{
+  // call file2_func, a device function that is declared via file2.h
+  // but is not defined in this translation unit
+  r = file2_func(x);
+}
+
+static __global__ void file2_kernel(result_type_dynamic& r, int x);
+
+int file2_launch_kernel(int x)
+{
+  result_type_dynamic r;
+  file2_kernel<<<1, 1>>>(r, x);
+  return r.sum;
+}
@@ -1,26 +1,10 @@

 #include <iostream>

-#include "file1.h"
 #include "file2.h"

 int file2_launch_kernel(int x);

-result_type_dynamic __device__ file2_func(int x);
-static __global__ void main_kernel(result_type_dynamic& r, int x)
-{
-  // call function that was not device linked to us, this will cause
-  // a runtime failure of "invalid device function"
-  r = file2_func(x);
-}
-
-int main_launch_kernel(int x)
-{
-  result_type_dynamic r;
-  main_kernel<<<1, 1>>>(r, x);
-  return r.sum;
-}
-
 int choose_cuda_device()
 {
  int nDevices = 0;
@@ -62,12 +46,10 @@ int main(int argc, char** argv)
    return 0;
  }

-  main_launch_kernel(1);
+  file2_launch_kernel(1);
  cudaError_t err = cudaGetLastError();
-  if (err == cudaSuccess) {
-    // This kernel launch should fail as the file2_func was device linked
-    // into the static library and is not usable by the executable
-    std::cerr << "main_launch_kernel: kernel launch should have failed"
+  if (err != cudaSuccess) {
+    std::cerr << "file2_launch_kernel: kernel launch should have passed"
              << std::endl;
    return 1;
  }