Merge topic 'cuda_handle_target_objects_in_device_link'

cd984261e1 CUDA: Device linking now uses TARGET_OBJECTS content
aa8facefe8 CUDA: Visual Studio propagate objects to device linking

Acked-by: Kitware Robot <kwrobot@kitware.com>
Merge-request: !8829
This commit is contained in:
Brad King
2023-09-26 13:42:33 +00:00
committed by Kitware Robot
7 changed files with 112 additions and 22 deletions

View File

@@ -101,9 +101,7 @@ void cmLinkLineDeviceComputer::ComputeLinkLibraries(
ItemVector const& items = cli.GetItems();
std::string config = cli.GetConfig();
bool skipItemAfterFramework = false;
// Note:
// Any modification of this algorithm should be reflected also in
// cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions
for (auto const& item : items) {
if (skipItemAfterFramework) {
skipItemAfterFramework = false;
@@ -132,11 +130,13 @@ void cmLinkLineDeviceComputer::ComputeLinkLibraries(
BT<std::string> linkLib;
if (item.IsPath == cmComputeLinkInformation::ItemIsPath::Yes) {
// nvcc understands absolute paths to libraries ending in '.a' or '.lib'.
// These should be passed to nvlink. Other extensions need to be left
// out because nvlink may not understand or need them. Even though it
// can tolerate '.so' or '.dylib' it cannot tolerate '.so.1'.
if (cmHasLiteralSuffix(item.Value.Value, ".a") ||
// nvcc understands absolute paths to libraries ending in '.o', .a', or
// '.lib'. These should be passed to nvlink. Other extensions need to be
// left out because nvlink may not understand or need them. Even though
// it can tolerate '.so' or '.dylib' it cannot tolerate '.so.1'.
if (cmHasLiteralSuffix(item.Value.Value, ".o") ||
cmHasLiteralSuffix(item.Value.Value, ".obj") ||
cmHasLiteralSuffix(item.Value.Value, ".a") ||
cmHasLiteralSuffix(item.Value.Value, ".lib")) {
linkLib.Value = item
.GetFormattedItem(this->ConvertToOutputFormat(

View File

@@ -3864,22 +3864,41 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions(
}
cudaLinkOptions.AppendFlagString("AdditionalOptions", linkFlags);
// For static libraries that have device linking enabled compute
// the libraries
if (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY &&
doDeviceLinking) {
cmComputeLinkInformation& cli = *pcli;
cmLinkLineDeviceComputer computer(
this->LocalGenerator,
this->LocalGenerator->GetStateSnapshot().GetDirectory());
std::vector<BT<std::string>> btLibVec;
computer.ComputeLinkLibraries(cli, std::string{}, btLibVec);
if (doDeviceLinking) {
std::vector<std::string> libVec;
for (auto const& item : btLibVec) {
libVec.emplace_back(item.Value);
auto const& kinded = this->GeneratorTarget->GetKindedSources(configName);
// CMake conversion uses full paths when possible to allow deeper trees.
// However, CUDA 8.0 msbuild rules fail on absolute paths so for CUDA
// we must use relative paths.
const bool forceRelative = true;
for (cmGeneratorTarget::SourceAndKind const& si : kinded.Sources) {
switch (si.Kind) {
case cmGeneratorTarget::SourceKindExternalObject: {
std::string path =
this->ConvertPath(si.Source.Value->GetFullPath(), forceRelative);
ConvertToWindowsSlash(path);
libVec.emplace_back(std::move(path));
} break;
default:
break;
}
}
// For static libraries that have device linking enabled compute
// the libraries
if (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY) {
cmComputeLinkInformation& cli = *pcli;
cmLinkLineDeviceComputer computer(
this->LocalGenerator,
this->LocalGenerator->GetStateSnapshot().GetDirectory());
std::vector<BT<std::string>> btLibVec;
computer.ComputeLinkLibraries(cli, std::string{}, btLibVec);
for (auto const& item : btLibVec) {
libVec.emplace_back(item.Value);
}
}
if (!libVec.empty()) {
cudaLinkOptions.AddFlag("AdditionalDependencies", libVec);
}
cudaLinkOptions.AddFlag("AdditionalDependencies", libVec);
}
this->CudaLinkOptions[configName] = std::move(pOptions);

View File

@@ -20,6 +20,7 @@ add_cuda_test_macro(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine)
add_cuda_test_macro(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
add_cuda_test_macro(CudaOnly.SeparateCompilation main/CudaOnlySeparateCompilation)
add_cuda_test_macro(CudaOnly.SeparateCompilationPTX CudaOnlySeparateCompilationPTX)
add_cuda_test_macro(CudaOnly.SeparateCompilationTargetObjects CudaOnlySeparateCompilationTargetObjects)
if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
# Clang doesn't have flags for selecting the runtime.

View File

@@ -0,0 +1,18 @@
cmake_minimum_required(VERSION 3.25.5)
project(SeparateCompilationObjects LANGUAGES CUDA)
add_library(foo OBJECT foo.cu)
set_target_properties(foo PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
add_library(bar OBJECT bar.cu)
set_target_properties(bar PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
add_executable(CudaOnlySeparateCompilationTargetObjects main.cu)
set_target_properties(CudaOnlySeparateCompilationTargetObjects PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
target_link_libraries(CudaOnlySeparateCompilationTargetObjects PRIVATE $<TARGET_OBJECTS:foo> bar)
if(APPLE)
# Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
set_property(TARGET CudaOnlySeparateCompilationTargetObjects PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
endif()

View File

@@ -0,0 +1,18 @@
#include <iostream>
#ifdef _WIN32
# define EXPORT __declspec(dllexport)
#else
# define EXPORT __attribute__((__visibility__("default")))
#endif
__global__ void b1()
{
}
EXPORT int bar()
{
b1<<<1, 1>>>();
return 0;
}

View File

@@ -0,0 +1,18 @@
#include <iostream>
#ifdef _WIN32
# define EXPORT __declspec(dllexport)
#else
# define EXPORT __attribute__((__visibility__("default")))
#endif
__global__ void k1()
{
}
EXPORT int foo()
{
k1<<<1, 1>>>();
return 0;
}

View File

@@ -0,0 +1,16 @@
// main.cu
#include <iostream>
#ifdef _WIN32
# define IMPORT __declspec(dllimport)
#else
# define IMPORT
#endif
IMPORT int foo();
IMPORT int bar();
int main(int argc, char**)
{
return foo() && bar();
}