mirror of
https://github.com/Kitware/CMake.git
synced 2026-04-22 22:31:18 -05:00
Merge topic 'cuda-device-link-ar'
493671a5CUDA: Static libraries can now explicitly resolve device symbols8fb85c68CUDA: Makefile uses relative path for device linking status messagesa36fb229CUDA: Visual Studio now properly delays device linking Acked-by: Kitware Robot <kwrobot@kitware.com> Merge-request: !759
This commit is contained in:
@@ -154,6 +154,7 @@ Properties on Targets
|
||||
/prop_tgt/CROSSCOMPILING_EMULATOR
|
||||
/prop_tgt/CUDA_PTX_COMPILATION
|
||||
/prop_tgt/CUDA_SEPARABLE_COMPILATION
|
||||
/prop_tgt/CUDA_RESOLVE_DEVICE_SYMBOLS
|
||||
/prop_tgt/CUDA_EXTENSIONS
|
||||
/prop_tgt/CUDA_STANDARD
|
||||
/prop_tgt/CUDA_STANDARD_REQUIRED
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
CUDA_RESOLVE_DEVICE_SYMBOLS
|
||||
---------------------------
|
||||
|
||||
CUDA only: Enables device linking for the specific static library target
|
||||
|
||||
If set this will enable device linking on this static library target. Normally
|
||||
device linking is deferred until a shared library or executable is generated,
|
||||
allowing for multiple static libraries to resolve device symbols at the same
|
||||
time.
|
||||
|
||||
For instance:
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
set_property(TARGET mystaticlib PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
|
||||
@@ -39,9 +39,24 @@ std::string cmLinkLineDeviceComputer::ComputeLinkLibraries(
|
||||
continue;
|
||||
}
|
||||
|
||||
if (li->Target->GetType() == cmStateEnums::INTERFACE_LIBRARY ||
|
||||
li->Target->GetType() == cmStateEnums::SHARED_LIBRARY ||
|
||||
li->Target->GetType() == cmStateEnums::MODULE_LIBRARY) {
|
||||
bool skippable = false;
|
||||
switch (li->Target->GetType()) {
|
||||
case cmStateEnums::SHARED_LIBRARY:
|
||||
case cmStateEnums::MODULE_LIBRARY:
|
||||
case cmStateEnums::INTERFACE_LIBRARY:
|
||||
skippable = true;
|
||||
break;
|
||||
case cmStateEnums::STATIC_LIBRARY:
|
||||
// If a static library is resolving its device linking, it should
|
||||
// be removed for other device linking
|
||||
skippable =
|
||||
li->Target->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (skippable) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@@ -122,7 +122,11 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
|
||||
std::string buildEcho = "Linking ";
|
||||
buildEcho += linkLanguage;
|
||||
buildEcho += " device code ";
|
||||
buildEcho += targetOutputReal;
|
||||
buildEcho += this->LocalGenerator->ConvertToOutputFormat(
|
||||
this->LocalGenerator->MaybeConvertToRelativePath(
|
||||
this->LocalGenerator->GetCurrentBinaryDirectory(),
|
||||
this->DeviceLinkObject),
|
||||
cmOutputConverter::SHELL);
|
||||
this->LocalGenerator->AppendEcho(
|
||||
commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress);
|
||||
}
|
||||
|
||||
@@ -127,6 +127,24 @@ void cmMakefileLibraryTargetGenerator::WriteObjectLibraryRules()
|
||||
|
||||
void cmMakefileLibraryTargetGenerator::WriteStaticLibraryRules()
|
||||
{
|
||||
const std::string cuda_lang("CUDA");
|
||||
cmGeneratorTarget::LinkClosure const* closure =
|
||||
this->GeneratorTarget->GetLinkClosure(this->ConfigName);
|
||||
|
||||
const bool hasCUDA =
|
||||
(std::find(closure->Languages.begin(), closure->Languages.end(),
|
||||
cuda_lang) != closure->Languages.end());
|
||||
|
||||
const bool resolveDeviceSymbols =
|
||||
this->GeneratorTarget->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS");
|
||||
if (hasCUDA && resolveDeviceSymbols) {
|
||||
std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
|
||||
std::string extraFlags;
|
||||
this->LocalGenerator->AppendFlags(
|
||||
extraFlags, this->GeneratorTarget->GetProperty("LINK_FLAGS"));
|
||||
this->WriteDeviceLibraryRules(linkRuleVar, extraFlags, false);
|
||||
}
|
||||
|
||||
std::string linkLanguage =
|
||||
this->GeneratorTarget->GetLinkerLanguage(this->ConfigName);
|
||||
|
||||
@@ -292,8 +310,12 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
|
||||
cmLocalUnixMakefileGenerator3::EchoProgress progress;
|
||||
this->MakeEchoProgress(progress);
|
||||
// Add the link message.
|
||||
std::string buildEcho = "Linking " + linkLanguage + " device code";
|
||||
buildEcho += targetOutputReal;
|
||||
std::string buildEcho = "Linking " + linkLanguage + " device code ";
|
||||
buildEcho += this->LocalGenerator->ConvertToOutputFormat(
|
||||
this->LocalGenerator->MaybeConvertToRelativePath(
|
||||
this->LocalGenerator->GetCurrentBinaryDirectory(),
|
||||
this->DeviceLinkObject),
|
||||
cmOutputConverter::SHELL);
|
||||
this->LocalGenerator->AppendEcho(
|
||||
commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress);
|
||||
}
|
||||
@@ -857,6 +879,16 @@ void cmMakefileLibraryTargetGenerator::WriteLibraryRules(
|
||||
std::vector<std::string> object_strings;
|
||||
this->WriteObjectsStrings(object_strings, archiveCommandLimit);
|
||||
|
||||
// Add the cuda device object to the list of archive files. This will
|
||||
// only occur on archives which have CUDA_RESOLVE_DEVICE_SYMBOLS enabled
|
||||
if (!this->DeviceLinkObject.empty()) {
|
||||
object_strings.push_back(this->LocalGenerator->ConvertToOutputFormat(
|
||||
this->LocalGenerator->MaybeConvertToRelativePath(
|
||||
this->LocalGenerator->GetCurrentBinaryDirectory(),
|
||||
this->DeviceLinkObject),
|
||||
cmOutputConverter::SHELL));
|
||||
}
|
||||
|
||||
// Create the archive with the first set of objects.
|
||||
std::vector<std::string>::iterator osi = object_strings.begin();
|
||||
{
|
||||
|
||||
@@ -447,6 +447,7 @@ std::vector<std::string> cmNinjaNormalTargetGenerator::ComputeDeviceLinkCmd()
|
||||
// an executable or a dynamic library.
|
||||
std::string linkCmd;
|
||||
switch (this->GetGeneratorTarget()->GetType()) {
|
||||
case cmStateEnums::STATIC_LIBRARY:
|
||||
case cmStateEnums::SHARED_LIBRARY:
|
||||
case cmStateEnums::MODULE_LIBRARY: {
|
||||
const std::string cudaLinkCmd(
|
||||
@@ -559,11 +560,15 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement()
|
||||
case cmStateEnums::EXECUTABLE:
|
||||
shouldHaveDeviceLinking = true;
|
||||
break;
|
||||
case cmStateEnums::STATIC_LIBRARY:
|
||||
shouldHaveDeviceLinking =
|
||||
genTarget.GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (!shouldHaveDeviceLinking || !hasCUDA) {
|
||||
if (!(shouldHaveDeviceLinking && hasCUDA)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -116,6 +116,10 @@ cmVisualStudio10TargetGenerator::~cmVisualStudio10TargetGenerator()
|
||||
i != this->CudaOptions.end(); ++i) {
|
||||
delete i->second;
|
||||
}
|
||||
for (OptionsMap::iterator i = this->CudaLinkOptions.begin();
|
||||
i != this->CudaLinkOptions.end(); ++i) {
|
||||
delete i->second;
|
||||
}
|
||||
if (!this->BuildFileStream) {
|
||||
return;
|
||||
}
|
||||
@@ -213,6 +217,9 @@ void cmVisualStudio10TargetGenerator::Generate()
|
||||
if (!this->ComputeCudaOptions()) {
|
||||
return;
|
||||
}
|
||||
if (!this->ComputeCudaLinkOptions()) {
|
||||
return;
|
||||
}
|
||||
if (!this->ComputeMasmOptions()) {
|
||||
return;
|
||||
}
|
||||
@@ -2524,6 +2531,70 @@ void cmVisualStudio10TargetGenerator::WriteCudaOptions(
|
||||
this->WriteString("</CudaCompile>\n", 2);
|
||||
}
|
||||
|
||||
bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions()
|
||||
{
|
||||
if (!this->GlobalGenerator->IsCudaEnabled()) {
|
||||
return true;
|
||||
}
|
||||
for (std::vector<std::string>::const_iterator i =
|
||||
this->Configurations.begin();
|
||||
i != this->Configurations.end(); ++i) {
|
||||
if (!this->ComputeCudaLinkOptions(*i)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions(
|
||||
std::string const& configName)
|
||||
{
|
||||
cmGlobalVisualStudio10Generator* gg =
|
||||
static_cast<cmGlobalVisualStudio10Generator*>(this->GlobalGenerator);
|
||||
CM_AUTO_PTR<Options> pOptions(new Options(
|
||||
this->LocalGenerator, Options::CudaCompiler, gg->GetCudaFlagTable()));
|
||||
Options& cudaLinkOptions = *pOptions;
|
||||
|
||||
// Determine if we need to do a device link
|
||||
bool doDeviceLinking = false;
|
||||
switch (this->GeneratorTarget->GetType()) {
|
||||
case cmStateEnums::SHARED_LIBRARY:
|
||||
case cmStateEnums::MODULE_LIBRARY:
|
||||
case cmStateEnums::EXECUTABLE:
|
||||
doDeviceLinking = true;
|
||||
break;
|
||||
case cmStateEnums::STATIC_LIBRARY:
|
||||
doDeviceLinking = this->GeneratorTarget->GetPropertyAsBool(
|
||||
"CUDA_RESOLVE_DEVICE_SYMBOLS");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
cudaLinkOptions.AddFlag("PerformDeviceLink",
|
||||
doDeviceLinking ? "true" : "false");
|
||||
|
||||
this->CudaLinkOptions[configName] = pOptions.release();
|
||||
return true;
|
||||
}
|
||||
|
||||
void cmVisualStudio10TargetGenerator::WriteCudaLinkOptions(
|
||||
std::string const& configName)
|
||||
{
|
||||
if (this->GeneratorTarget->GetType() > cmStateEnums::MODULE_LIBRARY) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!this->MSTools || !this->GlobalGenerator->IsCudaEnabled()) {
|
||||
return;
|
||||
}
|
||||
|
||||
this->WriteString("<CudaLink>\n", 2);
|
||||
Options& cudaLinkOptions = *(this->CudaLinkOptions[configName]);
|
||||
cudaLinkOptions.OutputFlagMap(*this->BuildFileStream, " ");
|
||||
this->WriteString("</CudaLink>\n", 2);
|
||||
}
|
||||
|
||||
bool cmVisualStudio10TargetGenerator::ComputeMasmOptions()
|
||||
{
|
||||
if (!this->GlobalGenerator->IsMasmEnabled()) {
|
||||
@@ -3283,6 +3354,7 @@ void cmVisualStudio10TargetGenerator::WriteItemDefinitionGroups()
|
||||
}
|
||||
// output link flags <Link></Link>
|
||||
this->WriteLinkOptions(*i);
|
||||
this->WriteCudaLinkOptions(*i);
|
||||
// output lib flags <Lib></Lib>
|
||||
this->WriteLibOptions(*i);
|
||||
// output manifest flags <Manifest></Manifest>
|
||||
|
||||
@@ -101,6 +101,11 @@ private:
|
||||
bool ComputeCudaOptions(std::string const& config);
|
||||
void WriteCudaOptions(std::string const& config,
|
||||
std::vector<std::string> const& includes);
|
||||
|
||||
bool ComputeCudaLinkOptions();
|
||||
bool ComputeCudaLinkOptions(std::string const& config);
|
||||
void WriteCudaLinkOptions(std::string const& config);
|
||||
|
||||
bool ComputeMasmOptions();
|
||||
bool ComputeMasmOptions(std::string const& config);
|
||||
void WriteMasmOptions(std::string const& config,
|
||||
@@ -154,6 +159,7 @@ private:
|
||||
OptionsMap ClOptions;
|
||||
OptionsMap RcOptions;
|
||||
OptionsMap CudaOptions;
|
||||
OptionsMap CudaLinkOptions;
|
||||
OptionsMap MasmOptions;
|
||||
OptionsMap NasmOptions;
|
||||
OptionsMap LinkOptions;
|
||||
|
||||
@@ -37,7 +37,7 @@ EXPORT int choose_cuda_device()
|
||||
<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
if (prop.major >= 4) {
|
||||
if (prop.major >= 3) {
|
||||
err = cudaSetDevice(i);
|
||||
if (err != cudaSuccess) {
|
||||
std::cout << "Could not select CUDA device " << i << std::endl;
|
||||
|
||||
@@ -3,3 +3,4 @@ ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard)
|
||||
ADD_TEST_MACRO(CudaOnly.ExportPTX CudaOnlyExportPTX)
|
||||
ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
|
||||
ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs)
|
||||
ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
cmake_minimum_required(VERSION 3.7)
|
||||
project (CudaOnlyResolveDeviceSymbols CUDA)
|
||||
|
||||
# Find nm and dumpbin
|
||||
if(CMAKE_NM)
|
||||
set(dump_command ${CMAKE_NM})
|
||||
set(dump_args -g)
|
||||
else()
|
||||
include(GetPrerequisites)
|
||||
message(STATUS "calling list_prerequisites to find dumpbin")
|
||||
list_prerequisites("${CMAKE_COMMAND}" 0 0 0)
|
||||
if(gp_dumpbin)
|
||||
set(dump_command ${gp_dumpbin})
|
||||
set(dump_args /ARCHIVEMEMBERS)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
#Goal for this example:
|
||||
#Build a static library that defines multiple methods and kernels that
|
||||
#use each other.
|
||||
#Use a custom command to build an executable that uses this static library
|
||||
#We do these together to verify that we can get a static library to do
|
||||
#device symbol linking, and not have it done when the executable is made
|
||||
string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30")
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
set(CMAKE_CUDA_STANDARD 11)
|
||||
|
||||
add_library(CUDAResolveDeviceLib STATIC file1.cu file2.cu)
|
||||
set_target_properties(CUDAResolveDeviceLib
|
||||
PROPERTIES
|
||||
CUDA_SEPARABLE_COMPILATION ON
|
||||
CUDA_RESOLVE_DEVICE_SYMBOLS ON
|
||||
POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
if(dump_command)
|
||||
add_custom_command(TARGET CUDAResolveDeviceLib POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND}
|
||||
-DDUMP_COMMAND=${dump_command}
|
||||
-DDUMP_ARGS=${dump_args}
|
||||
-DTEST_LIBRARY_PATH=$<TARGET_FILE:CUDAResolveDeviceLib>
|
||||
-P ${CMAKE_CURRENT_SOURCE_DIR}/verify.cmake
|
||||
)
|
||||
endif()
|
||||
|
||||
add_executable(CudaOnlyResolveDeviceSymbols main.cu)
|
||||
target_link_libraries(CudaOnlyResolveDeviceSymbols PRIVATE CUDAResolveDeviceLib)
|
||||
|
||||
if(APPLE)
|
||||
# We need to add the default path to the driver (libcuda.dylib) as an rpath, so that
|
||||
# the static cuda runtime can find it at runtime.
|
||||
target_link_libraries(CudaOnlyResolveDeviceSymbols PRIVATE -Wl,-rpath,/usr/local/cuda/lib)
|
||||
endif()
|
||||
@@ -0,0 +1,10 @@
|
||||
|
||||
#include "file1.h"
|
||||
|
||||
result_type __device__ file1_func(int x)
|
||||
{
|
||||
result_type r;
|
||||
r.input = x;
|
||||
r.sum = x * x;
|
||||
return r;
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
|
||||
#pragma once
|
||||
struct result_type
|
||||
{
|
||||
int input;
|
||||
int sum;
|
||||
};
|
||||
@@ -0,0 +1,25 @@
|
||||
|
||||
#include "file2.h"
|
||||
|
||||
result_type __device__ file1_func(int x);
|
||||
|
||||
result_type_dynamic __device__ file2_func(int x)
|
||||
{
|
||||
const result_type r = file1_func(x);
|
||||
const result_type_dynamic rd{ r.input, r.sum, true };
|
||||
return rd;
|
||||
}
|
||||
|
||||
static __global__ void file2_kernel(result_type_dynamic& r, int x)
|
||||
{
|
||||
// call static_func which is a method that is defined in the
|
||||
// static library that is always out of date
|
||||
r = file2_func(x);
|
||||
}
|
||||
|
||||
int file2_launch_kernel(int x)
|
||||
{
|
||||
result_type_dynamic r;
|
||||
file2_kernel<<<1, 1>>>(r, x);
|
||||
return r.sum;
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
|
||||
#pragma once
|
||||
#include "file1.h"
|
||||
|
||||
struct result_type_dynamic
|
||||
{
|
||||
int input;
|
||||
int sum;
|
||||
bool from_static;
|
||||
};
|
||||
@@ -0,0 +1,85 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "file1.h"
|
||||
#include "file2.h"
|
||||
|
||||
int file2_launch_kernel(int x);
|
||||
|
||||
result_type_dynamic __device__ file2_func(int x);
|
||||
static __global__ void main_kernel(result_type_dynamic& r, int x)
|
||||
{
|
||||
// call function that was not device linked to us, this will cause
|
||||
// a runtime failure of "invalid device function"
|
||||
r = file2_func(x);
|
||||
}
|
||||
|
||||
int main_launch_kernel(int x)
|
||||
{
|
||||
result_type_dynamic r;
|
||||
main_kernel<<<1, 1>>>(r, x);
|
||||
return r.sum;
|
||||
}
|
||||
|
||||
int choose_cuda_device()
|
||||
{
|
||||
int nDevices = 0;
|
||||
cudaError_t err = cudaGetDeviceCount(&nDevices);
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Failed to retrieve the number of CUDA enabled devices"
|
||||
<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
for (int i = 0; i < nDevices; ++i) {
|
||||
cudaDeviceProp prop;
|
||||
cudaError_t err = cudaGetDeviceProperties(&prop, i);
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Could not retrieve properties from CUDA device " << i
|
||||
<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
std::cout << "prop.major: " << prop.major << std::endl;
|
||||
if (prop.major >= 3) {
|
||||
err = cudaSetDevice(i);
|
||||
if (err != cudaSuccess) {
|
||||
std::cout << "Could not select CUDA device " << i << std::endl;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Could not find a CUDA enabled card supporting compute >=3.0"
|
||||
<< std::endl;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
int ret = choose_cuda_device();
|
||||
if (ret) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
cudaError_t err;
|
||||
file2_launch_kernel(42);
|
||||
err = cudaGetLastError();
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "file2_launch_kernel: kernel launch failed: "
|
||||
<< cudaGetErrorString(err) << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
main_launch_kernel(1);
|
||||
err = cudaGetLastError();
|
||||
if (err == cudaSuccess) {
|
||||
// This kernel launch should fail as the file2_func was device linked
|
||||
// into the static library and is not usable by the executable
|
||||
std::cerr << "main_launch_kernel: kernel launch should have failed"
|
||||
<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,14 @@
|
||||
execute_process(COMMAND ${DUMP_COMMAND} ${DUMP_ARGS} ${TEST_LIBRARY_PATH}
|
||||
RESULT_VARIABLE RESULT
|
||||
OUTPUT_VARIABLE OUTPUT
|
||||
ERROR_VARIABLE ERROR
|
||||
)
|
||||
|
||||
if(NOT "${RESULT}" STREQUAL "0")
|
||||
message(FATAL_ERROR "${DUMP_COMMAND} failed [${RESULT}] [${OUTPUT}] [${ERROR}]")
|
||||
endif()
|
||||
|
||||
if(NOT "${OUTPUT}" MATCHES "(cmake_device_link|device-link)")
|
||||
message(FATAL_ERROR
|
||||
"No cuda device objects found, device linking did not occur")
|
||||
endif()
|
||||
@@ -12,6 +12,7 @@ project (CudaOnlySeparateCompilation CUDA)
|
||||
string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30")
|
||||
set(CMAKE_CXX_STANDARD 11)
|
||||
set(CMAKE_CUDA_STANDARD 11)
|
||||
|
||||
add_library(CUDASeparateLibA STATIC file1.cu file2.cu file3.cu)
|
||||
|
||||
#Having file4/file5 in a shared library causes serious problems
|
||||
@@ -22,12 +23,24 @@ add_library(CUDASeparateLibB STATIC file4.cu file5.cu)
|
||||
target_link_libraries(CUDASeparateLibB PRIVATE CUDASeparateLibA)
|
||||
|
||||
add_executable(CudaOnlySeparateCompilation main.cu)
|
||||
target_link_libraries(CudaOnlySeparateCompilation PRIVATE CUDASeparateLibB)
|
||||
target_link_libraries(CudaOnlySeparateCompilation
|
||||
PRIVATE CUDASeparateLibB)
|
||||
|
||||
set_target_properties( CUDASeparateLibA
|
||||
CUDASeparateLibB
|
||||
PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
set_target_properties(CUDASeparateLibA
|
||||
CUDASeparateLibB
|
||||
PROPERTIES CUDA_SEPARABLE_COMPILATION ON
|
||||
POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set_target_properties( CUDASeparateLibA
|
||||
CUDASeparateLibB
|
||||
PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
if (CMAKE_GENERATOR MATCHES "^Visual Studio")
|
||||
#Visual Studio CUDA integration will not perform device linking
|
||||
#on a target that itself does not have GenerateRelocatableDeviceCode
|
||||
#enabled.
|
||||
set_target_properties(CudaOnlySeparateCompilation
|
||||
PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
endif()
|
||||
|
||||
if (APPLE)
|
||||
# We need to add the default path to the driver (libcuda.dylib) as an rpath, so that
|
||||
# the static cuda runtime can find it at runtime.
|
||||
target_link_libraries(CudaOnlySeparateCompilation PRIVATE -Wl,-rpath,/usr/local/cuda/lib)
|
||||
endif()
|
||||
|
||||
@@ -7,9 +7,62 @@
|
||||
int file4_launch_kernel(int x);
|
||||
int file5_launch_kernel(int x);
|
||||
|
||||
int choose_cuda_device()
|
||||
{
|
||||
int nDevices = 0;
|
||||
cudaError_t err = cudaGetDeviceCount(&nDevices);
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Failed to retrieve the number of CUDA enabled devices"
|
||||
<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
for (int i = 0; i < nDevices; ++i) {
|
||||
cudaDeviceProp prop;
|
||||
cudaError_t err = cudaGetDeviceProperties(&prop, i);
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Could not retrieve properties from CUDA device " << i
|
||||
<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
if (prop.major >= 3) {
|
||||
err = cudaSetDevice(i);
|
||||
if (err != cudaSuccess) {
|
||||
std::cout << "Could not select CUDA device " << i << std::endl;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Could not find a CUDA enabled card supporting compute >=3.0"
|
||||
<< std::endl;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
int ret = choose_cuda_device();
|
||||
if (ret) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
cudaError_t err;
|
||||
file4_launch_kernel(42);
|
||||
err = cudaGetLastError();
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "file4_launch_kernel: kernel launch failed: "
|
||||
<< cudaGetErrorString(err) << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
file5_launch_kernel(42);
|
||||
err = cudaGetLastError();
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "file5_launch_kernel: kernel launch failed: "
|
||||
<< cudaGetErrorString(err) << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user