mirror of
https://github.com/Kitware/CMake.git
synced 2026-05-03 21:00:01 -05:00
CUDA: Add abstraction for cuda runtime selection
Fixes #17559 Replace our hard-coded default of cudart=static with a first-class abstraction to select the runtime library from an enumeration of logical names.
This commit is contained in:
@@ -173,6 +173,7 @@ Properties on Targets
|
|||||||
/prop_tgt/CUDA_PTX_COMPILATION
|
/prop_tgt/CUDA_PTX_COMPILATION
|
||||||
/prop_tgt/CUDA_SEPARABLE_COMPILATION
|
/prop_tgt/CUDA_SEPARABLE_COMPILATION
|
||||||
/prop_tgt/CUDA_RESOLVE_DEVICE_SYMBOLS
|
/prop_tgt/CUDA_RESOLVE_DEVICE_SYMBOLS
|
||||||
|
/prop_tgt/CUDA_RUNTIME_LIBRARY
|
||||||
/prop_tgt/CUDA_EXTENSIONS
|
/prop_tgt/CUDA_EXTENSIONS
|
||||||
/prop_tgt/CUDA_STANDARD
|
/prop_tgt/CUDA_STANDARD
|
||||||
/prop_tgt/CUDA_STANDARD_REQUIRED
|
/prop_tgt/CUDA_STANDARD_REQUIRED
|
||||||
|
|||||||
@@ -372,6 +372,7 @@ Variables that Control the Build
|
|||||||
/variable/CMAKE_CTEST_ARGUMENTS
|
/variable/CMAKE_CTEST_ARGUMENTS
|
||||||
/variable/CMAKE_CUDA_SEPARABLE_COMPILATION
|
/variable/CMAKE_CUDA_SEPARABLE_COMPILATION
|
||||||
/variable/CMAKE_CUDA_RESOLVE_DEVICE_SYMBOLS
|
/variable/CMAKE_CUDA_RESOLVE_DEVICE_SYMBOLS
|
||||||
|
/variable/CMAKE_CUDA_RUNTIME_LIBRARY
|
||||||
/variable/CMAKE_DEBUG_POSTFIX
|
/variable/CMAKE_DEBUG_POSTFIX
|
||||||
/variable/CMAKE_DISABLE_PRECOMPILE_HEADERS
|
/variable/CMAKE_DISABLE_PRECOMPILE_HEADERS
|
||||||
/variable/CMAKE_ENABLE_EXPORTS
|
/variable/CMAKE_ENABLE_EXPORTS
|
||||||
|
|||||||
@@ -0,0 +1,9 @@
|
|||||||
|
``None``
|
||||||
|
Link with ``-cudart=none`` or equivalent flag(s) to use no CUDA
|
||||||
|
runtime library.
|
||||||
|
``Shared``
|
||||||
|
Link with ``-cudart=shared`` or equivalent flag(s) to use a
|
||||||
|
dynamically-linked CUDA runtime library.
|
||||||
|
``Static``
|
||||||
|
Link with ``-cudart=static`` or equivalent flag(s) to use a
|
||||||
|
statically-linked CUDA runtime library.
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
CUDA_RUNTIME_LIBRARY
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
Select the CUDA runtime library for use by compilers targeting the CUDA language.
|
||||||
|
|
||||||
|
The allowed case insensitive values are:
|
||||||
|
|
||||||
|
.. include:: CUDA_RUNTIME_LIBRARY-VALUES.txt
|
||||||
|
|
||||||
|
Contents of ``CUDA_RUNTIME_LIBRARY`` may use
|
||||||
|
:manual:`generator expressions <cmake-generator-expressions(7)>`.
|
||||||
|
|
||||||
|
If this property is not set then CMake uses the default value
|
||||||
|
``Static`` to select the CUDA runtime library.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
This property has effect only when the ``CUDA`` language is enabled. To
|
||||||
|
control the CUDA runtime linking when only using the CUDA SDK with the
|
||||||
|
``C`` or ``C++`` language we recommend using the :module:`FindCUDAToolkit`
|
||||||
|
module.
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
cuda-runtime-library
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
* The :variable:`CMAKE_CUDA_RUNTIME_LIBRARY` variable and
|
||||||
|
:prop_tgt:`CUDA_RUNTIME_LIBRARY` target property were introduced to
|
||||||
|
select the CUDA runtime library used when linking targets that
|
||||||
|
use CUDA.
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
CMAKE_CUDA_RUNTIME_LIBRARY
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
Select the CUDA runtime library for use by compilers targeting the MSVC ABI.
|
||||||
|
This variable is used to initialize the :prop_tgt:`CUDA_RUNTIME_LIBRARY`
|
||||||
|
property on all targets as they are created.
|
||||||
|
|
||||||
|
The allowed case insensitive values are:
|
||||||
|
|
||||||
|
.. include:: ../prop_tgt/CUDA_RUNTIME_LIBRARY-VALUES.txt
|
||||||
|
|
||||||
|
Contents of ``CMAKE_CUDA_RUNTIME_LIBRARY`` may use
|
||||||
|
:manual:`generator expressions <cmake-generator-expressions(7)>`.
|
||||||
|
|
||||||
|
If this variable is not set then the :prop_tgt:`CUDA_RUNTIME_LIBRARY` target
|
||||||
|
property will not be set automatically. If that property is not set then
|
||||||
|
CMake uses the default value ``Static`` to select the CUDA runtime library.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
This property has effect only when the ``CUDA`` language is enabled. To
|
||||||
|
control the CUDA runtime linking when only using the CUDA SDK with the
|
||||||
|
``C`` or ``C++`` language we recommend using the :module:`FindCUDAToolkit`
|
||||||
|
module.
|
||||||
@@ -67,6 +67,17 @@ else()
|
|||||||
set(CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES "${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES}")
|
set(CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES "${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES}")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# Remove the following libraries from CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES and
|
||||||
|
# CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES
|
||||||
|
#
|
||||||
|
# - cudart
|
||||||
|
# - cudart_static
|
||||||
|
# - cudadevrt
|
||||||
|
#
|
||||||
|
# These are controlled by CMAKE_CUDA_RUNTIME_LIBRARY
|
||||||
|
list(REMOVE_ITEM CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES cudart cudart_static cudadevrt)
|
||||||
|
list(REMOVE_ITEM CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES cudart cudart_static cudadevrt)
|
||||||
|
|
||||||
# Re-configure to save learned information.
|
# Re-configure to save learned information.
|
||||||
configure_file(
|
configure_file(
|
||||||
${CMAKE_ROOT}/Modules/CMakeCUDACompiler.cmake.in
|
${CMAKE_ROOT}/Modules/CMakeCUDACompiler.cmake.in
|
||||||
|
|||||||
@@ -43,6 +43,11 @@ endif()
|
|||||||
set(CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS -shared)
|
set(CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS -shared)
|
||||||
set(CMAKE_INCLUDE_SYSTEM_FLAG_CUDA -isystem=)
|
set(CMAKE_INCLUDE_SYSTEM_FLAG_CUDA -isystem=)
|
||||||
|
|
||||||
|
set(CMAKE_CUDA_RUNTIME_LIBRARY_DEFAULT "STATIC")
|
||||||
|
set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "cudadevrt;cudart_static")
|
||||||
|
set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_SHARED "cudadevrt;cudart")
|
||||||
|
set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_NONE "")
|
||||||
|
|
||||||
if("x${CMAKE_CUDA_SIMULATE_ID}" STREQUAL "xMSVC")
|
if("x${CMAKE_CUDA_SIMULATE_ID}" STREQUAL "xMSVC")
|
||||||
set(CMAKE_CUDA03_STANDARD_COMPILE_OPTION "")
|
set(CMAKE_CUDA03_STANDARD_COMPILE_OPTION "")
|
||||||
set(CMAKE_CUDA03_EXTENSION_COMPILE_OPTION "")
|
set(CMAKE_CUDA03_EXTENSION_COMPILE_OPTION "")
|
||||||
|
|||||||
@@ -69,6 +69,11 @@ else()
|
|||||||
endif()
|
endif()
|
||||||
unset(_cmp0092)
|
unset(_cmp0092)
|
||||||
|
|
||||||
|
set(CMAKE_CUDA_RUNTIME_LIBRARY_DEFAULT "STATIC")
|
||||||
|
set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "cudadevrt;cudart_static")
|
||||||
|
set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_SHARED "cudadevrt;cudart")
|
||||||
|
set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_NONE "")
|
||||||
|
|
||||||
string(APPEND CMAKE_CUDA_FLAGS_INIT " ${PLATFORM_DEFINES_CUDA} -D_WINDOWS -Xcompiler=\"${_W3}${_FLAGS_CXX}\"")
|
string(APPEND CMAKE_CUDA_FLAGS_INIT " ${PLATFORM_DEFINES_CUDA} -D_WINDOWS -Xcompiler=\"${_W3}${_FLAGS_CXX}\"")
|
||||||
string(APPEND CMAKE_CUDA_FLAGS_DEBUG_INIT " -Xcompiler=\"${_MDd}-Zi -Ob0 -Od ${_RTC1}\"")
|
string(APPEND CMAKE_CUDA_FLAGS_DEBUG_INIT " -Xcompiler=\"${_MDd}-Zi -Ob0 -Od ${_RTC1}\"")
|
||||||
string(APPEND CMAKE_CUDA_FLAGS_RELEASE_INIT " -Xcompiler=\"${_MD}-O2 -Ob2\" -DNDEBUG")
|
string(APPEND CMAKE_CUDA_FLAGS_RELEASE_INIT " -Xcompiler=\"${_MD}-O2 -Ob2\" -DNDEBUG")
|
||||||
|
|||||||
@@ -10,6 +10,7 @@
|
|||||||
|
|
||||||
#include "cmAlgorithms.h"
|
#include "cmAlgorithms.h"
|
||||||
#include "cmComputeLinkDepends.h"
|
#include "cmComputeLinkDepends.h"
|
||||||
|
#include "cmGeneratorExpression.h"
|
||||||
#include "cmGeneratorTarget.h"
|
#include "cmGeneratorTarget.h"
|
||||||
#include "cmGlobalGenerator.h"
|
#include "cmGlobalGenerator.h"
|
||||||
#include "cmListFileCache.h"
|
#include "cmListFileCache.h"
|
||||||
@@ -573,6 +574,15 @@ void cmComputeLinkInformation::AddImplicitLinkInfo()
|
|||||||
cmGeneratorTarget::LinkClosure const* lc =
|
cmGeneratorTarget::LinkClosure const* lc =
|
||||||
this->Target->GetLinkClosure(this->Config);
|
this->Target->GetLinkClosure(this->Config);
|
||||||
for (std::string const& li : lc->Languages) {
|
for (std::string const& li : lc->Languages) {
|
||||||
|
|
||||||
|
if (li == "CUDA") {
|
||||||
|
// These need to go before the other implicit link information
|
||||||
|
// as they could require symbols from those other library
|
||||||
|
// Currently restricted to CUDA as it is the only language
|
||||||
|
// we have documented runtime behavior controls for
|
||||||
|
this->AddRuntimeLinkLibrary(li);
|
||||||
|
}
|
||||||
|
|
||||||
// Skip those of the linker language. They are implicit.
|
// Skip those of the linker language. They are implicit.
|
||||||
if (li != this->LinkLanguage) {
|
if (li != this->LinkLanguage) {
|
||||||
this->AddImplicitLinkInfo(li);
|
this->AddImplicitLinkInfo(li);
|
||||||
@@ -580,6 +590,39 @@ void cmComputeLinkInformation::AddImplicitLinkInfo()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void cmComputeLinkInformation::AddRuntimeLinkLibrary(std::string const& lang)
|
||||||
|
{ // Add the lang runtime library flags. This is activated by the presence
|
||||||
|
// of a default selection whether or not it is overridden by a property.
|
||||||
|
std::string defaultVar =
|
||||||
|
cmStrCat("CMAKE_", lang, "_RUNTIME_LIBRARY_DEFAULT");
|
||||||
|
const char* langRuntimeLibraryDefault =
|
||||||
|
this->Makefile->GetDefinition(defaultVar);
|
||||||
|
if (langRuntimeLibraryDefault && *langRuntimeLibraryDefault) {
|
||||||
|
const char* runtimeLibraryValue =
|
||||||
|
this->Target->GetProperty(cmStrCat(lang, "_RUNTIME_LIBRARY"));
|
||||||
|
if (!runtimeLibraryValue) {
|
||||||
|
runtimeLibraryValue = langRuntimeLibraryDefault;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string runtimeLibrary =
|
||||||
|
cmSystemTools::UpperCase(cmGeneratorExpression::Evaluate(
|
||||||
|
runtimeLibraryValue, this->Target->GetLocalGenerator(), this->Config,
|
||||||
|
this->Target));
|
||||||
|
if (!runtimeLibrary.empty()) {
|
||||||
|
if (const char* runtimeLinkOptions = this->Makefile->GetDefinition(
|
||||||
|
"CMAKE_" + lang + "_RUNTIME_LIBRARY_LINK_OPTIONS_" +
|
||||||
|
runtimeLibrary)) {
|
||||||
|
std::vector<std::string> libsVec = cmExpandedList(runtimeLinkOptions);
|
||||||
|
for (std::string const& i : libsVec) {
|
||||||
|
if (!cmContains(this->ImplicitLinkLibs, i)) {
|
||||||
|
this->AddItem(i, nullptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void cmComputeLinkInformation::AddImplicitLinkInfo(std::string const& lang)
|
void cmComputeLinkInformation::AddImplicitLinkInfo(std::string const& lang)
|
||||||
{
|
{
|
||||||
// Add libraries for this language that are not implied by the
|
// Add libraries for this language that are not implied by the
|
||||||
|
|||||||
@@ -172,6 +172,7 @@ private:
|
|||||||
void LoadImplicitLinkInfo();
|
void LoadImplicitLinkInfo();
|
||||||
void AddImplicitLinkInfo();
|
void AddImplicitLinkInfo();
|
||||||
void AddImplicitLinkInfo(std::string const& lang);
|
void AddImplicitLinkInfo(std::string const& lang);
|
||||||
|
void AddRuntimeLinkLibrary(std::string const& lang);
|
||||||
std::set<std::string> ImplicitLinkDirs;
|
std::set<std::string> ImplicitLinkDirs;
|
||||||
std::set<std::string> ImplicitLinkLibs;
|
std::set<std::string> ImplicitLinkLibs;
|
||||||
|
|
||||||
|
|||||||
@@ -358,6 +358,7 @@ cmTarget::cmTarget(std::string const& name, cmStateEnums::TargetType type,
|
|||||||
initProp("CUDA_COMPILER_LAUNCHER");
|
initProp("CUDA_COMPILER_LAUNCHER");
|
||||||
initProp("CUDA_SEPARABLE_COMPILATION");
|
initProp("CUDA_SEPARABLE_COMPILATION");
|
||||||
initProp("CUDA_RESOLVE_DEVICE_SYMBOLS");
|
initProp("CUDA_RESOLVE_DEVICE_SYMBOLS");
|
||||||
|
initProp("CUDA_RUNTIME_LIBRARY");
|
||||||
initProp("LINK_SEARCH_START_STATIC");
|
initProp("LINK_SEARCH_START_STATIC");
|
||||||
initProp("LINK_SEARCH_END_STATIC");
|
initProp("LINK_SEARCH_END_STATIC");
|
||||||
initProp("Swift_LANGUAGE_VERSION");
|
initProp("Swift_LANGUAGE_VERSION");
|
||||||
|
|||||||
@@ -3636,18 +3636,7 @@ bool cmVisualStudio10TargetGenerator::ComputeLinkOptions(
|
|||||||
this->AddLibraries(cli, libVec, vsTargetVec, config);
|
this->AddLibraries(cli, libVec, vsTargetVec, config);
|
||||||
if (cmContains(linkClosure->Languages, "CUDA") &&
|
if (cmContains(linkClosure->Languages, "CUDA") &&
|
||||||
this->CudaOptions[config] != nullptr) {
|
this->CudaOptions[config] != nullptr) {
|
||||||
switch (this->CudaOptions[config]->GetCudaRuntime()) {
|
this->CudaOptions[config]->FixCudaRuntime(this->GeneratorTarget);
|
||||||
case cmVisualStudioGeneratorOptions::CudaRuntimeStatic:
|
|
||||||
libVec.push_back("cudadevrt.lib");
|
|
||||||
libVec.push_back("cudart_static.lib");
|
|
||||||
break;
|
|
||||||
case cmVisualStudioGeneratorOptions::CudaRuntimeShared:
|
|
||||||
libVec.push_back("cudadevrt.lib");
|
|
||||||
libVec.push_back("cudart.lib");
|
|
||||||
break;
|
|
||||||
case cmVisualStudioGeneratorOptions::CudaRuntimeNone:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
std::string standardLibsVar =
|
std::string standardLibsVar =
|
||||||
cmStrCat("CMAKE_", linkLanguage, "_STANDARD_LIBRARIES");
|
cmStrCat("CMAKE_", linkLanguage, "_STANDARD_LIBRARIES");
|
||||||
|
|||||||
@@ -3,6 +3,8 @@
|
|||||||
#include <cm/iterator>
|
#include <cm/iterator>
|
||||||
|
|
||||||
#include "cmAlgorithms.h"
|
#include "cmAlgorithms.h"
|
||||||
|
#include "cmGeneratorExpression.h"
|
||||||
|
#include "cmGeneratorTarget.h"
|
||||||
#include "cmLocalVisualStudioGenerator.h"
|
#include "cmLocalVisualStudioGenerator.h"
|
||||||
#include "cmOutputConverter.h"
|
#include "cmOutputConverter.h"
|
||||||
#include "cmSystemTools.h"
|
#include "cmSystemTools.h"
|
||||||
@@ -149,25 +151,33 @@ bool cmVisualStudioGeneratorOptions::UsingSBCS() const
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
cmVisualStudioGeneratorOptions::CudaRuntime
|
void cmVisualStudioGeneratorOptions::FixCudaRuntime(cmGeneratorTarget* target)
|
||||||
cmVisualStudioGeneratorOptions::GetCudaRuntime() const
|
|
||||||
{
|
{
|
||||||
std::map<std::string, FlagValue>::const_iterator i =
|
std::map<std::string, FlagValue>::const_iterator i =
|
||||||
this->FlagMap.find("CudaRuntime");
|
this->FlagMap.find("CudaRuntime");
|
||||||
if (i != this->FlagMap.end() && i->second.size() == 1) {
|
if (i == this->FlagMap.end()) {
|
||||||
std::string const& cudaRuntime = i->second[0];
|
// User didn't provide am override so get the property value
|
||||||
if (cudaRuntime == "Static") {
|
const char* runtimeLibraryValue =
|
||||||
return CudaRuntimeStatic;
|
target->GetProperty("CUDA_RUNTIME_LIBRARY");
|
||||||
}
|
if (runtimeLibraryValue) {
|
||||||
if (cudaRuntime == "Shared") {
|
std::string cudaRuntime =
|
||||||
return CudaRuntimeShared;
|
cmSystemTools::UpperCase(cmGeneratorExpression::Evaluate(
|
||||||
}
|
runtimeLibraryValue, this->LocalGenerator, this->Configuration,
|
||||||
if (cudaRuntime == "None") {
|
target));
|
||||||
return CudaRuntimeNone;
|
if (cudaRuntime == "STATIC") {
|
||||||
|
this->AddFlag("CudaRuntime", "Static");
|
||||||
|
}
|
||||||
|
if (cudaRuntime == "SHARED") {
|
||||||
|
this->AddFlag("CudaRuntime", "Shared");
|
||||||
|
}
|
||||||
|
if (cudaRuntime == "NONE") {
|
||||||
|
this->AddFlag("CudaRuntime", "None");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// nvcc default is static
|
||||||
|
this->AddFlag("CudaRuntime", "Static");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// nvcc default is static
|
|
||||||
return CudaRuntimeStatic;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void cmVisualStudioGeneratorOptions::FixCudaCodeGeneration()
|
void cmVisualStudioGeneratorOptions::FixCudaCodeGeneration()
|
||||||
|
|||||||
@@ -13,6 +13,7 @@
|
|||||||
#include "cmIDEOptions.h"
|
#include "cmIDEOptions.h"
|
||||||
|
|
||||||
class cmLocalVisualStudioGenerator;
|
class cmLocalVisualStudioGenerator;
|
||||||
|
class cmGeneratorTarget;
|
||||||
|
|
||||||
using cmVS7FlagTable = cmIDEFlagTable;
|
using cmVS7FlagTable = cmIDEFlagTable;
|
||||||
|
|
||||||
@@ -61,15 +62,8 @@ public:
|
|||||||
bool UsingUnicode() const;
|
bool UsingUnicode() const;
|
||||||
bool UsingSBCS() const;
|
bool UsingSBCS() const;
|
||||||
|
|
||||||
enum CudaRuntime
|
|
||||||
{
|
|
||||||
CudaRuntimeStatic,
|
|
||||||
CudaRuntimeShared,
|
|
||||||
CudaRuntimeNone
|
|
||||||
};
|
|
||||||
CudaRuntime GetCudaRuntime() const;
|
|
||||||
|
|
||||||
void FixCudaCodeGeneration();
|
void FixCudaCodeGeneration();
|
||||||
|
void FixCudaRuntime(cmGeneratorTarget* target);
|
||||||
|
|
||||||
void FixManifestUACFlags();
|
void FixManifestUACFlags();
|
||||||
|
|
||||||
|
|||||||
@@ -22,18 +22,11 @@ set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
|
|||||||
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
|
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
|
||||||
|
|
||||||
add_library(CudaComplexCppBase SHARED dynamic.cpp)
|
add_library(CudaComplexCppBase SHARED dynamic.cpp)
|
||||||
add_library(CudaComplexSeperableLib STATIC file1.cu file2.cu file3.cu)
|
|
||||||
set_target_properties(CudaComplexSeperableLib
|
|
||||||
PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
|
||||||
set_target_properties( CudaComplexSeperableLib
|
|
||||||
PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
|
||||||
|
|
||||||
add_library(CudaComplexSharedLib SHARED dynamic.cu)
|
add_library(CudaComplexSharedLib SHARED dynamic.cu)
|
||||||
target_link_libraries(CudaComplexSharedLib PUBLIC CudaComplexCppBase)
|
target_link_libraries(CudaComplexSharedLib PUBLIC CudaComplexCppBase)
|
||||||
|
|
||||||
|
add_library(CudaComplexSeperableLib STATIC file1.cu file2.cu file3.cu)
|
||||||
add_library(CudaComplexMixedLib SHARED mixed.cpp mixed.cu)
|
add_library(CudaComplexMixedLib SHARED mixed.cpp mixed.cu)
|
||||||
set_target_properties(CudaComplexMixedLib
|
|
||||||
PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
|
||||||
target_link_libraries(CudaComplexMixedLib
|
target_link_libraries(CudaComplexMixedLib
|
||||||
PUBLIC CudaComplexSharedLib
|
PUBLIC CudaComplexSharedLib
|
||||||
PRIVATE CudaComplexSeperableLib)
|
PRIVATE CudaComplexSeperableLib)
|
||||||
@@ -41,7 +34,27 @@ target_link_libraries(CudaComplexMixedLib
|
|||||||
add_executable(CudaComplex main.cpp)
|
add_executable(CudaComplex main.cpp)
|
||||||
target_link_libraries(CudaComplex PUBLIC CudaComplexMixedLib)
|
target_link_libraries(CudaComplex PUBLIC CudaComplexMixedLib)
|
||||||
|
|
||||||
|
|
||||||
|
set_target_properties(CudaComplexMixedLib
|
||||||
|
CudaComplexSeperableLib
|
||||||
|
PROPERTIES
|
||||||
|
POSITION_INDEPENDENT_CODE ON
|
||||||
|
CUDA_SEPARABLE_COMPILATION ON
|
||||||
|
)
|
||||||
|
set_target_properties(CudaComplexMixedLib
|
||||||
|
CudaComplexSharedLib
|
||||||
|
PROPERTIES
|
||||||
|
CUDA_RUNTIME_LIBRARY shared
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if(APPLE)
|
if(APPLE)
|
||||||
# Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
|
# Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
|
||||||
set_property(TARGET CudaComplex PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
|
set_property(TARGET CudaComplex PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(UNIX)
|
||||||
|
# Help the shared cuda runtime find libcudart as it is not located
|
||||||
|
# in a default system searched location
|
||||||
|
set_property(TARGET CudaComplexMixedLib PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
|
||||||
|
endif()
|
||||||
|
|||||||
@@ -54,17 +54,20 @@ EXPORT int choose_cuda_device()
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
EXPORT void cuda_dynamic_lib_func()
|
EXPORT bool cuda_dynamic_lib_func()
|
||||||
{
|
{
|
||||||
DetermineIfValidCudaDevice<<<1, 1>>>();
|
|
||||||
cudaError_t err = cudaGetLastError();
|
cudaError_t err = cudaGetLastError();
|
||||||
if (err != cudaSuccess) {
|
if (err != cudaSuccess) {
|
||||||
std::cerr << "DetermineIfValidCudaDevice [SYNC] failed: "
|
std::cerr << "DetermineIfValidCudaDevice [Per Launch] failed: "
|
||||||
<< cudaGetErrorString(err) << std::endl;
|
<< cudaGetErrorString(err) << std::endl;
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
DetermineIfValidCudaDevice<<<1, 1>>>();
|
||||||
err = cudaDeviceSynchronize();
|
err = cudaDeviceSynchronize();
|
||||||
if (err != cudaSuccess) {
|
if (err != cudaSuccess) {
|
||||||
std::cerr << "DetermineIfValidCudaDevice [ASYNC] failed: "
|
std::cerr << "DetermineIfValidCudaDevice [SYNC] failed: "
|
||||||
<< cudaGetErrorString(cudaGetLastError()) << std::endl;
|
<< cudaGetErrorString(cudaGetLastError()) << std::endl;
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -22,5 +22,6 @@ int main(int argc, char** argv)
|
|||||||
|
|
||||||
int r1 = call_cuda_seperable_code(42);
|
int r1 = call_cuda_seperable_code(42);
|
||||||
int r2 = mixed_launch_kernel(42);
|
int r2 = mixed_launch_kernel(42);
|
||||||
|
|
||||||
return (r1 == 42 || r2 == 42) ? 1 : 0;
|
return (r1 == 42 || r2 == 42) ? 1 : 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,7 +15,7 @@
|
|||||||
result_type __device__ file1_func(int x);
|
result_type __device__ file1_func(int x);
|
||||||
result_type_dynamic __device__ file2_func(int x);
|
result_type_dynamic __device__ file2_func(int x);
|
||||||
|
|
||||||
IMPORT void __host__ cuda_dynamic_lib_func();
|
IMPORT bool __host__ cuda_dynamic_lib_func();
|
||||||
|
|
||||||
static __global__ void mixed_kernel(result_type* r, int x)
|
static __global__ void mixed_kernel(result_type* r, int x)
|
||||||
{
|
{
|
||||||
@@ -25,7 +25,9 @@ static __global__ void mixed_kernel(result_type* r, int x)
|
|||||||
|
|
||||||
EXPORT int mixed_launch_kernel(int x)
|
EXPORT int mixed_launch_kernel(int x)
|
||||||
{
|
{
|
||||||
cuda_dynamic_lib_func();
|
if (!cuda_dynamic_lib_func()) {
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
result_type* r;
|
result_type* r;
|
||||||
cudaError_t err = cudaMallocManaged(&r, sizeof(result_type));
|
cudaError_t err = cudaMallocManaged(&r, sizeof(result_type));
|
||||||
|
|||||||
@@ -5,10 +5,21 @@ ADD_TEST_MACRO(CudaOnly.ExportPTX CudaOnlyExportPTX)
|
|||||||
ADD_TEST_MACRO(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag)
|
ADD_TEST_MACRO(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag)
|
||||||
ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
|
ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
|
||||||
ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
|
ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
|
||||||
|
ADD_TEST_MACRO(CudaOnly.SharedRuntimePlusToolkit CudaOnlySharedRuntimePlusToolkit)
|
||||||
ADD_TEST_MACRO(CudaOnly.Standard98 CudaOnlyStandard98)
|
ADD_TEST_MACRO(CudaOnly.Standard98 CudaOnlyStandard98)
|
||||||
ADD_TEST_MACRO(CudaOnly.Toolkit CudaOnlyToolkit)
|
ADD_TEST_MACRO(CudaOnly.Toolkit CudaOnlyToolkit)
|
||||||
ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs)
|
ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs)
|
||||||
|
|
||||||
|
# The CUDA only ships the shared version of the toolkit libraries
|
||||||
|
# on windows
|
||||||
|
if(NOT WIN32)
|
||||||
|
ADD_TEST_MACRO(Cuda.StaticRuntimePlusToolkit StaticRuntimePlusToolkit)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(MSVC)
|
||||||
|
ADD_TEST_MACRO(CudaOnly.PDB CudaOnlyPDB)
|
||||||
|
endif()
|
||||||
|
|
||||||
add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
|
add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
|
||||||
${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
|
${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
|
||||||
--build-and-test
|
--build-and-test
|
||||||
@@ -20,6 +31,14 @@ add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
|
|||||||
--test-command ${CMAKE_CTEST_COMMAND} -V -C $<CONFIGURATION>
|
--test-command ${CMAKE_CTEST_COMMAND} -V -C $<CONFIGURATION>
|
||||||
)
|
)
|
||||||
|
|
||||||
if(MSVC)
|
add_test(NAME CudaOnly.RuntimeControls COMMAND
|
||||||
ADD_TEST_MACRO(CudaOnly.PDB CudaOnlyPDB)
|
${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
|
||||||
endif()
|
--build-and-test
|
||||||
|
"${CMAKE_CURRENT_SOURCE_DIR}/RuntimeControls/"
|
||||||
|
"${CMAKE_CURRENT_BINARY_DIR}/RuntimeControls/"
|
||||||
|
--build-two-config
|
||||||
|
${build_generator_args}
|
||||||
|
--build-project RuntimeControls
|
||||||
|
--build-options ${build_options}
|
||||||
|
--test-command ${CMAKE_CTEST_COMMAND} -V -C $<CONFIGURATION>
|
||||||
|
)
|
||||||
|
|||||||
@@ -0,0 +1,60 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.7)
|
||||||
|
project (RuntimeControls CUDA)
|
||||||
|
|
||||||
|
# Find nm and dumpbin
|
||||||
|
if(CMAKE_NM)
|
||||||
|
set(dump_command ${CMAKE_NM})
|
||||||
|
set(dump_args -g)
|
||||||
|
else()
|
||||||
|
include(GetPrerequisites)
|
||||||
|
message(STATUS "calling list_prerequisites to find dumpbin")
|
||||||
|
list_prerequisites("${CMAKE_COMMAND}" 0 0 0)
|
||||||
|
if(gp_dumpbin)
|
||||||
|
set(dump_command ${gp_dumpbin})
|
||||||
|
set(dump_args /ARCHIVEMEMBERS)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=[compute_30]")
|
||||||
|
|
||||||
|
set(CMAKE_CUDA_STANDARD 11)
|
||||||
|
set(CMAKE_CUDA_RUNTIME_LIBRARY static)
|
||||||
|
|
||||||
|
if(NOT "x${CMAKE_CUDA_SIMULATE_ID}" STREQUAL "xMSVC")
|
||||||
|
add_library(UsesNoCudaRT SHARED file1.cu)
|
||||||
|
set_target_properties(UsesNoCudaRT PROPERTIES CUDA_RUNTIME_LIBRARY none)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
add_library(UsesStaticCudaRT SHARED file2.cu)
|
||||||
|
|
||||||
|
add_executable(CudaOnlyRuntimeControls main.cu)
|
||||||
|
set_target_properties(CudaOnlyRuntimeControls PROPERTIES CUDA_RUNTIME_LIBRARY shared)
|
||||||
|
|
||||||
|
target_link_libraries(CudaOnlyRuntimeControls PRIVATE $<TARGET_NAME_IF_EXISTS:UsesNoCudaRT> UsesStaticCudaRT)
|
||||||
|
|
||||||
|
|
||||||
|
if(dump_command)
|
||||||
|
if(TARGET UsesNoCudaRT)
|
||||||
|
add_custom_command(TARGET UsesNoCudaRT POST_BUILD
|
||||||
|
COMMAND ${CMAKE_COMMAND}
|
||||||
|
-DDUMP_COMMAND=${dump_command}
|
||||||
|
-DDUMP_ARGS=${dump_args}
|
||||||
|
-DTEST_LIBRARY_PATH=$<TARGET_FILE:UsesNoCudaRT>
|
||||||
|
-P ${CMAKE_CURRENT_SOURCE_DIR}/no_runtime.cmake
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
add_custom_command(TARGET UsesStaticCudaRT POST_BUILD
|
||||||
|
COMMAND ${CMAKE_COMMAND}
|
||||||
|
-DDUMP_COMMAND=${dump_command}
|
||||||
|
-DDUMP_ARGS=${dump_args}
|
||||||
|
-DTEST_LIBRARY_PATH=$<TARGET_FILE:UsesStaticCudaRT>
|
||||||
|
-P ${CMAKE_CURRENT_SOURCE_DIR}/uses_static_runtime.cmake
|
||||||
|
)
|
||||||
|
string(REPLACE ";" "|" dirs "${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}")
|
||||||
|
add_custom_command(TARGET CudaOnlyRuntimeControls POST_BUILD
|
||||||
|
COMMAND ${CMAKE_COMMAND}
|
||||||
|
-DEXEC_PATH=$<TARGET_FILE:CudaOnlyRuntimeControls>
|
||||||
|
-DEXTRA_LIB_DIRS="${dirs}"
|
||||||
|
-P ${CMAKE_CURRENT_SOURCE_DIR}/verify_runtime.cmake
|
||||||
|
)
|
||||||
|
endif()
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
# define EXPORT __declspec(dllexport)
|
||||||
|
#else
|
||||||
|
# define EXPORT
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void __global__ file1_kernel(int x, int& r)
|
||||||
|
{
|
||||||
|
r = -x;
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPORT int file1_launch_kernel(int x)
|
||||||
|
{
|
||||||
|
int r = 0;
|
||||||
|
file1_kernel<<<1, 1>>>(x, r);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
# define EXPORT __declspec(dllexport)
|
||||||
|
#else
|
||||||
|
# define EXPORT
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void __global__ file2_kernel(int x, int& r)
|
||||||
|
{
|
||||||
|
r = -x;
|
||||||
|
}
|
||||||
|
|
||||||
|
EXPORT int file2_launch_kernel(int x)
|
||||||
|
{
|
||||||
|
int r = 0;
|
||||||
|
file2_kernel<<<1, 1>>>(x, r);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
@@ -0,0 +1,81 @@
|
|||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include "cuda.h"
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
# define IMPORT __declspec(dllimport)
|
||||||
|
#else
|
||||||
|
# define IMPORT
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
IMPORT int file1_launch_kernel(int x);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
IMPORT int file2_launch_kernel(int x);
|
||||||
|
|
||||||
|
int choose_cuda_device()
|
||||||
|
{
|
||||||
|
int nDevices = 0;
|
||||||
|
cudaError_t err = cudaGetDeviceCount(&nDevices);
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
std::cerr << "Failed to retrieve the number of CUDA enabled devices"
|
||||||
|
<< std::endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < nDevices; ++i) {
|
||||||
|
cudaDeviceProp prop;
|
||||||
|
cudaError_t err = cudaGetDeviceProperties(&prop, i);
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
std::cerr << "Could not retrieve properties from CUDA device " << i
|
||||||
|
<< std::endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
std::cout << "prop.major: " << prop.major << std::endl;
|
||||||
|
if (prop.major >= 3) {
|
||||||
|
err = cudaSetDevice(i);
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
std::cout << "Could not select CUDA device " << i << std::endl;
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << "Could not find a CUDA enabled card supporting compute >=3.0"
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char** argv)
|
||||||
|
{
|
||||||
|
int ret = choose_cuda_device();
|
||||||
|
if (ret) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
cudaError_t err;
|
||||||
|
#ifndef _WIN32
|
||||||
|
file1_launch_kernel(1);
|
||||||
|
err = cudaGetLastError();
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
std::cerr << "file1_launch_kernel: kernel launch should have passed.\n "
|
||||||
|
"Error message: "
|
||||||
|
<< cudaGetErrorString(err) << std::endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
file2_launch_kernel(1);
|
||||||
|
err = cudaGetLastError();
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
std::cerr << "file2_launch_kernel: kernel launch should have passed.\n "
|
||||||
|
"Error message: "
|
||||||
|
<< cudaGetErrorString(err) << std::endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
execute_process(COMMAND ${DUMP_COMMAND} ${DUMP_ARGS} ${TEST_LIBRARY_PATH}
|
||||||
|
RESULT_VARIABLE RESULT
|
||||||
|
OUTPUT_VARIABLE OUTPUT
|
||||||
|
ERROR_VARIABLE ERROR
|
||||||
|
)
|
||||||
|
|
||||||
|
if(NOT "${RESULT}" STREQUAL "0")
|
||||||
|
message(FATAL_ERROR "${DUMP_COMMAND} failed [${RESULT}] [${OUTPUT}] [${ERROR}]")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(NOT "${OUTPUT}" MATCHES "(__cuda)")
|
||||||
|
message(FATAL_ERROR
|
||||||
|
"not missing cuda device symbols, static runtime linking was used.")
|
||||||
|
endif()
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
execute_process(COMMAND ${DUMP_COMMAND} ${DUMP_ARGS} ${TEST_LIBRARY_PATH}
|
||||||
|
RESULT_VARIABLE RESULT
|
||||||
|
OUTPUT_VARIABLE OUTPUT
|
||||||
|
ERROR_VARIABLE ERROR
|
||||||
|
)
|
||||||
|
|
||||||
|
if(NOT "${RESULT}" STREQUAL "0")
|
||||||
|
message(FATAL_ERROR "${DUMP_COMMAND} failed [${RESULT}] [${OUTPUT}] [${ERROR}]")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if("${OUTPUT}" MATCHES "__cuda")
|
||||||
|
message(FATAL_ERROR
|
||||||
|
"missing cuda device symbols, static runtime linking was not used.")
|
||||||
|
endif()
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
|
||||||
|
string(REPLACE "|" ";" dirs "${EXTRA_LIB_DIRS}")
|
||||||
|
file(GET_RUNTIME_DEPENDENCIES
|
||||||
|
RESOLVED_DEPENDENCIES_VAR resolved_libs
|
||||||
|
UNRESOLVED_DEPENDENCIES_VAR unresolved_libs
|
||||||
|
DIRECTORIES ${dirs}
|
||||||
|
EXECUTABLES ${EXEC_PATH}
|
||||||
|
)
|
||||||
|
|
||||||
|
list(FILTER resolved_libs INCLUDE REGEX ".*cudart.*")
|
||||||
|
list(LENGTH resolved_libs has_cudart)
|
||||||
|
|
||||||
|
if(has_cudart EQUAL 0)
|
||||||
|
message(FATAL_ERROR
|
||||||
|
"missing cudart shared library from runtime dependency output.")
|
||||||
|
endif()
|
||||||
@@ -0,0 +1,42 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.15)
|
||||||
|
project(SharedRuntimePlusToolkit CUDA)
|
||||||
|
|
||||||
|
#Goal for this example:
|
||||||
|
# Validate that with c++ we can use some components of the CUDA toolkit, and
|
||||||
|
# specify the cuda runtime
|
||||||
|
find_package(CUDAToolkit REQUIRED)
|
||||||
|
|
||||||
|
add_library(Common OBJECT curand.cu nppif.cu)
|
||||||
|
target_link_libraries(Common PRIVATE CUDA::toolkit)
|
||||||
|
set_target_properties(Common PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||||
|
|
||||||
|
#shared runtime with shared toolkit libraries
|
||||||
|
add_library(SharedToolkit SHARED shared.cu)
|
||||||
|
target_link_libraries(SharedToolkit PRIVATE Common PUBLIC CUDA::curand CUDA::nppif)
|
||||||
|
set_target_properties(SharedToolkit PROPERTIES CUDA_RUNTIME_LIBRARY none)
|
||||||
|
target_link_libraries(SharedToolkit PUBLIC CUDA::cudart)
|
||||||
|
|
||||||
|
# The CUDA only ships the shared version of the toolkit libraries
|
||||||
|
# on windows
|
||||||
|
if(NOT WIN32)
|
||||||
|
#shared runtime with static toolkit libraries
|
||||||
|
add_library(StaticToolkit SHARED static.cu)
|
||||||
|
target_link_libraries(StaticToolkit PRIVATE Common CUDA::curand_static CUDA::nppif_static)
|
||||||
|
set_target_properties(StaticToolkit PROPERTIES CUDA_RUNTIME_LIBRARY Shared)
|
||||||
|
|
||||||
|
#static runtime with mixed toolkit libraries
|
||||||
|
add_library(MixedToolkit SHARED mixed.cu)
|
||||||
|
target_link_libraries(MixedToolkit PRIVATE Common CUDA::curand_static CUDA::nppif)
|
||||||
|
set_target_properties(MixedToolkit PROPERTIES CUDA_RUNTIME_LIBRARY Shared)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
add_executable(CudaOnlySharedRuntimePlusToolkit main.cu)
|
||||||
|
target_link_libraries(CudaOnlySharedRuntimePlusToolkit PRIVATE SharedToolkit
|
||||||
|
$<TARGET_NAME_IF_EXISTS:StaticToolkit>
|
||||||
|
$<TARGET_NAME_IF_EXISTS:MixedToolkit>)
|
||||||
|
|
||||||
|
if(UNIX)
|
||||||
|
# Help the shared cuda runtime find libcudart as it is not located
|
||||||
|
# in a default system searched location
|
||||||
|
set_property(TARGET CudaOnlySharedRuntimePlusToolkit PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
|
||||||
|
endif()
|
||||||
@@ -0,0 +1,65 @@
|
|||||||
|
// Comes from:
|
||||||
|
// https://docs.nvidia.com/cuda/curand/host-api-overview.html#host-api-example
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
# define EXPORT __declspec(dllexport)
|
||||||
|
#else
|
||||||
|
# define EXPORT
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This program uses the host CURAND API to generate 100
|
||||||
|
* pseudorandom floats.
|
||||||
|
*/
|
||||||
|
#include <cuda.h>
|
||||||
|
#include <curand.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#define CUDA_CALL(x) \
|
||||||
|
do { \
|
||||||
|
if ((x) != cudaSuccess) { \
|
||||||
|
printf("Error at %s:%d\n", __FILE__, __LINE__); \
|
||||||
|
return EXIT_FAILURE; \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
#define CURAND_CALL(x) \
|
||||||
|
do { \
|
||||||
|
if ((x) != CURAND_STATUS_SUCCESS) { \
|
||||||
|
printf("Error at %s:%d\n", __FILE__, __LINE__); \
|
||||||
|
return EXIT_FAILURE; \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
EXPORT int curand_main()
|
||||||
|
{
|
||||||
|
size_t n = 100;
|
||||||
|
size_t i;
|
||||||
|
curandGenerator_t gen;
|
||||||
|
float *devData, *hostData;
|
||||||
|
|
||||||
|
/* Allocate n floats on host */
|
||||||
|
hostData = (float*)calloc(n, sizeof(float));
|
||||||
|
|
||||||
|
/* Allocate n floats on device */
|
||||||
|
CUDA_CALL(cudaMalloc((void**)&devData, n * sizeof(float)));
|
||||||
|
|
||||||
|
/* Create pseudo-random number generator */
|
||||||
|
CURAND_CALL(curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT));
|
||||||
|
|
||||||
|
/* Set seed */
|
||||||
|
CURAND_CALL(curandSetPseudoRandomGeneratorSeed(gen, 1234ULL));
|
||||||
|
|
||||||
|
/* Generate n floats on device */
|
||||||
|
CURAND_CALL(curandGenerateUniform(gen, devData, n));
|
||||||
|
|
||||||
|
/* Copy device memory to host */
|
||||||
|
CUDA_CALL(
|
||||||
|
cudaMemcpy(hostData, devData, n * sizeof(float), cudaMemcpyDeviceToHost));
|
||||||
|
|
||||||
|
/* Cleanup */
|
||||||
|
CURAND_CALL(curandDestroyGenerator(gen));
|
||||||
|
CUDA_CALL(cudaFree(devData));
|
||||||
|
free(hostData);
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
# define IMPORT __declspec(dllimport)
|
||||||
|
IMPORT int shared_version();
|
||||||
|
int static_version()
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
int mixed_version()
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
int shared_version();
|
||||||
|
int static_version();
|
||||||
|
int mixed_version();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
return mixed_version() == 0 && shared_version() == 0 &&
|
||||||
|
static_version() == 0;
|
||||||
|
}
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
# define IMPORT __declspec(dllimport)
|
||||||
|
# define EXPORT __declspec(dllexport)
|
||||||
|
#else
|
||||||
|
# define IMPORT
|
||||||
|
# define EXPORT
|
||||||
|
#endif
|
||||||
|
|
||||||
|
IMPORT int curand_main();
|
||||||
|
IMPORT int nppif_main();
|
||||||
|
|
||||||
|
EXPORT int mixed_version()
|
||||||
|
{
|
||||||
|
return curand_main() == 0 && nppif_main() == 0;
|
||||||
|
}
|
||||||
@@ -0,0 +1,92 @@
|
|||||||
|
// Comes from
|
||||||
|
// https://devtalk.nvidia.com/default/topic/1037482/gpu-accelerated-libraries/help-me-help-you-with-modern-cmake-and-cuda-mwe-for-npp/post/5271066/#5271066
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
# define EXPORT __declspec(dllexport)
|
||||||
|
#else
|
||||||
|
# define EXPORT
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <cuda_runtime_api.h>
|
||||||
|
#include <nppi_filtering_functions.h>
|
||||||
|
|
||||||
|
EXPORT int nppif_main()
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* 8-bit unsigned single-channel 1D row convolution.
|
||||||
|
*/
|
||||||
|
const int simgrows = 32;
|
||||||
|
const int simgcols = 32;
|
||||||
|
Npp8u *d_pSrc, *d_pDst;
|
||||||
|
const int nMaskSize = 3;
|
||||||
|
NppiSize oROI;
|
||||||
|
oROI.width = simgcols - nMaskSize;
|
||||||
|
oROI.height = simgrows;
|
||||||
|
const int simgsize = simgrows * simgcols * sizeof(d_pSrc[0]);
|
||||||
|
const int dimgsize = oROI.width * oROI.height * sizeof(d_pSrc[0]);
|
||||||
|
const int simgpix = simgrows * simgcols;
|
||||||
|
const int dimgpix = oROI.width * oROI.height;
|
||||||
|
const int nSrcStep = simgcols * sizeof(d_pSrc[0]);
|
||||||
|
const int nDstStep = oROI.width * sizeof(d_pDst[0]);
|
||||||
|
const int pixval = 1;
|
||||||
|
const int nDivisor = 1;
|
||||||
|
const Npp32s h_pKernel[nMaskSize] = { pixval, pixval, pixval };
|
||||||
|
Npp32s* d_pKernel;
|
||||||
|
const Npp32s nAnchor = 2;
|
||||||
|
cudaError_t err = cudaMalloc((void**)&d_pSrc, simgsize);
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
fprintf(stderr, "Cuda error %d\n", __LINE__);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
err = cudaMalloc((void**)&d_pDst, dimgsize);
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
fprintf(stderr, "Cuda error %d\n", __LINE__);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
err = cudaMalloc((void**)&d_pKernel, nMaskSize * sizeof(d_pKernel[0]));
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
fprintf(stderr, "Cuda error %d\n", __LINE__);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
// set image to pixval initially
|
||||||
|
err = cudaMemset(d_pSrc, pixval, simgsize);
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
fprintf(stderr, "Cuda error %d\n", __LINE__);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
err = cudaMemset(d_pDst, 0, dimgsize);
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
fprintf(stderr, "Cuda error %d\n", __LINE__);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
err = cudaMemcpy(d_pKernel, h_pKernel, nMaskSize * sizeof(d_pKernel[0]),
|
||||||
|
cudaMemcpyHostToDevice);
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
fprintf(stderr, "Cuda error %d\n", __LINE__);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
// copy src to dst
|
||||||
|
NppStatus ret =
|
||||||
|
nppiFilterRow_8u_C1R(d_pSrc, nSrcStep, d_pDst, nDstStep, oROI, d_pKernel,
|
||||||
|
nMaskSize, nAnchor, nDivisor);
|
||||||
|
assert(ret == NPP_NO_ERROR);
|
||||||
|
Npp8u* h_imgres = new Npp8u[dimgpix];
|
||||||
|
err = cudaMemcpy(h_imgres, d_pDst, dimgsize, cudaMemcpyDeviceToHost);
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
fprintf(stderr, "Cuda error %d\n", __LINE__);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
// test for filtering
|
||||||
|
for (int i = 0; i < dimgpix; i++) {
|
||||||
|
if (h_imgres[i] != (pixval * pixval * nMaskSize)) {
|
||||||
|
fprintf(stderr, "h_imgres at index %d failed to match\n", i);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
# define IMPORT __declspec(dllimport)
|
||||||
|
# define EXPORT __declspec(dllexport)
|
||||||
|
#else
|
||||||
|
# define IMPORT
|
||||||
|
# define EXPORT
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int curand_main();
|
||||||
|
int nppif_main();
|
||||||
|
|
||||||
|
EXPORT int shared_version()
|
||||||
|
{
|
||||||
|
return curand_main() == 0 && nppif_main() == 0;
|
||||||
|
}
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
# define IMPORT __declspec(dllimport)
|
||||||
|
# define EXPORT __declspec(dllexport)
|
||||||
|
#else
|
||||||
|
# define IMPORT
|
||||||
|
# define EXPORT
|
||||||
|
#endif
|
||||||
|
|
||||||
|
IMPORT int curand_main();
|
||||||
|
IMPORT int nppif_main();
|
||||||
|
|
||||||
|
EXPORT int static_version()
|
||||||
|
{
|
||||||
|
return curand_main() == 0 && nppif_main() == 0;
|
||||||
|
}
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.15)
|
||||||
|
project(StaticRuntimePlusToolkit CUDA)
|
||||||
|
|
||||||
|
#Goal for this example:
|
||||||
|
# Validate that with cuda we can use some components of the CUDA toolkit, and
|
||||||
|
# specify the cuda runtime
|
||||||
|
find_package(CUDAToolkit REQUIRED)
|
||||||
|
|
||||||
|
add_library(Common OBJECT curand.cu nppif.cu)
|
||||||
|
target_link_libraries(Common PRIVATE CUDA::toolkit)
|
||||||
|
set_target_properties(Common PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||||
|
|
||||||
|
#static runtime with shared toolkit libraries
|
||||||
|
add_library(SharedToolkit SHARED shared.cu)
|
||||||
|
target_link_libraries(SharedToolkit PRIVATE Common CUDA::curand CUDA::nppif )
|
||||||
|
set_target_properties(SharedToolkit PROPERTIES CUDA_RUNTIME_LIBRARY none)
|
||||||
|
target_link_libraries(SharedToolkit PUBLIC CUDA::cudart_static)
|
||||||
|
|
||||||
|
#static runtime with static toolkit libraries
|
||||||
|
add_library(StaticToolkit SHARED static.cu)
|
||||||
|
target_link_libraries(StaticToolkit PRIVATE Common CUDA::curand_static CUDA::nppif_static)
|
||||||
|
|
||||||
|
#static runtime with mixed toolkit libraries
|
||||||
|
add_library(MixedToolkit SHARED mixed.cu)
|
||||||
|
target_link_libraries(MixedToolkit PRIVATE Common CUDA::curand CUDA::nppif_static)
|
||||||
|
set_target_properties(MixedToolkit PROPERTIES CUDA_RUNTIME_LIBRARY Static)
|
||||||
|
|
||||||
|
add_executable(CudaOnlyStaticRuntimePlusToolkit main.cu)
|
||||||
|
target_link_libraries(CudaOnlyStaticRuntimePlusToolkit PRIVATE SharedToolkit StaticToolkit MixedToolkit)
|
||||||
@@ -0,0 +1,59 @@
|
|||||||
|
// Comes from:
|
||||||
|
// https://docs.nvidia.com/cuda/curand/host-api-overview.html#host-api-example
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This program uses the host CURAND API to generate 100
|
||||||
|
* pseudorandom floats.
|
||||||
|
*/
|
||||||
|
#include <cuda.h>
|
||||||
|
#include <curand.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#define CUDA_CALL(x) \
|
||||||
|
do { \
|
||||||
|
if ((x) != cudaSuccess) { \
|
||||||
|
printf("Error at %s:%d\n", __FILE__, __LINE__); \
|
||||||
|
return EXIT_FAILURE; \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
#define CURAND_CALL(x) \
|
||||||
|
do { \
|
||||||
|
if ((x) != CURAND_STATUS_SUCCESS) { \
|
||||||
|
printf("Error at %s:%d\n", __FILE__, __LINE__); \
|
||||||
|
return EXIT_FAILURE; \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
int curand_main()
|
||||||
|
{
|
||||||
|
size_t n = 100;
|
||||||
|
size_t i;
|
||||||
|
curandGenerator_t gen;
|
||||||
|
float *devData, *hostData;
|
||||||
|
|
||||||
|
/* Allocate n floats on host */
|
||||||
|
hostData = (float*)calloc(n, sizeof(float));
|
||||||
|
|
||||||
|
/* Allocate n floats on device */
|
||||||
|
CUDA_CALL(cudaMalloc((void**)&devData, n * sizeof(float)));
|
||||||
|
|
||||||
|
/* Create pseudo-random number generator */
|
||||||
|
CURAND_CALL(curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT));
|
||||||
|
|
||||||
|
/* Set seed */
|
||||||
|
CURAND_CALL(curandSetPseudoRandomGeneratorSeed(gen, 1234ULL));
|
||||||
|
|
||||||
|
/* Generate n floats on device */
|
||||||
|
CURAND_CALL(curandGenerateUniform(gen, devData, n));
|
||||||
|
|
||||||
|
/* Copy device memory to host */
|
||||||
|
CUDA_CALL(
|
||||||
|
cudaMemcpy(hostData, devData, n * sizeof(float), cudaMemcpyDeviceToHost));
|
||||||
|
|
||||||
|
/* Cleanup */
|
||||||
|
CURAND_CALL(curandDestroyGenerator(gen));
|
||||||
|
CUDA_CALL(cudaFree(devData));
|
||||||
|
free(hostData);
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
|
||||||
|
|
||||||
|
int shared_version();
|
||||||
|
int static_version();
|
||||||
|
int mixed_version();
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
return mixed_version() == 0 && shared_version() == 0 &&
|
||||||
|
static_version() == 0;
|
||||||
|
}
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
|
||||||
|
int curand_main();
|
||||||
|
int nppif_main();
|
||||||
|
|
||||||
|
int mixed_version()
|
||||||
|
{
|
||||||
|
return curand_main() == 0 && nppif_main() == 0;
|
||||||
|
}
|
||||||
@@ -0,0 +1,86 @@
|
|||||||
|
// Comes from
|
||||||
|
// https://devtalk.nvidia.com/default/topic/1037482/gpu-accelerated-libraries/help-me-help-you-with-modern-cmake-and-cuda-mwe-for-npp/post/5271066/#5271066
|
||||||
|
|
||||||
|
#include <cstdio>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <cuda_runtime_api.h>
|
||||||
|
#include <nppi_filtering_functions.h>
|
||||||
|
|
||||||
|
int nppif_main()
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* 8-bit unsigned single-channel 1D row convolution.
|
||||||
|
*/
|
||||||
|
const int simgrows = 32;
|
||||||
|
const int simgcols = 32;
|
||||||
|
Npp8u *d_pSrc, *d_pDst;
|
||||||
|
const int nMaskSize = 3;
|
||||||
|
NppiSize oROI;
|
||||||
|
oROI.width = simgcols - nMaskSize;
|
||||||
|
oROI.height = simgrows;
|
||||||
|
const int simgsize = simgrows * simgcols * sizeof(d_pSrc[0]);
|
||||||
|
const int dimgsize = oROI.width * oROI.height * sizeof(d_pSrc[0]);
|
||||||
|
const int simgpix = simgrows * simgcols;
|
||||||
|
const int dimgpix = oROI.width * oROI.height;
|
||||||
|
const int nSrcStep = simgcols * sizeof(d_pSrc[0]);
|
||||||
|
const int nDstStep = oROI.width * sizeof(d_pDst[0]);
|
||||||
|
const int pixval = 1;
|
||||||
|
const int nDivisor = 1;
|
||||||
|
const Npp32s h_pKernel[nMaskSize] = { pixval, pixval, pixval };
|
||||||
|
Npp32s* d_pKernel;
|
||||||
|
const Npp32s nAnchor = 2;
|
||||||
|
cudaError_t err = cudaMalloc((void**)&d_pSrc, simgsize);
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
fprintf(stderr, "Cuda error %d\n", __LINE__);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
err = cudaMalloc((void**)&d_pDst, dimgsize);
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
fprintf(stderr, "Cuda error %d\n", __LINE__);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
err = cudaMalloc((void**)&d_pKernel, nMaskSize * sizeof(d_pKernel[0]));
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
fprintf(stderr, "Cuda error %d\n", __LINE__);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
// set image to pixval initially
|
||||||
|
err = cudaMemset(d_pSrc, pixval, simgsize);
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
fprintf(stderr, "Cuda error %d\n", __LINE__);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
err = cudaMemset(d_pDst, 0, dimgsize);
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
fprintf(stderr, "Cuda error %d\n", __LINE__);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
err = cudaMemcpy(d_pKernel, h_pKernel, nMaskSize * sizeof(d_pKernel[0]),
|
||||||
|
cudaMemcpyHostToDevice);
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
fprintf(stderr, "Cuda error %d\n", __LINE__);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
// copy src to dst
|
||||||
|
NppStatus ret =
|
||||||
|
nppiFilterRow_8u_C1R(d_pSrc, nSrcStep, d_pDst, nDstStep, oROI, d_pKernel,
|
||||||
|
nMaskSize, nAnchor, nDivisor);
|
||||||
|
assert(ret == NPP_NO_ERROR);
|
||||||
|
Npp8u* h_imgres = new Npp8u[dimgpix];
|
||||||
|
err = cudaMemcpy(h_imgres, d_pDst, dimgsize, cudaMemcpyDeviceToHost);
|
||||||
|
if (err != cudaSuccess) {
|
||||||
|
fprintf(stderr, "Cuda error %d\n", __LINE__);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
// test for filtering
|
||||||
|
for (int i = 0; i < dimgpix; i++) {
|
||||||
|
if (h_imgres[i] != (pixval * pixval * nMaskSize)) {
|
||||||
|
fprintf(stderr, "h_imgres at index %d failed to match\n", i);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
|
||||||
|
int curand_main();
|
||||||
|
int nppif_main();
|
||||||
|
|
||||||
|
int shared_version()
|
||||||
|
{
|
||||||
|
return curand_main() == 0 && nppif_main() == 0;
|
||||||
|
}
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
|
||||||
|
int curand_main();
|
||||||
|
int nppif_main();
|
||||||
|
|
||||||
|
int static_version()
|
||||||
|
{
|
||||||
|
return curand_main() == 0 && nppif_main() == 0;
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user