mirror of
https://github.com/Kitware/CMake.git
synced 2026-01-06 21:59:54 -06:00
Merge topic 'cuda_support_system_libraries_with_device_symbols'
41eab150a8CUDA: Pass more link libraries to device linking88c7abb740CUDA: Pass host linker directories to device linker Acked-by: Kitware Robot <kwrobot@kitware.com> Merge-request: !1634
This commit is contained in:
@@ -177,17 +177,31 @@ else()
|
||||
set(_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS "")
|
||||
endif()
|
||||
|
||||
# Add implicit host link directories that contain device libraries
|
||||
# to the device link line.
|
||||
set(__IMPLICT_DLINK_DIRS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
|
||||
if(__IMPLICT_DLINK_DIRS)
|
||||
list(REMOVE_ITEM __IMPLICT_DLINK_DIRS ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
|
||||
endif()
|
||||
set(__IMPLICT_DLINK_FLAGS )
|
||||
foreach(dir ${__IMPLICT_DLINK_DIRS})
|
||||
if(EXISTS "${dir}/libcublas_device.a")
|
||||
string(APPEND __IMPLICT_DLINK_FLAGS " -L\"${dir}\"")
|
||||
endif()
|
||||
endforeach()
|
||||
unset(__IMPLICT_DLINK_DIRS)
|
||||
|
||||
#These are used when linking relocatable (dc) cuda code
|
||||
if(NOT CMAKE_CUDA_DEVICE_LINK_LIBRARY)
|
||||
set(CMAKE_CUDA_DEVICE_LINK_LIBRARY
|
||||
"<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <CMAKE_CUDA_LINK_FLAGS> <LANGUAGE_COMPILE_FLAGS> ${CMAKE_CUDA_COMPILE_OPTIONS_PIC} ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
|
||||
"<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <CMAKE_CUDA_LINK_FLAGS> <LANGUAGE_COMPILE_FLAGS> ${CMAKE_CUDA_COMPILE_OPTIONS_PIC} ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_DLINK_FLAGS}")
|
||||
endif()
|
||||
if(NOT CMAKE_CUDA_DEVICE_LINK_EXECUTABLE)
|
||||
set(CMAKE_CUDA_DEVICE_LINK_EXECUTABLE
|
||||
"<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <FLAGS> <CMAKE_CUDA_LINK_FLAGS> ${CMAKE_CUDA_COMPILE_OPTIONS_PIC} ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
|
||||
"<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <FLAGS> <CMAKE_CUDA_LINK_FLAGS> ${CMAKE_CUDA_COMPILE_OPTIONS_PIC} ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_DLINK_FLAGS}")
|
||||
endif()
|
||||
|
||||
unset(_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS)
|
||||
unset(__IMPLICT_DLINK_FLAGS)
|
||||
|
||||
set(CMAKE_CUDA_INFORMATION_LOADED 1)
|
||||
|
||||
@@ -36,12 +36,27 @@ else()
|
||||
set(_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS "")
|
||||
endif()
|
||||
|
||||
# Add implicit host link directories that contain device libraries
|
||||
# to the device link line.
|
||||
set(__IMPLICT_DLINK_DIRS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
|
||||
if(__IMPLICT_DLINK_DIRS)
|
||||
list(REMOVE_ITEM __IMPLICT_DLINK_DIRS ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
|
||||
endif()
|
||||
set(__IMPLICT_DLINK_FLAGS )
|
||||
foreach(dir ${__IMPLICT_DLINK_DIRS})
|
||||
if(EXISTS "${dir}/cublas_device.lib")
|
||||
string(APPEND __IMPLICT_DLINK_FLAGS " -L\"${dir}\"")
|
||||
endif()
|
||||
endforeach()
|
||||
unset(__IMPLICT_DLINK_DIRS)
|
||||
|
||||
set(CMAKE_CUDA_DEVICE_LINK_LIBRARY
|
||||
"<CMAKE_CUDA_COMPILER> <CMAKE_CUDA_LINK_FLAGS> <LANGUAGE_COMPILE_FLAGS> ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES> -Xcompiler=-Fd<TARGET_COMPILE_PDB>,-FS")
|
||||
"<CMAKE_CUDA_COMPILER> <CMAKE_CUDA_LINK_FLAGS> <LANGUAGE_COMPILE_FLAGS> ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES> -Xcompiler=-Fd<TARGET_COMPILE_PDB>,-FS${__IMPLICT_DLINK_FLAGS}")
|
||||
set(CMAKE_CUDA_DEVICE_LINK_EXECUTABLE
|
||||
"<CMAKE_CUDA_COMPILER> <FLAGS> <CMAKE_CUDA_LINK_FLAGS> ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES> -Xcompiler=-Fd<TARGET_COMPILE_PDB>,-FS")
|
||||
"<CMAKE_CUDA_COMPILER> <FLAGS> <CMAKE_CUDA_LINK_FLAGS> ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES> -Xcompiler=-Fd<TARGET_COMPILE_PDB>,-FS${__IMPLICT_DLINK_FLAGS}")
|
||||
|
||||
unset(_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS)
|
||||
unset(__IMPLICT_DLINK_FLAGS)
|
||||
|
||||
string(REPLACE "/D" "-D" _PLATFORM_DEFINES_CUDA "${_PLATFORM_DEFINES}${_PLATFORM_DEFINES_CXX}")
|
||||
|
||||
|
||||
@@ -3,9 +3,9 @@
|
||||
|
||||
#include "cmLinkLineDeviceComputer.h"
|
||||
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
|
||||
#include "cmAlgorithms.h"
|
||||
#include "cmComputeLinkInformation.h"
|
||||
#include "cmGeneratorTarget.h"
|
||||
#include "cmGlobalNinjaGenerator.h"
|
||||
@@ -32,38 +32,32 @@ std::string cmLinkLineDeviceComputer::ComputeLinkLibraries(
|
||||
ItemVector const& items = cli.GetItems();
|
||||
std::string config = cli.GetConfig();
|
||||
for (auto const& item : items) {
|
||||
if (!item.Target) {
|
||||
continue;
|
||||
}
|
||||
|
||||
bool skippable = false;
|
||||
switch (item.Target->GetType()) {
|
||||
case cmStateEnums::SHARED_LIBRARY:
|
||||
case cmStateEnums::MODULE_LIBRARY:
|
||||
case cmStateEnums::INTERFACE_LIBRARY:
|
||||
skippable = true;
|
||||
break;
|
||||
case cmStateEnums::STATIC_LIBRARY:
|
||||
// If a static library is resolving its device linking, it should
|
||||
// be removed for other device linking
|
||||
skippable =
|
||||
item.Target->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (skippable) {
|
||||
continue;
|
||||
}
|
||||
|
||||
std::set<std::string> langs;
|
||||
item.Target->GetLanguages(langs, config);
|
||||
if (langs.count("CUDA") == 0) {
|
||||
continue;
|
||||
if (item.Target) {
|
||||
bool skip = false;
|
||||
switch (item.Target->GetType()) {
|
||||
case cmStateEnums::MODULE_LIBRARY:
|
||||
case cmStateEnums::INTERFACE_LIBRARY:
|
||||
skip = true;
|
||||
break;
|
||||
case cmStateEnums::STATIC_LIBRARY:
|
||||
skip = item.Target->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (skip) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (item.IsPath) {
|
||||
// nvcc understands absolute paths to libraries ending in '.a' should
|
||||
// be passed to nvlink. Other extensions like '.so' or '.dylib' are
|
||||
// rejected by the nvcc front-end even though nvlink knows to ignore
|
||||
// them. Bypass the front-end via '-Xnvlink'.
|
||||
if (!cmHasLiteralSuffix(item.Value, ".a")) {
|
||||
fout << "-Xnvlink ";
|
||||
}
|
||||
fout << this->ConvertToOutputFormat(
|
||||
this->ConvertToLinkReference(item.Value));
|
||||
} else {
|
||||
|
||||
@@ -187,7 +187,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkRule(bool useResponseFile)
|
||||
std::string responseFlag;
|
||||
if (!useResponseFile) {
|
||||
vars.Objects = "$in";
|
||||
vars.LinkLibraries = "$LINK_LIBRARIES";
|
||||
vars.LinkLibraries = "$LINK_PATH $LINK_LIBRARIES";
|
||||
} else {
|
||||
std::string cmakeVarLang = "CMAKE_";
|
||||
cmakeVarLang += this->TargetLinkLanguage;
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard)
|
||||
ADD_TEST_MACRO(CudaOnly.ExportPTX CudaOnlyExportPTX)
|
||||
ADD_TEST_MACRO(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag)
|
||||
ADD_TEST_MACRO(CudaOnly.LinkSystemDeviceLibraries CudaOnlyLinkSystemDeviceLibraries)
|
||||
ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
|
||||
ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
|
||||
ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs)
|
||||
|
||||
15
Tests/CudaOnly/LinkSystemDeviceLibraries/CMakeLists.txt
Normal file
15
Tests/CudaOnly/LinkSystemDeviceLibraries/CMakeLists.txt
Normal file
@@ -0,0 +1,15 @@
|
||||
cmake_minimum_required(VERSION 3.8)
|
||||
project(CudaOnlyLinkSystemDeviceLibraries CUDA)
|
||||
|
||||
string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_35,code=compute_35 -gencode arch=compute_35,code=sm_35")
|
||||
set(CMAKE_CUDA_STANDARD 11)
|
||||
|
||||
add_executable(CudaOnlyLinkSystemDeviceLibraries main.cu)
|
||||
set_target_properties( CudaOnlyLinkSystemDeviceLibraries
|
||||
PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
target_link_libraries( CudaOnlyLinkSystemDeviceLibraries PRIVATE cublas_device)
|
||||
|
||||
if(APPLE)
|
||||
# Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
|
||||
set_property(TARGET CudaOnlyLinkSystemDeviceLibraries PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
|
||||
endif()
|
||||
77
Tests/CudaOnly/LinkSystemDeviceLibraries/main.cu
Normal file
77
Tests/CudaOnly/LinkSystemDeviceLibraries/main.cu
Normal file
@@ -0,0 +1,77 @@
|
||||
|
||||
#include <cublas_v2.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <iostream>
|
||||
|
||||
__global__ void deviceCublasSgemm(int n, float alpha, float beta,
|
||||
const float* d_A, const float* d_B,
|
||||
float* d_C)
|
||||
{
|
||||
cublasHandle_t cnpHandle;
|
||||
cublasStatus_t status = cublasCreate(&cnpHandle);
|
||||
|
||||
if (status != CUBLAS_STATUS_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Call function defined in the cublas_device system static library.
|
||||
// This way we can verify that we properly pass system libraries to the
|
||||
// device link line
|
||||
status = cublasSgemm(cnpHandle, CUBLAS_OP_N, CUBLAS_OP_N, n, n, n, &alpha,
|
||||
d_A, n, d_B, n, &beta, d_C, n);
|
||||
|
||||
cublasDestroy(cnpHandle);
|
||||
}
|
||||
|
||||
int choose_cuda_device()
|
||||
{
|
||||
int nDevices = 0;
|
||||
cudaError_t err = cudaGetDeviceCount(&nDevices);
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Failed to retrieve the number of CUDA enabled devices"
|
||||
<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
for (int i = 0; i < nDevices; ++i) {
|
||||
cudaDeviceProp prop;
|
||||
cudaError_t err = cudaGetDeviceProperties(&prop, i);
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Could not retrieve properties from CUDA device " << i
|
||||
<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (prop.major > 3 || (prop.major == 3 && prop.minor >= 5)) {
|
||||
err = cudaSetDevice(i);
|
||||
if (err != cudaSuccess) {
|
||||
std::cout << "Could not select CUDA device " << i << std::endl;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Could not find a CUDA enabled card supporting compute >=3.5"
|
||||
<< std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
int ret = choose_cuda_device();
|
||||
if (ret) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// initial values that will make sure that the cublasSgemm won't actually
|
||||
// do any work
|
||||
int n = 0;
|
||||
float alpha = 1;
|
||||
float beta = 1;
|
||||
float* d_A = nullptr;
|
||||
float* d_B = nullptr;
|
||||
float* d_C = nullptr;
|
||||
deviceCublasSgemm<<<1, 1>>>(n, alpha, beta, d_A, d_B, d_C);
|
||||
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user