CUDA: Visual Studio now properly delays device linking

This commit is contained in:
Robert Maynard
2017-04-25 16:01:09 -04:00
parent 3cb7048b52
commit a36fb229ba
5 changed files with 148 additions and 8 deletions

View File

@@ -116,6 +116,10 @@ cmVisualStudio10TargetGenerator::~cmVisualStudio10TargetGenerator()
i != this->CudaOptions.end(); ++i) {
delete i->second;
}
for (OptionsMap::iterator i = this->CudaLinkOptions.begin();
i != this->CudaLinkOptions.end(); ++i) {
delete i->second;
}
if (!this->BuildFileStream) {
return;
}
@@ -213,6 +217,9 @@ void cmVisualStudio10TargetGenerator::Generate()
if (!this->ComputeCudaOptions()) {
return;
}
if (!this->ComputeCudaLinkOptions()) {
return;
}
if (!this->ComputeMasmOptions()) {
return;
}
@@ -2524,6 +2531,66 @@ void cmVisualStudio10TargetGenerator::WriteCudaOptions(
this->WriteString("</CudaCompile>\n", 2);
}
// Compute the CUDA link options for every entry in this->Configurations.
//
// Returns true without doing any work when CUDA is not enabled on the global
// generator. Otherwise each configuration is processed in order and the
// first per-configuration failure makes this function return false.
bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions()
{
  if (this->GlobalGenerator->IsCudaEnabled()) {
    typedef std::vector<std::string>::const_iterator ConfigIter;
    for (ConfigIter cfg = this->Configurations.begin();
         cfg != this->Configurations.end(); ++cfg) {
      bool const computed = this->ComputeCudaLinkOptions(*cfg);
      if (!computed) {
        return false;
      }
    }
  }
  return true;
}
bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions(
std::string const& configName)
{
cmGlobalVisualStudio10Generator* gg =
static_cast<cmGlobalVisualStudio10Generator*>(this->GlobalGenerator);
CM_AUTO_PTR<Options> pOptions(new Options(
this->LocalGenerator, Options::CudaCompiler, gg->GetCudaFlagTable()));
Options& cudaLinkOptions = *pOptions;
// Determine if we need to do a device link
bool doDeviceLinking = false;
switch (this->GeneratorTarget->GetType()) {
case cmStateEnums::SHARED_LIBRARY:
case cmStateEnums::MODULE_LIBRARY:
case cmStateEnums::EXECUTABLE:
doDeviceLinking = true;
break;
default:
break;
}
cudaLinkOptions.AddFlag("PerformDeviceLink",
doDeviceLinking ? "true" : "false");
this->CudaLinkOptions[configName] = pOptions.release();
return true;
}
void cmVisualStudio10TargetGenerator::WriteCudaLinkOptions(
std::string const& configName)
{
if (this->GeneratorTarget->GetType() > cmStateEnums::MODULE_LIBRARY) {
return;
}
if (!this->MSTools || !this->GlobalGenerator->IsCudaEnabled()) {
return;
}
this->WriteString("<CudaLink>\n", 2);
Options& cudaLinkOptions = *(this->CudaLinkOptions[configName]);
cudaLinkOptions.OutputFlagMap(*this->BuildFileStream, " ");
this->WriteString("</CudaLink>\n", 2);
}
bool cmVisualStudio10TargetGenerator::ComputeMasmOptions()
{
if (!this->GlobalGenerator->IsMasmEnabled()) {
@@ -3283,6 +3350,7 @@ void cmVisualStudio10TargetGenerator::WriteItemDefinitionGroups()
}
// output link flags <Link></Link>
this->WriteLinkOptions(*i);
this->WriteCudaLinkOptions(*i);
// output lib flags <Lib></Lib>
this->WriteLibOptions(*i);
// output manifest flags <Manifest></Manifest>

View File

@@ -101,6 +101,11 @@ private:
bool ComputeCudaOptions(std::string const& config);
void WriteCudaOptions(std::string const& config,
std::vector<std::string> const& includes);
bool ComputeCudaLinkOptions();
bool ComputeCudaLinkOptions(std::string const& config);
void WriteCudaLinkOptions(std::string const& config);
bool ComputeMasmOptions();
bool ComputeMasmOptions(std::string const& config);
void WriteMasmOptions(std::string const& config,
@@ -154,6 +159,7 @@ private:
OptionsMap ClOptions;
OptionsMap RcOptions;
OptionsMap CudaOptions;
OptionsMap CudaLinkOptions;
OptionsMap MasmOptions;
OptionsMap NasmOptions;
OptionsMap LinkOptions;

View File

@@ -37,7 +37,7 @@ EXPORT int choose_cuda_device()
<< std::endl;
return 1;
}
if (prop.major >= 4) {
if (prop.major >= 3) {
err = cudaSetDevice(i);
if (err != cudaSuccess) {
std::cout << "Could not select CUDA device " << i << std::endl;

View File

@@ -12,6 +12,7 @@ project (CudaOnlySeparateCompilation CUDA)
string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30")
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CUDA_STANDARD 11)
add_library(CUDASeparateLibA STATIC file1.cu file2.cu file3.cu)
#Having file4/file5 in a shared library causes serious problems
@@ -22,12 +23,24 @@ add_library(CUDASeparateLibB STATIC file4.cu file5.cu)
target_link_libraries(CUDASeparateLibB PRIVATE CUDASeparateLibA)
add_executable(CudaOnlySeparateCompilation main.cu)
target_link_libraries(CudaOnlySeparateCompilation PRIVATE CUDASeparateLibB)
target_link_libraries(CudaOnlySeparateCompilation
PRIVATE CUDASeparateLibB)
set_target_properties( CUDASeparateLibA
CUDASeparateLibB
PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
set_target_properties(CUDASeparateLibA
CUDASeparateLibB
PROPERTIES CUDA_SEPARABLE_COMPILATION ON
POSITION_INDEPENDENT_CODE ON)
set_target_properties( CUDASeparateLibA
CUDASeparateLibB
PROPERTIES POSITION_INDEPENDENT_CODE ON)
if (CMAKE_GENERATOR MATCHES "^Visual Studio")
# The Visual Studio CUDA integration will not perform device linking
# on a target that does not itself have GenerateRelocatableDeviceCode
# enabled, so separable compilation is also turned on for the executable.
set_target_properties(CudaOnlySeparateCompilation
PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
endif()
if (APPLE)
# We need to add the default path to the driver (libcuda.dylib) as an rpath, so that
# the static cuda runtime can find it at runtime.
target_link_libraries(CudaOnlySeparateCompilation PRIVATE -Wl,-rpath,/usr/local/cuda/lib)
endif()

View File

@@ -7,9 +7,62 @@
int file4_launch_kernel(int x);
int file5_launch_kernel(int x);
// Select the first CUDA device whose compute capability major version is at
// least 3.
//
// Returns 0 as soon as cudaSetDevice() succeeds for such a device.
// Returns 1 when the device count or a device's properties cannot be
// queried, or when no device could be selected (none qualifies, or
// selecting every qualifying device failed).
int choose_cuda_device()
{
  int nDevices = 0;
  cudaError_t err = cudaGetDeviceCount(&nDevices);
  if (err != cudaSuccess) {
    std::cerr << "Failed to retrieve the number of CUDA enabled devices"
              << std::endl;
    return 1;
  }

  for (int i = 0; i < nDevices; ++i) {
    cudaDeviceProp prop;
    // Reuse the function-level status variable instead of declaring a
    // second `err` that would shadow it.
    err = cudaGetDeviceProperties(&prop, i);
    if (err != cudaSuccess) {
      std::cerr << "Could not retrieve properties from CUDA device " << i
                << std::endl;
      return 1;
    }

    // Skip devices below the required compute capability.
    if (prop.major < 3) {
      continue;
    }

    err = cudaSetDevice(i);
    if (err == cudaSuccess) {
      return 0;
    }
    // A failed selection is not fatal: report it and try the next device.
    std::cout << "Could not select CUDA device " << i << std::endl;
  }

  std::cout << "Could not find a CUDA enabled card supporting compute >=3.0"
            << std::endl;
  return 1;
}
// Test driver: pick a CUDA device, then run the file4 and file5 kernel
// launchers and check cudaGetLastError() after each one.
//
// Exit code is 1 when either check reports an error, 0 otherwise. A failure
// of choose_cuda_device() also exits with 0.
// NOTE(review): presumably this makes the test pass trivially on machines
// without a suitable GPU -- confirm that is the intent.
int main(int argc, char** argv)
{
  if (choose_cuda_device() != 0) {
    return 0;
  }

  file4_launch_kernel(42);
  cudaError_t launchStatus = cudaGetLastError();
  if (launchStatus != cudaSuccess) {
    std::cerr << "file4_launch_kernel: kernel launch failed: "
              << cudaGetErrorString(launchStatus) << std::endl;
    return 1;
  }

  file5_launch_kernel(42);
  launchStatus = cudaGetLastError();
  if (launchStatus != cudaSuccess) {
    std::cerr << "file5_launch_kernel: kernel launch failed: "
              << cudaGetErrorString(launchStatus) << std::endl;
    return 1;
  }

  return 0;
}