diff --git a/Source/cmMakefileTargetGenerator.cxx b/Source/cmMakefileTargetGenerator.cxx index 6324b2ef83..98c61fe8be 100644 --- a/Source/cmMakefileTargetGenerator.cxx +++ b/Source/cmMakefileTargetGenerator.cxx @@ -1519,6 +1519,20 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule( // Link device code for each architecture. for (const std::string& architectureKind : architectures) { + std::string registerFileCmd; + + // The generated register file contains macros that when expanded + // register the device routines. Because the routines are the same for + // all architectures the register file will be the same too. Thus + // generate it only on the first invocation to reduce overhead. + if (fatbinaryDepends.empty()) { + std::string const registerFileRel = + cmStrCat(relPath, relObjectDir, "cmake_cuda_register.h"); + registerFileCmd = + cmStrCat(" --register-link-binaries=", registerFileRel); + cleanFiles.push_back(registerFileRel); + } + // Clang always generates real code, so strip the specifier. const std::string architecture = architectureKind.substr(0, architectureKind.find('-')); @@ -1528,20 +1542,6 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule( profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin); fatbinaryDepends.emplace_back(cubin); - std::string registerFileCmd; - - // The generated register file contains macros that when expanded - // register the device routines. Because the routines are the same for - // all architectures the register file will be the same too. Thus - // generate it only on the first invocation to reduce overhead. - if (fatbinaryDepends.size() == 1) { - std::string const registerFileRel = - cmStrCat(relPath, relObjectDir, "cmake_cuda_register.h"); - registerFileCmd = - cmStrCat(" --register-link-binaries=", registerFileRel); - cleanFiles.push_back(registerFileRel); - } - std::string command = cmStrCat( this->Makefile->GetRequiredDefinition("CMAKE_CUDA_DEVICE_LINKER"), " -arch=sm_", architecture, registerFileCmd, " -o=$@ ", diff --git a/Source/cmNinjaNormalTargetGenerator.cxx b/Source/cmNinjaNormalTargetGenerator.cxx index 5a4c6521d8..493bd4ac85 100644 --- a/Source/cmNinjaNormalTargetGenerator.cxx +++ b/Source/cmNinjaNormalTargetGenerator.cxx @@ -753,10 +753,6 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements( const std::string cubin = cmStrCat(ninjaOutputDir, "/sm_", architecture, ".cubin"); - fatbinary.Variables["PROFILES"] += - cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin); - fatbinary.ExplicitDeps.emplace_back(cubin); - cmNinjaBuild dlink(this->LanguageLinkerCudaDeviceRule(config)); dlink.ExplicitDeps = explicitDeps; dlink.Outputs = { cubin }; @@ -766,11 +762,15 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements( // the device routines. Because the routines are the same for all // architectures the register file will be the same too. Thus generate it // only on the first invocation to reduce overhead. - if (fatbinary.ExplicitDeps.size() == 1) { + if (fatbinary.ExplicitDeps.empty()) { dlink.Variables["REGISTER"] = cmStrCat( "--register-link-binaries=", ninjaOutputDir, "/cmake_cuda_register.h"); } + fatbinary.Variables["PROFILES"] += + cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin); + fatbinary.ExplicitDeps.emplace_back(cubin); + this->GetGlobalGenerator()->WriteBuild(this->GetCommonFileStream(), dlink); }