From 3975678fcc3928f2a7dcd79fe9b9e9ebf3abe2b2 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 27 Jul 2021 23:38:36 +0300 Subject: [PATCH] CUDA/Clang: Simplify --register-link-binaries logic Move the logic that appends the cubin to after the register-file check, so the check can simply be empty(). With the Makefile generator the option is now at the front instead of being intermixed with the actual bins. --- Source/cmMakefileTargetGenerator.cxx | 28 ++++++++++++------------- Source/cmNinjaNormalTargetGenerator.cxx | 10 ++++----- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/Source/cmMakefileTargetGenerator.cxx b/Source/cmMakefileTargetGenerator.cxx index 6324b2ef83..98c61fe8be 100644 --- a/Source/cmMakefileTargetGenerator.cxx +++ b/Source/cmMakefileTargetGenerator.cxx @@ -1519,6 +1519,20 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule( // Link device code for each architecture. for (const std::string& architectureKind : architectures) { + std::string registerFileCmd; + + // The generated register file contains macros that when expanded + // register the device routines. Because the routines are the same for + // all architectures the register file will be the same too. Thus + // generate it only on the first invocation to reduce overhead. + if (fatbinaryDepends.empty()) { + std::string const registerFileRel = + cmStrCat(relPath, relObjectDir, "cmake_cuda_register.h"); + registerFileCmd = + cmStrCat(" --register-link-binaries=", registerFileRel); + cleanFiles.push_back(registerFileRel); + } + // Clang always generates real code, so strip the specifier. const std::string architecture = architectureKind.substr(0, architectureKind.find('-')); @@ -1528,20 +1542,6 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule( profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin); fatbinaryDepends.emplace_back(cubin); - std::string registerFileCmd; - - // The generated register file contains macros that when expanded - // register the device routines. 
Because the routines are the same for - // all architectures the register file will be the same too. Thus - // generate it only on the first invocation to reduce overhead. - if (fatbinaryDepends.size() == 1) { - std::string const registerFileRel = - cmStrCat(relPath, relObjectDir, "cmake_cuda_register.h"); - registerFileCmd = - cmStrCat(" --register-link-binaries=", registerFileRel); - cleanFiles.push_back(registerFileRel); - } - std::string command = cmStrCat( this->Makefile->GetRequiredDefinition("CMAKE_CUDA_DEVICE_LINKER"), " -arch=sm_", architecture, registerFileCmd, " -o=$@ ", diff --git a/Source/cmNinjaNormalTargetGenerator.cxx b/Source/cmNinjaNormalTargetGenerator.cxx index 5a4c6521d8..493bd4ac85 100644 --- a/Source/cmNinjaNormalTargetGenerator.cxx +++ b/Source/cmNinjaNormalTargetGenerator.cxx @@ -753,10 +753,6 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements( const std::string cubin = cmStrCat(ninjaOutputDir, "/sm_", architecture, ".cubin"); - fatbinary.Variables["PROFILES"] += - cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin); - fatbinary.ExplicitDeps.emplace_back(cubin); - cmNinjaBuild dlink(this->LanguageLinkerCudaDeviceRule(config)); dlink.ExplicitDeps = explicitDeps; dlink.Outputs = { cubin }; @@ -766,11 +762,15 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements( // the device routines. Because the routines are the same for all // architectures the register file will be the same too. Thus generate it // only on the first invocation to reduce overhead. - if (fatbinary.ExplicitDeps.size() == 1) { + if (fatbinary.ExplicitDeps.empty()) { dlink.Variables["REGISTER"] = cmStrCat( "--register-link-binaries=", ninjaOutputDir, "/cmake_cuda_register.h"); } + fatbinary.Variables["PROFILES"] += + cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin); + fatbinary.ExplicitDeps.emplace_back(cubin); + this->GetGlobalGenerator()->WriteBuild(this->GetCommonFileStream(), dlink); }