CUDA: Visual Studio Generator propagates definitions for PTX files

From CUDA 9.0 through CUDA 11.4, the CUDA Visual Studio integration
omits user-provided definitions when generating PTX files.

CUDA 11.5 resolves this, so we backport the fix to older versions
to allow for consistent behavior when using CMake.
This commit is contained in:
unknown
2021-12-08 09:19:31 -05:00
committed by Robert Maynard
parent 065604b2b3
commit 574b492b47
4 changed files with 25 additions and 0 deletions

View File

@@ -3228,6 +3228,8 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions(
this->LocalGenerator, Options::CudaCompiler, gg->GetCudaFlagTable());
Options& cudaOptions = *pOptions;
auto cudaVersion = this->GlobalGenerator->GetPlatformToolsetCudaString();
// Get compile flags for CUDA in this directory.
std::string flags;
this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget, "CUDA",
@@ -3263,7 +3265,22 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions(
// to not have the source file extension at all
cudaOptions.AddFlag("CompileOut", "$(IntDir)%(Filename).ptx");
notPtx = false;
if (cmSystemTools::VersionCompare(cmSystemTools::OP_GREATER_EQUAL,
cudaVersion, "9.0") &&
cmSystemTools::VersionCompare(cmSystemTools::OP_LESS, cudaVersion,
"11.5")) {
// Before CUDA 11.5 (verified back to 9.0), the DriverApi flag that
// controls PTX compilation does not propagate user defines. This
// causes target_compile_definitions to behave differently for VS +
// PTX compared to other generators, so we patch the rules
// to normalize behavior.
cudaOptions.AddFlag("DriverApiCommandLineTemplate",
"%(BaseCommandLineTemplate) [CompileOut] [FastMath] "
"[Defines] \"%(FullPath)\"");
}
}
if (notPtx &&
cmSystemTools::VersionCompareGreaterEq(
"8.0", this->GlobalGenerator->GetPlatformToolsetCudaString())) {

View File

@@ -11,6 +11,7 @@ list(SUBLIST CMAKE_CUDA_ARCHITECTURES 0 1 CMAKE_CUDA_ARCHITECTURES)
string(APPEND CMAKE_CUDA_ARCHITECTURES "-virtual")
add_library(CudaPTX OBJECT kernelA.cu kernelB.cu)
target_compile_definitions(CudaPTX PRIVATE "CUDA_PTX_COMPILATION")
set_property(TARGET CudaPTX PROPERTY CUDA_PTX_COMPILATION ON)
#Test ObjectFiles with file(GENERATE)

View File

@@ -1,4 +1,8 @@
#ifndef CUDA_PTX_COMPILATION
# error "CUDA_PTX_COMPILATION define not provided"
#endif
__global__ void kernelA(float* r, float* x, float* y, float* z, int size)
{
for (int i = threadIdx.x; i < size; i += blockDim.x) {

View File

@@ -1,4 +1,7 @@
#ifndef CUDA_PTX_COMPILATION
# error "CUDA_PTX_COMPILATION define not provided"
#endif
__global__ void kernelB(float* r, float* x, float* y, float* z, int size)
{