mirror of
https://github.com/Kitware/CMake.git
synced 2026-02-21 14:40:26 -06:00
CUDA: Add support for CUBIN, FATBIN, and OPTIXIR compilation
This commit is contained in:
@@ -128,7 +128,10 @@ syn keyword cmakeProperty contained
|
||||
\ CPACK_WIX_ACL
|
||||
\ CROSSCOMPILING_EMULATOR
|
||||
\ CUDA_ARCHITECTURES
|
||||
\ CUDA_CUBIN_COMPILATION
|
||||
\ CUDA_EXTENSIONS
|
||||
\ CUDA_FATBIN_COMPILATION
|
||||
\ CUDA_OPTIX_COMPILATION
|
||||
\ CUDA_PTX_COMPILATION
|
||||
\ CUDA_RESOLVE_DEVICE_SYMBOLS
|
||||
\ CUDA_RUNTIME_LIBRARY
|
||||
|
||||
@@ -175,7 +175,10 @@ Properties on Targets
|
||||
/prop_tgt/CONFIG_POSTFIX
|
||||
/prop_tgt/CROSSCOMPILING_EMULATOR
|
||||
/prop_tgt/CUDA_ARCHITECTURES
|
||||
/prop_tgt/CUDA_CUBIN_COMPILATION
|
||||
/prop_tgt/CUDA_EXTENSIONS
|
||||
/prop_tgt/CUDA_FATBIN_COMPILATION
|
||||
/prop_tgt/CUDA_OPTIX_COMPILATION
|
||||
/prop_tgt/CUDA_PTX_COMPILATION
|
||||
/prop_tgt/CUDA_RESOLVE_DEVICE_SYMBOLS
|
||||
/prop_tgt/CUDA_RUNTIME_LIBRARY
|
||||
|
||||
14
Help/prop_tgt/CUDA_CUBIN_COMPILATION.rst
Normal file
14
Help/prop_tgt/CUDA_CUBIN_COMPILATION.rst
Normal file
@@ -0,0 +1,14 @@
|
||||
CUDA_CUBIN_COMPILATION
|
||||
----------------------
|
||||
|
||||
.. versionadded:: 3.27
|
||||
|
||||
Compile CUDA sources to ``.cubin`` files instead of ``.obj`` files
|
||||
within :ref:`Object Libraries`.
|
||||
|
||||
For example:
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
add_library(mycubin OBJECT a.cu b.cu)
|
||||
set_property(TARGET mycubin PROPERTY CUDA_CUBIN_COMPILATION ON)
|
||||
14
Help/prop_tgt/CUDA_FATBIN_COMPILATION.rst
Normal file
14
Help/prop_tgt/CUDA_FATBIN_COMPILATION.rst
Normal file
@@ -0,0 +1,14 @@
|
||||
CUDA_FATBIN_COMPILATION
|
||||
-----------------------
|
||||
|
||||
.. versionadded:: 3.27
|
||||
|
||||
Compile CUDA sources to ``.fatbin`` files instead of ``.obj`` files
|
||||
within :ref:`Object Libraries`.
|
||||
|
||||
For example:
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
add_library(myfbins OBJECT a.cu b.cu)
|
||||
set_property(TARGET myfbins PROPERTY CUDA_FATBIN_COMPILATION ON)
|
||||
14
Help/prop_tgt/CUDA_OPTIX_COMPILATION.rst
Normal file
14
Help/prop_tgt/CUDA_OPTIX_COMPILATION.rst
Normal file
@@ -0,0 +1,14 @@
|
||||
CUDA_OPTIX_COMPILATION
|
||||
----------------------
|
||||
|
||||
.. versionadded:: 3.27
|
||||
|
||||
Compile CUDA sources to ``.optixir`` files instead of ``.obj`` files
|
||||
within :ref:`Object Libraries`.
|
||||
|
||||
For example:
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
add_library(myoptix OBJECT a.cu b.cu)
|
||||
set_property(TARGET myoptix PROPERTY CUDA_OPTIX_COMPILATION ON)
|
||||
14
Help/release/dev/cuda-support-new-compile-modes.rst
Normal file
14
Help/release/dev/cuda-support-new-compile-modes.rst
Normal file
@@ -0,0 +1,14 @@
|
||||
cuda-support-new-compile-modes
|
||||
------------------------------
|
||||
|
||||
* A :prop_tgt:`CUDA_CUBIN_COMPILATION` target property was added to
|
||||
:ref:`Object Libraries` to support compiling to ``.cubin`` files
|
||||
instead of host object files. Currently only supported with NVIDIA.
|
||||
|
||||
* A :prop_tgt:`CUDA_FATBIN_COMPILATION` target property was added to
|
||||
:ref:`Object Libraries` to support compiling to ``.fatbin`` files
|
||||
instead of host object files. Currently only supported with NVIDIA.
|
||||
|
||||
* A :prop_tgt:`CUDA_OPTIX_COMPILATION` target property was added to
|
||||
:ref:`Object Libraries` to support compiling to ``.optixir`` files
|
||||
instead of host object files. Currently only supported with NVIDIA.
|
||||
@@ -134,7 +134,6 @@ include(CMakeCommonLanguageInclude)
|
||||
# CMAKE_CUDA_CREATE_SHARED_LIBRARY
|
||||
# CMAKE_CUDA_CREATE_SHARED_MODULE
|
||||
# CMAKE_CUDA_COMPILE_WHOLE_COMPILATION
|
||||
# CMAKE_CUDA_COMPILE_PTX_COMPILATION
|
||||
# CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION
|
||||
# CMAKE_CUDA_LINK_EXECUTABLE
|
||||
|
||||
|
||||
@@ -8,6 +8,11 @@ set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cu")
|
||||
set(_CMAKE_CUDA_WHOLE_FLAG "-c")
|
||||
set(_CMAKE_CUDA_RDC_FLAG "-rdc=true")
|
||||
set(_CMAKE_CUDA_PTX_FLAG "-ptx")
|
||||
set(_CMAKE_CUDA_CUBIN_FLAG "-cubin")
|
||||
set(_CMAKE_CUDA_FATBIN_FLAG "-fatbin")
|
||||
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.7.0")
|
||||
set(_CMAKE_CUDA_OPTIX_FLAG "-optix-ir")
|
||||
endif()
|
||||
|
||||
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 10.2.89)
|
||||
# The -forward-unknown-to-host-compiler flag was only
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#include "cmGeneratorTarget.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <cerrno>
|
||||
#include <cstddef>
|
||||
@@ -1000,12 +1001,27 @@ const std::string& cmGeneratorTarget::GetObjectName(cmSourceFile const* file)
|
||||
|
||||
const char* cmGeneratorTarget::GetCustomObjectExtension() const
|
||||
{
|
||||
static std::string extension;
|
||||
const bool has_ptx_extension =
|
||||
this->GetPropertyAsBool("CUDA_PTX_COMPILATION");
|
||||
if (has_ptx_extension) {
|
||||
extension = ".ptx";
|
||||
return extension.c_str();
|
||||
struct compiler_mode
|
||||
{
|
||||
std::string variable;
|
||||
std::string extension;
|
||||
};
|
||||
static std::array<compiler_mode, 4> const modes{
|
||||
{ { "CUDA_PTX_COMPILATION", ".ptx" },
|
||||
{ "CUDA_CUBIN_COMPILATION", ".cubin" },
|
||||
{ "CUDA_FATBIN_COMPILATION", ".fatbin" },
|
||||
{ "CUDA_OPTIX_COMPILATION", ".optixir" } }
|
||||
};
|
||||
|
||||
std::string const& compiler =
|
||||
this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID");
|
||||
if (!compiler.empty()) {
|
||||
for (const auto& m : modes) {
|
||||
const bool has_extension = this->GetPropertyAsBool(m.variable);
|
||||
if (has_extension) {
|
||||
return m.extension.c_str();
|
||||
}
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#include "cmMakefileTargetGenerator.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <iterator>
|
||||
@@ -977,11 +978,23 @@ void cmMakefileTargetGenerator::WriteObjectRuleFiles(
|
||||
this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_RDC_FLAG");
|
||||
cudaCompileMode = cmStrCat(cudaCompileMode, rdcFlag, " ");
|
||||
}
|
||||
if (this->GeneratorTarget->GetPropertyAsBool("CUDA_PTX_COMPILATION")) {
|
||||
const std::string& ptxFlag =
|
||||
this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_PTX_FLAG");
|
||||
cudaCompileMode = cmStrCat(cudaCompileMode, ptxFlag);
|
||||
} else {
|
||||
|
||||
static std::array<cm::string_view, 4> const compileModes{
|
||||
{ "PTX"_s, "CUBIN"_s, "FATBIN"_s, "OPTIX"_s }
|
||||
};
|
||||
bool useNormalCompileMode = true;
|
||||
for (cm::string_view mode : compileModes) {
|
||||
auto propName = cmStrCat("CUDA_", mode, "_COMPILATION");
|
||||
auto defName = cmStrCat("_CMAKE_CUDA_", mode, "_FLAG");
|
||||
if (this->GeneratorTarget->GetPropertyAsBool(propName)) {
|
||||
const std::string& flag =
|
||||
this->Makefile->GetRequiredDefinition(defName);
|
||||
cudaCompileMode = cmStrCat(cudaCompileMode, flag);
|
||||
useNormalCompileMode = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (useNormalCompileMode) {
|
||||
const std::string& wholeFlag =
|
||||
this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_WHOLE_FLAG");
|
||||
cudaCompileMode = cmStrCat(cudaCompileMode, wholeFlag);
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#include "cmNinjaTargetGenerator.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <functional>
|
||||
#include <iterator>
|
||||
@@ -859,11 +860,22 @@ void cmNinjaTargetGenerator::WriteCompileRule(const std::string& lang,
|
||||
this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_RDC_FLAG");
|
||||
cudaCompileMode = cmStrCat(cudaCompileMode, rdcFlag, " ");
|
||||
}
|
||||
if (this->GeneratorTarget->GetPropertyAsBool("CUDA_PTX_COMPILATION")) {
|
||||
const std::string& ptxFlag =
|
||||
this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_PTX_FLAG");
|
||||
cudaCompileMode = cmStrCat(cudaCompileMode, ptxFlag);
|
||||
} else {
|
||||
static std::array<cm::string_view, 4> const compileModes{
|
||||
{ "PTX"_s, "CUBIN"_s, "FATBIN"_s, "OPTIX"_s }
|
||||
};
|
||||
bool useNormalCompileMode = true;
|
||||
for (cm::string_view mode : compileModes) {
|
||||
auto propName = cmStrCat("CUDA_", mode, "_COMPILATION");
|
||||
auto defName = cmStrCat("_CMAKE_CUDA_", mode, "_FLAG");
|
||||
if (this->GeneratorTarget->GetPropertyAsBool(propName)) {
|
||||
const std::string& flag =
|
||||
this->Makefile->GetRequiredDefinition(defName);
|
||||
cudaCompileMode = cmStrCat(cudaCompileMode, flag);
|
||||
useNormalCompileMode = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (useNormalCompileMode) {
|
||||
const std::string& wholeFlag =
|
||||
this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_WHOLE_FLAG");
|
||||
cudaCompileMode = cmStrCat(cudaCompileMode, wholeFlag);
|
||||
@@ -1789,11 +1801,22 @@ void cmNinjaTargetGenerator::ExportObjectCompileCommand(
|
||||
this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_RDC_FLAG");
|
||||
cudaCompileMode = cmStrCat(cudaCompileMode, rdcFlag, " ");
|
||||
}
|
||||
if (this->GeneratorTarget->GetPropertyAsBool("CUDA_PTX_COMPILATION")) {
|
||||
const std::string& ptxFlag =
|
||||
this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_PTX_FLAG");
|
||||
cudaCompileMode = cmStrCat(cudaCompileMode, ptxFlag);
|
||||
} else {
|
||||
static std::array<cm::string_view, 4> const compileModes{
|
||||
{ "PTX"_s, "CUBIN"_s, "FATBIN"_s, "OPTIX"_s }
|
||||
};
|
||||
bool useNormalCompileMode = true;
|
||||
for (cm::string_view mode : compileModes) {
|
||||
auto propName = cmStrCat("CUDA_", mode, "_COMPILATION");
|
||||
auto defName = cmStrCat("_CMAKE_CUDA_", mode, "_FLAG");
|
||||
if (this->GeneratorTarget->GetPropertyAsBool(propName)) {
|
||||
const std::string& flag =
|
||||
this->Makefile->GetRequiredDefinition(defName);
|
||||
cudaCompileMode = cmStrCat(cudaCompileMode, flag);
|
||||
useNormalCompileMode = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (useNormalCompileMode) {
|
||||
const std::string& wholeFlag =
|
||||
this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_WHOLE_FLAG");
|
||||
cudaCompileMode = cmStrCat(cudaCompileMode, wholeFlag);
|
||||
|
||||
@@ -1742,6 +1742,9 @@ MAKE_PROP(COMPILE_FEATURES);
|
||||
MAKE_PROP(COMPILE_OPTIONS);
|
||||
MAKE_PROP(PRECOMPILE_HEADERS);
|
||||
MAKE_PROP(PRECOMPILE_HEADERS_REUSE_FROM);
|
||||
MAKE_PROP(CUDA_CUBIN_COMPILATION);
|
||||
MAKE_PROP(CUDA_FATBIN_COMPILATION);
|
||||
MAKE_PROP(CUDA_OPTIX_COMPILATION);
|
||||
MAKE_PROP(CUDA_PTX_COMPILATION);
|
||||
MAKE_PROP(EXPORT_NAME);
|
||||
MAKE_PROP(IMPORTED);
|
||||
@@ -1878,14 +1881,38 @@ void cmTarget::StoreProperty(const std::string& prop, ValueType value)
|
||||
value ? value
|
||||
: std::string{})) { // NOLINT(bugprone-branch-clone)
|
||||
/* error was reported by check method */
|
||||
} else if (prop == propCUDA_PTX_COMPILATION &&
|
||||
this->GetType() != cmStateEnums::OBJECT_LIBRARY) {
|
||||
std::ostringstream e;
|
||||
e << "CUDA_PTX_COMPILATION property can only be applied to OBJECT "
|
||||
"targets (\""
|
||||
<< this->impl->Name << "\")\n";
|
||||
this->impl->Makefile->IssueMessage(MessageType::FATAL_ERROR, e.str());
|
||||
return;
|
||||
} else if (prop == propCUDA_CUBIN_COMPILATION ||
|
||||
prop == propCUDA_FATBIN_COMPILATION ||
|
||||
prop == propCUDA_OPTIX_COMPILATION ||
|
||||
prop == propCUDA_PTX_COMPILATION) {
|
||||
auto const& compiler =
|
||||
this->impl->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID");
|
||||
auto const& compilerVersion =
|
||||
this->impl->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_VERSION");
|
||||
if (this->GetType() != cmStateEnums::OBJECT_LIBRARY) {
|
||||
auto e =
|
||||
cmStrCat(prop, " property can only be applied to OBJECT targets(",
|
||||
this->impl->Name, ")\n");
|
||||
this->impl->Makefile->IssueMessage(MessageType::FATAL_ERROR, e);
|
||||
return;
|
||||
}
|
||||
const bool flag_found =
|
||||
(prop == propCUDA_PTX_COMPILATION &&
|
||||
this->impl->Makefile->GetDefinition("_CMAKE_CUDA_PTX_FLAG")) ||
|
||||
(prop == propCUDA_CUBIN_COMPILATION &&
|
||||
this->impl->Makefile->GetDefinition("_CMAKE_CUDA_CUBIN_FLAG")) ||
|
||||
(prop == propCUDA_FATBIN_COMPILATION &&
|
||||
this->impl->Makefile->GetDefinition("_CMAKE_CUDA_FATBIN_FLAG")) ||
|
||||
(prop == propCUDA_OPTIX_COMPILATION &&
|
||||
this->impl->Makefile->GetDefinition("_CMAKE_CUDA_OPTIX_FLAG"));
|
||||
if (flag_found) {
|
||||
this->impl->Properties.SetProperty(prop, value);
|
||||
} else {
|
||||
auto e = cmStrCat(prop, " property is not supported by ", compiler,
|
||||
" compiler version ", compilerVersion, ".");
|
||||
this->impl->Makefile->IssueMessage(MessageType::FATAL_ERROR, e);
|
||||
return;
|
||||
}
|
||||
} else if (prop == propPRECOMPILE_HEADERS_REUSE_FROM) {
|
||||
if (this->GetProperty("PRECOMPILE_HEADERS")) {
|
||||
std::ostringstream e;
|
||||
|
||||
@@ -3595,13 +3595,13 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions(
|
||||
if (this->GeneratorTarget->GetPropertyAsBool("CUDA_SEPARABLE_COMPILATION")) {
|
||||
cudaOptions.AddFlag("GenerateRelocatableDeviceCode", "true");
|
||||
}
|
||||
bool notPtx = true;
|
||||
bool notPtxLike = true;
|
||||
if (this->GeneratorTarget->GetPropertyAsBool("CUDA_PTX_COMPILATION")) {
|
||||
cudaOptions.AddFlag("NvccCompilation", "ptx");
|
||||
// We drop the %(Extension) component as CMake expects all PTX files
|
||||
// to not have the source file extension at all
|
||||
cudaOptions.AddFlag("CompileOut", "$(IntDir)%(Filename).ptx");
|
||||
notPtx = false;
|
||||
notPtxLike = false;
|
||||
|
||||
if (cmSystemTools::VersionCompare(cmSystemTools::OP_GREATER_EQUAL,
|
||||
cudaVersion, "9.0") &&
|
||||
@@ -3616,9 +3616,24 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions(
|
||||
"%(BaseCommandLineTemplate) [CompileOut] [FastMath] "
|
||||
"[Defines] \"%(FullPath)\"");
|
||||
}
|
||||
} else if (this->GeneratorTarget->GetPropertyAsBool(
|
||||
"CUDA_CUBIN_COMPILATION")) {
|
||||
cudaOptions.AddFlag("NvccCompilation", "cubin");
|
||||
cudaOptions.AddFlag("CompileOut", "$(IntDir)%(Filename).cubin");
|
||||
notPtxLike = false;
|
||||
} else if (this->GeneratorTarget->GetPropertyAsBool(
|
||||
"CUDA_FATBIN_COMPILATION")) {
|
||||
cudaOptions.AddFlag("NvccCompilation", "fatbin");
|
||||
cudaOptions.AddFlag("CompileOut", "$(IntDir)%(Filename).fatbin");
|
||||
notPtxLike = false;
|
||||
} else if (this->GeneratorTarget->GetPropertyAsBool(
|
||||
"CUDA_OPTIX_COMPILATION")) {
|
||||
cudaOptions.AddFlag("NvccCompilation", "optix-ir");
|
||||
cudaOptions.AddFlag("CompileOut", "$(IntDir)%(Filename).optixir");
|
||||
notPtxLike = false;
|
||||
}
|
||||
|
||||
if (notPtx &&
|
||||
if (notPtxLike &&
|
||||
cmSystemTools::VersionCompareGreaterEq(
|
||||
"8.0", this->GlobalGenerator->GetPlatformToolsetCudaString())) {
|
||||
// Explicitly state that we want this file to be treated as a
|
||||
|
||||
@@ -27,6 +27,9 @@ if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
|
||||
|
||||
# Only NVCC defines __CUDACC_DEBUG__ when compiling in debug mode.
|
||||
add_cuda_test_macro(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag)
|
||||
add_cuda_test_macro(CudaOnly.CUBIN CudaOnlyCUBIN)
|
||||
add_cuda_test_macro(CudaOnly.Fatbin CudaOnlyFatbin)
|
||||
add_cuda_test_macro(CudaOnly.OptixIR CudaOnlyOptixIR)
|
||||
endif()
|
||||
|
||||
add_cuda_test_macro(CudaOnly.DeviceLTO CudaOnlyDeviceLTO)
|
||||
|
||||
21
Tests/CudaOnly/CUBIN/CMakeLists.txt
Normal file
21
Tests/CudaOnly/CUBIN/CMakeLists.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
project(CudaCUBIN LANGUAGES CUDA)
|
||||
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES all-major)
|
||||
|
||||
add_library(CudaCUBIN OBJECT kernelA.cu kernelB.cu kernelC.cu)
|
||||
set_property(TARGET CudaCUBIN PROPERTY CUDA_CUBIN_COMPILATION ON)
|
||||
set_property(TARGET CudaCUBIN PROPERTY CUDA_ARCHITECTURES native)
|
||||
|
||||
add_executable(CudaOnlyCUBIN main.cu)
|
||||
target_compile_features(CudaOnlyCUBIN PRIVATE cuda_std_11)
|
||||
target_compile_definitions(CudaOnlyCUBIN PRIVATE "CUBIN_FILE_PATHS=\"$<JOIN:$<TARGET_OBJECTS:CudaCUBIN>,~_~>\"")
|
||||
|
||||
find_package(CUDAToolkit REQUIRED)
|
||||
target_link_libraries(CudaOnlyCUBIN PRIVATE CUDA::cuda_driver)
|
||||
|
||||
if(APPLE)
|
||||
# Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
|
||||
set_property(TARGET CudaOnlyCUBIN PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
|
||||
endif()
|
||||
7
Tests/CudaOnly/CUBIN/kernelA.cu
Normal file
7
Tests/CudaOnly/CUBIN/kernelA.cu
Normal file
@@ -0,0 +1,7 @@
|
||||
|
||||
__global__ void kernelA(float* r, float* x, float* y, float* z, int size)
|
||||
{
|
||||
for (int i = threadIdx.x; i < size; i += blockDim.x) {
|
||||
r[i] = x[i] * y[i] + z[i];
|
||||
}
|
||||
}
|
||||
7
Tests/CudaOnly/CUBIN/kernelB.cu
Normal file
7
Tests/CudaOnly/CUBIN/kernelB.cu
Normal file
@@ -0,0 +1,7 @@
|
||||
|
||||
__global__ void kernelB(float* r, float* x, float* y, float* z, int size)
|
||||
{
|
||||
for (int i = threadIdx.x; i < size; i += blockDim.x) {
|
||||
r[i] = x[i] * y[i] + z[i];
|
||||
}
|
||||
}
|
||||
7
Tests/CudaOnly/CUBIN/kernelC.cu
Normal file
7
Tests/CudaOnly/CUBIN/kernelC.cu
Normal file
@@ -0,0 +1,7 @@
|
||||
|
||||
__global__ void kernelC(float* r, float* x, float* y, float* z, int size)
|
||||
{
|
||||
for (int i = threadIdx.x; i < size; i += blockDim.x) {
|
||||
r[i] = x[i] * y[i] + z[i];
|
||||
}
|
||||
}
|
||||
56
Tests/CudaOnly/CUBIN/main.cu
Normal file
56
Tests/CudaOnly/CUBIN/main.cu
Normal file
@@ -0,0 +1,56 @@
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <cuda.h>
|
||||
|
||||
#define GENERATED_HEADER(x) GENERATED_HEADER1(x)
|
||||
#define GENERATED_HEADER1(x) <x>
|
||||
|
||||
static std::string input_paths = { CUBIN_FILE_PATHS };
|
||||
|
||||
int main()
|
||||
{
|
||||
const std::string delimiter = "~_~";
|
||||
input_paths += delimiter;
|
||||
|
||||
size_t end = 0;
|
||||
size_t previous_end = 0;
|
||||
std::vector<std::string> actual_paths;
|
||||
while ((end = input_paths.find(delimiter, previous_end)) !=
|
||||
std::string::npos) {
|
||||
actual_paths.emplace_back(
|
||||
input_paths.substr(previous_end, end - previous_end));
|
||||
previous_end = end + 3;
|
||||
}
|
||||
|
||||
cuInit(0);
|
||||
int count = 0;
|
||||
cuDeviceGetCount(&count);
|
||||
if (count == 0) {
|
||||
std::cerr << "No CUDA devices found\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
CUdevice device;
|
||||
cuDeviceGet(&device, 0);
|
||||
|
||||
CUcontext context;
|
||||
cuCtxCreate(&context, 0, device);
|
||||
|
||||
CUmodule module;
|
||||
for (auto p : actual_paths) {
|
||||
if (p.find(".cubin") == std::string::npos) {
|
||||
std::cout << p << " Doesn't have the .cubin suffix" << p << std::endl;
|
||||
return 1;
|
||||
}
|
||||
std::cout << "trying to load cubin: " << p << std::endl;
|
||||
CUresult result = cuModuleLoad(&module, p.c_str());
|
||||
std::cout << "module pointer: " << module << '\n';
|
||||
if (result != CUDA_SUCCESS || module == nullptr) {
|
||||
std::cerr << "Failed to load the embedded cubin with error: "
|
||||
<< static_cast<unsigned int>(result) << '\n';
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
25
Tests/CudaOnly/Fatbin/CMakeLists.txt
Normal file
25
Tests/CudaOnly/Fatbin/CMakeLists.txt
Normal file
@@ -0,0 +1,25 @@
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
project(CudaFATBIN LANGUAGES CUDA)
|
||||
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES all-major)
|
||||
|
||||
add_library(CudaFATBIN OBJECT
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelA.cu
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelB.cu
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelC.cu)
|
||||
|
||||
set_property(TARGET CudaFATBIN PROPERTY CUDA_FATBIN_COMPILATION ON)
|
||||
|
||||
# Will use `cuModuleLoadFatBinary` to load the fatbinaries
|
||||
add_executable(CudaOnlyFatbin main.cu)
|
||||
target_compile_features(CudaOnlyFatbin PRIVATE cuda_std_11)
|
||||
target_compile_definitions(CudaOnlyFatbin PRIVATE "FATBIN_FILE_PATHS=\"$<JOIN:$<TARGET_OBJECTS:CudaFATBIN>,~_~>\"")
|
||||
|
||||
find_package(CUDAToolkit REQUIRED)
|
||||
target_link_libraries(CudaOnlyFatbin PRIVATE CUDA::cuda_driver)
|
||||
|
||||
if(APPLE)
|
||||
# Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
|
||||
set_property(TARGET CudaOnlyFatbin PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
|
||||
endif()
|
||||
56
Tests/CudaOnly/Fatbin/main.cu
Normal file
56
Tests/CudaOnly/Fatbin/main.cu
Normal file
@@ -0,0 +1,56 @@
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <cuda.h>
|
||||
|
||||
#define GENERATED_HEADER(x) GENERATED_HEADER1(x)
|
||||
#define GENERATED_HEADER1(x) <x>
|
||||
|
||||
static std::string input_paths = { FATBIN_FILE_PATHS };
|
||||
|
||||
int main()
|
||||
{
|
||||
const std::string delimiter = "~_~";
|
||||
input_paths += delimiter;
|
||||
|
||||
size_t end = 0;
|
||||
size_t previous_end = 0;
|
||||
std::vector<std::string> actual_paths;
|
||||
while ((end = input_paths.find(delimiter, previous_end)) !=
|
||||
std::string::npos) {
|
||||
actual_paths.emplace_back(
|
||||
input_paths.substr(previous_end, end - previous_end));
|
||||
previous_end = end + 3;
|
||||
}
|
||||
|
||||
cuInit(0);
|
||||
int count = 0;
|
||||
cuDeviceGetCount(&count);
|
||||
if (count == 0) {
|
||||
std::cerr << "No CUDA devices found\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
CUdevice device;
|
||||
cuDeviceGet(&device, 0);
|
||||
|
||||
CUcontext context;
|
||||
cuCtxCreate(&context, 0, device);
|
||||
|
||||
CUmodule module;
|
||||
for (auto p : actual_paths) {
|
||||
if (p.find(".fatbin") == std::string::npos) {
|
||||
std::cout << p << " Doesn't have the .fatbin suffix" << p << std::endl;
|
||||
return 1;
|
||||
}
|
||||
std::cout << "trying to load fatbin: " << p << std::endl;
|
||||
CUresult result = cuModuleLoad(&module, p.c_str());
|
||||
std::cout << "module pointer: " << module << '\n';
|
||||
if (result != CUDA_SUCCESS || module == nullptr) {
|
||||
std::cerr << "Failed to load the embedded fatbin with error: "
|
||||
<< static_cast<unsigned int>(result) << '\n';
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
33
Tests/CudaOnly/OptixIR/CMakeLists.txt
Normal file
33
Tests/CudaOnly/OptixIR/CMakeLists.txt
Normal file
@@ -0,0 +1,33 @@
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
project(CudaOptix LANGUAGES CUDA)
|
||||
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES all-major)
|
||||
|
||||
add_library(CudaOptix OBJECT
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelA.cu
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelB.cu
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelC.cu)
|
||||
|
||||
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.7.0")
|
||||
set_property(TARGET CudaOptix PROPERTY CUDA_OPTIX_COMPILATION ON)
|
||||
endif()
|
||||
|
||||
set_property(TARGET CudaOptix PROPERTY CUDA_ARCHITECTURES native)
|
||||
|
||||
add_executable(CudaOnlyOptixIR main.cu)
|
||||
target_compile_features(CudaOnlyOptixIR PRIVATE cuda_std_11)
|
||||
|
||||
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.7.0")
|
||||
target_compile_definitions(CudaOnlyOptixIR PRIVATE "OPTIX_FILE_PATHS=\"$<JOIN:$<TARGET_OBJECTS:CudaOptix>,~_~>\"")
|
||||
else()
|
||||
target_compile_definitions(CudaOnlyOptixIR PRIVATE "OPTIX_FILE_PATHS=\"NO_OPTIX_SUPPORT\"")
|
||||
endif()
|
||||
|
||||
find_package(CUDAToolkit REQUIRED)
|
||||
target_link_libraries(CudaOnlyOptixIR PRIVATE CUDA::cuda_driver)
|
||||
|
||||
if(APPLE)
|
||||
# Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
|
||||
set_property(TARGET CudaOnlyOptixIR PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
|
||||
endif()
|
||||
53
Tests/CudaOnly/OptixIR/main.cu
Normal file
53
Tests/CudaOnly/OptixIR/main.cu
Normal file
@@ -0,0 +1,53 @@
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <cuda.h>
|
||||
|
||||
#define GENERATED_HEADER(x) GENERATED_HEADER1(x)
|
||||
#define GENERATED_HEADER1(x) <x>
|
||||
|
||||
static std::string input_paths = { OPTIX_FILE_PATHS };
|
||||
|
||||
int main()
|
||||
{
|
||||
if (input_paths == "NO_OPTIX_SUPPORT") {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const std::string delimiter = "~_~";
|
||||
input_paths += delimiter;
|
||||
|
||||
size_t end = 0;
|
||||
size_t previous_end = 0;
|
||||
std::vector<std::string> actual_paths;
|
||||
while ((end = input_paths.find(delimiter, previous_end)) !=
|
||||
std::string::npos) {
|
||||
actual_paths.emplace_back(
|
||||
input_paths.substr(previous_end, end - previous_end));
|
||||
previous_end = end + 3;
|
||||
}
|
||||
|
||||
if (actual_paths.empty()) {
|
||||
std::cerr << "Failed to parse OPTIX_FILE_PATHS" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
const std::uint32_t optix_magic_value = 0x7f4e43ed;
|
||||
for (auto p : actual_paths) {
|
||||
if (p.find(".optixir") == std::string::npos) {
|
||||
std::cout << p << " Doesn't have the .optixir suffix" << p << std::endl;
|
||||
return 1;
|
||||
}
|
||||
std::ifstream input(p, std::ios::binary);
|
||||
std::uint32_t value;
|
||||
input.read(reinterpret_cast<char*>(&value), sizeof(value));
|
||||
if (value != optix_magic_value) {
|
||||
std::cerr << p << " Doesn't look like an optix-ir file" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user