mirror of
https://github.com/hedge-dev/UnleashedRecomp.git
synced 2026-05-07 19:40:19 -05:00
GPU Profiling functionality. (#363)
* Added profiling timestamps to Vulkan. Added more profilers in general. * Add timestamps to D3D12. * Add update director to the profiler. --------- Co-authored-by: Skyth <19259897+blueskythlikesclouds@users.noreply.github.com>
This commit is contained in:
@@ -1437,6 +1437,52 @@ namespace plume {
|
||||
return height;
|
||||
}
|
||||
|
||||
// D3D12QueryPool
|
||||
|
||||
// Creates a timestamp query heap with room for queryCount queries, together with a
// readback buffer and a CPU-side results array that each hold one uint64_t per query.
// If CreateQueryHeap fails, the error is logged to stderr and construction stops early:
// 'd3d' stays null, 'readbackBuffer' stays empty and 'results' keeps zero elements.
D3D12QueryPool::D3D12QueryPool(D3D12Device *device, uint32_t queryCount) {
    assert(device != nullptr);
    assert(queryCount > 0);

    this->device = device;

    D3D12_QUERY_HEAP_DESC heapDesc = {};
    heapDesc.Count = queryCount;
    heapDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP;

    const HRESULT createResult = device->d3d->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&d3d));
    if (FAILED(createResult)) {
        fprintf(stderr, "CreateQueryHeap failed with error code 0x%lX.\n", createResult);
        return;
    }

    // One 64-bit slot per query, both on the GPU-visible readback side and on the CPU side.
    const auto readbackSize = sizeof(uint64_t) * queryCount;
    readbackBuffer = device->createBuffer(RenderBufferDesc::ReadbackBuffer(readbackSize));
    results.resize(queryCount);
}
|
||||
|
||||
// Releases the query heap, if one was successfully created by the constructor.
D3D12QueryPool::~D3D12QueryPool() {
    if (d3d == nullptr) {
        return;
    }

    d3d->Release();
}
|
||||
|
||||
void D3D12QueryPool::queryResults() {
|
||||
void *readbackData = readbackBuffer->map();
|
||||
memcpy(results.data(), readbackData, sizeof(uint64_t) * results.size());
|
||||
readbackBuffer->unmap();
|
||||
|
||||
for (uint64_t &result : results) {
|
||||
result = result / double(device->timestampFrequency) * 1000000000.0;
|
||||
}
|
||||
}
|
||||
|
||||
const uint64_t *D3D12QueryPool::getResults() const {
|
||||
return results.data();
|
||||
}
|
||||
|
||||
uint32_t D3D12QueryPool::getCount() const {
|
||||
return uint32_t(results.size());
|
||||
}
|
||||
|
||||
// D3D12CommandList
|
||||
|
||||
D3D12CommandList::D3D12CommandList(D3D12Device *device, RenderCommandListType type) {
|
||||
@@ -2004,6 +2050,19 @@ namespace plume {
|
||||
d3d->DiscardResource(interfaceTexture->d3d, nullptr);
|
||||
}
|
||||
|
||||
// Intentionally empty: the D3D12 backend records nothing when asked to reset queries.
// All arguments are ignored.
void D3D12CommandList::resetQueryPool(const RenderQueryPool *queryPool, uint32_t queryFirstIndex, uint32_t queryCount) {
    (void)queryPool;
    (void)queryFirstIndex;
    (void)queryCount;
}
|
||||
|
||||
void D3D12CommandList::writeTimestamp(const RenderQueryPool *queryPool, uint32_t queryIndex) {
|
||||
assert(queryPool != nullptr);
|
||||
|
||||
const D3D12QueryPool *interfaceQueryPool = static_cast<const D3D12QueryPool *>(queryPool);
|
||||
const D3D12Buffer *readbackBuffer = static_cast<const D3D12Buffer *>(interfaceQueryPool->readbackBuffer.get());
|
||||
d3d->EndQuery(interfaceQueryPool->d3d, D3D12_QUERY_TYPE_TIMESTAMP, queryIndex);
|
||||
d3d->ResolveQueryData(interfaceQueryPool->d3d, D3D12_QUERY_TYPE_TIMESTAMP, queryIndex, 1, readbackBuffer->d3d, queryIndex * sizeof(uint64_t));
|
||||
}
|
||||
|
||||
void D3D12CommandList::checkDescriptorHeaps() {
|
||||
if (!descriptorHeapsSet) {
|
||||
ID3D12DescriptorHeap *descriptorHeaps[] = { device->viewHeapAllocator->heap, device->samplerHeapAllocator->heap };
|
||||
@@ -3461,6 +3520,13 @@ namespace plume {
|
||||
samplerHeapAllocator = std::make_unique<D3D12DescriptorHeapAllocator>(this, SamplerDescriptorHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
|
||||
colorTargetHeapAllocator = std::make_unique<D3D12DescriptorHeapAllocator>(this, TargetDescriptorHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
|
||||
depthTargetHeapAllocator = std::make_unique<D3D12DescriptorHeapAllocator>(this, TargetDescriptorHeapSize, D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
|
||||
|
||||
// Create a command queue only for retrieving the timestamp frequency. Delete it immediately afterwards.
|
||||
std::unique_ptr<D3D12CommandQueue> timestampCommandQueue = std::make_unique<D3D12CommandQueue>(this, RenderCommandListType::DIRECT);
|
||||
res = timestampCommandQueue->d3d->GetTimestampFrequency(&timestampFrequency);
|
||||
if (FAILED(res)) {
|
||||
fprintf(stderr, "GetTimestampFrequency failed with error code 0x%lX. Timestamps will be inaccurate.\n", res);
|
||||
}
|
||||
}
|
||||
|
||||
D3D12Device::~D3D12Device() {
|
||||
@@ -3535,6 +3601,10 @@ namespace plume {
|
||||
return std::make_unique<D3D12Framebuffer>(this, desc);
|
||||
}
|
||||
|
||||
std::unique_ptr<RenderQueryPool> D3D12Device::createQueryPool(uint32_t queryCount) {
|
||||
return std::make_unique<D3D12QueryPool>(this, queryCount);
|
||||
}
|
||||
|
||||
void D3D12Device::setBottomLevelASBuildInfo(RenderBottomLevelASBuildInfo &buildInfo, const RenderBottomLevelASMesh *meshes, uint32_t meshCount, bool preferFastBuild, bool preferFastTrace) {
|
||||
assert(meshes != nullptr);
|
||||
assert(meshCount > 0);
|
||||
|
||||
@@ -144,6 +144,19 @@ namespace plume {
|
||||
uint32_t getHeight() const override;
|
||||
};
|
||||
|
||||
struct D3D12QueryPool : RenderQueryPool {
|
||||
D3D12Device *device = nullptr;
|
||||
ID3D12QueryHeap *d3d = nullptr;
|
||||
std::vector<uint64_t> results;
|
||||
std::unique_ptr<RenderBuffer> readbackBuffer;
|
||||
|
||||
D3D12QueryPool(D3D12Device *device, uint32_t queryCount);
|
||||
virtual ~D3D12QueryPool() override;
|
||||
virtual void queryResults() override;
|
||||
virtual const uint64_t *getResults() const override;
|
||||
virtual uint32_t getCount() const override;
|
||||
};
|
||||
|
||||
struct D3D12CommandList : RenderCommandList {
|
||||
ID3D12GraphicsCommandList9 *d3d = nullptr;
|
||||
ID3D12CommandAllocator *commandAllocator = nullptr;
|
||||
@@ -196,6 +209,8 @@ namespace plume {
|
||||
void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) override;
|
||||
void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) override;
|
||||
void discardTexture(const RenderTexture* texture) override;
|
||||
void resetQueryPool(const RenderQueryPool *queryPool, uint32_t queryFirstIndex, uint32_t queryCount) override;
|
||||
void writeTimestamp(const RenderQueryPool *queryPool, uint32_t queryIndex) override;
|
||||
void checkDescriptorHeaps();
|
||||
void notifyDescriptorHeapWasChangedExternally();
|
||||
void checkTopology();
|
||||
@@ -417,6 +432,7 @@ namespace plume {
|
||||
std::unique_ptr<D3D12DescriptorHeapAllocator> depthTargetHeapAllocator;
|
||||
RenderDeviceCapabilities capabilities;
|
||||
RenderDeviceDescription description;
|
||||
uint64_t timestampFrequency = 1;
|
||||
|
||||
D3D12Device(D3D12Interface *renderInterface, const std::string &preferredDeviceName);
|
||||
~D3D12Device() override;
|
||||
@@ -436,6 +452,7 @@ namespace plume {
|
||||
std::unique_ptr<RenderCommandFence> createCommandFence() override;
|
||||
std::unique_ptr<RenderCommandSemaphore> createCommandSemaphore() override;
|
||||
std::unique_ptr<RenderFramebuffer> createFramebuffer(const RenderFramebufferDesc &desc) override;
|
||||
std::unique_ptr<RenderQueryPool> createQueryPool(uint32_t queryCount) override;
|
||||
void setBottomLevelASBuildInfo(RenderBottomLevelASBuildInfo &buildInfo, const RenderBottomLevelASMesh *meshes, uint32_t meshCount, bool preferFastBuild, bool preferFastTrace) override;
|
||||
void setTopLevelASBuildInfo(RenderTopLevelASBuildInfo &buildInfo, const RenderTopLevelASInstance *instances, uint32_t instanceCount, bool preferFastBuild, bool preferFastTrace) override;
|
||||
void setShaderBindingTableInfo(RenderShaderBindingTableInfo &tableInfo, const RenderShaderBindingGroups &groups, const RenderPipeline *pipeline, RenderDescriptorSet **descriptorSets, uint32_t descriptorSetCount) override;
|
||||
|
||||
@@ -147,6 +147,8 @@ namespace plume {
|
||||
virtual void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) = 0;
|
||||
virtual void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) = 0;
|
||||
virtual void discardTexture(const RenderTexture* texture) = 0; // D3D12 only.
|
||||
virtual void resetQueryPool(const RenderQueryPool *queryPool, uint32_t queryFirstIndex, uint32_t queryCount) = 0;
|
||||
virtual void writeTimestamp(const RenderQueryPool *queryPool, uint32_t queryIndex) = 0;
|
||||
|
||||
// Concrete implementation shortcuts.
|
||||
inline void barriers(RenderBarrierStages stages, const RenderBufferBarrier &barrier) {
|
||||
@@ -208,6 +210,13 @@ namespace plume {
|
||||
virtual std::unique_ptr<RenderTexture> createTexture(const RenderTextureDesc &desc) = 0;
|
||||
};
|
||||
|
||||
// Backend-independent interface for a pool of GPU queries whose values can be read on the CPU.
struct RenderQueryPool {
    virtual ~RenderQueryPool() = default;

    // Refreshes the values exposed through getResults().
    virtual void queryResults() = 0;

    // Returns a pointer to the array of query values; it holds getCount() entries.
    virtual const uint64_t *getResults() const = 0;

    // Returns the number of entries in the array returned by getResults().
    virtual uint32_t getCount() const = 0;
};
|
||||
|
||||
struct RenderDevice {
|
||||
virtual ~RenderDevice() { }
|
||||
virtual std::unique_ptr<RenderCommandList> createCommandList(RenderCommandListType type) = 0;
|
||||
@@ -226,6 +235,7 @@ namespace plume {
|
||||
virtual std::unique_ptr<RenderCommandFence> createCommandFence() = 0;
|
||||
virtual std::unique_ptr<RenderCommandSemaphore> createCommandSemaphore() = 0;
|
||||
virtual std::unique_ptr<RenderFramebuffer> createFramebuffer(const RenderFramebufferDesc &desc) = 0;
|
||||
virtual std::unique_ptr<RenderQueryPool> createQueryPool(uint32_t queryCount) = 0;
|
||||
virtual void setBottomLevelASBuildInfo(RenderBottomLevelASBuildInfo &buildInfo, const RenderBottomLevelASMesh *meshes, uint32_t meshCount, bool preferFastBuild = true, bool preferFastTrace = false) = 0;
|
||||
virtual void setTopLevelASBuildInfo(RenderTopLevelASBuildInfo &buildInfo, const RenderTopLevelASInstance *instances, uint32_t instanceCount, bool preferFastBuild = true, bool preferFastTrace = false) = 0;
|
||||
virtual void setShaderBindingTableInfo(RenderShaderBindingTableInfo &tableInfo, const RenderShaderBindingGroups &groups, const RenderPipeline *pipeline, RenderDescriptorSet **descriptorSets, uint32_t descriptorSetCount) = 0;
|
||||
|
||||
@@ -69,6 +69,7 @@ namespace plume {
|
||||
struct RenderSampler;
|
||||
struct RenderShader;
|
||||
struct RenderTexture;
|
||||
struct RenderQueryPool;
|
||||
|
||||
// Enums.
|
||||
|
||||
|
||||
@@ -2522,6 +2522,80 @@ namespace plume {
|
||||
return (depthAttachment == attachment);
|
||||
}
|
||||
|
||||
// VulkanQueryPool
|
||||
|
||||
// Creates a Vulkan timestamp query pool with room for queryCount queries and sizes the
// CPU-side results array to match.
// If vkCreateQueryPool fails, the error is logged to stderr and construction stops early,
// leaving 'results' with zero elements.
VulkanQueryPool::VulkanQueryPool(VulkanDevice *device, uint32_t queryCount) {
    assert(device != nullptr);
    assert(queryCount > 0);

    this->device = device;

    VkQueryPoolCreateInfo poolInfo = {};
    poolInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
    poolInfo.queryCount = queryCount;
    poolInfo.queryType = VK_QUERY_TYPE_TIMESTAMP;

    const VkResult createResult = vkCreateQueryPool(device->vk, &poolInfo, nullptr, &vk);
    if (createResult != VK_SUCCESS) {
        fprintf(stderr, "vkCreateQueryPool failed with error code 0x%X.\n", createResult);
        return;
    }

    results.resize(queryCount);
}
|
||||
|
||||
// Destroys the query pool handle on the device that created it.
VulkanQueryPool::~VulkanQueryPool() {
    VulkanDevice *owner = device;
    vkDestroyQueryPool(owner->vk, vk, nullptr);
}
|
||||
|
||||
void VulkanQueryPool::queryResults() {
|
||||
VkResult res = vkGetQueryPoolResults(device->vk, vk, 0, uint32_t(results.size()), sizeof(uint64_t) * results.size(), results.data(), sizeof(uint64_t), VK_QUERY_RESULT_64_BIT);
|
||||
if (res != VK_SUCCESS) {
|
||||
fprintf(stderr, "vkGetQueryPoolResults failed with error code 0x%X.\n", res);
|
||||
return;
|
||||
}
|
||||
|
||||
// Conversion sourced from Godot Engine's Vulkan Rendering Driver.
|
||||
auto mult64to128 = [](uint64_t u, uint64_t v, uint64_t &h, uint64_t &l) {
|
||||
uint64_t u1 = (u & 0xffffffff);
|
||||
uint64_t v1 = (v & 0xffffffff);
|
||||
uint64_t t = (u1 * v1);
|
||||
uint64_t w3 = (t & 0xffffffff);
|
||||
uint64_t k = (t >> 32);
|
||||
|
||||
u >>= 32;
|
||||
t = (u * v1) + k;
|
||||
k = (t & 0xffffffff);
|
||||
uint64_t w1 = (t >> 32);
|
||||
|
||||
v >>= 32;
|
||||
t = (u1 * v) + k;
|
||||
k = (t >> 32);
|
||||
|
||||
h = (u * v) + w1 + k;
|
||||
l = (t << 32) + w3;
|
||||
};
|
||||
|
||||
// Convert results to timestamps.
|
||||
constexpr uint64_t shift_bits = 16;
|
||||
double timestampPeriod = double(device->physicalDeviceProperties.limits.timestampPeriod);
|
||||
uint64_t h = 0, l = 0;
|
||||
for (uint64_t &result : results) {
|
||||
mult64to128(result, uint64_t(timestampPeriod * double(1 << shift_bits)), h, l);
|
||||
result = l;
|
||||
result >>= shift_bits;
|
||||
result |= h << (64 - shift_bits);
|
||||
}
|
||||
}
|
||||
|
||||
const uint64_t *VulkanQueryPool::getResults() const {
|
||||
return results.data();
|
||||
}
|
||||
|
||||
uint32_t VulkanQueryPool::getCount() const {
|
||||
return uint32_t(results.size());
|
||||
}
|
||||
|
||||
// VulkanCommandList
|
||||
|
||||
VulkanCommandList::VulkanCommandList(VulkanDevice *device, RenderCommandListType type) {
|
||||
@@ -3210,6 +3284,20 @@ namespace plume {
|
||||
// Not required in Vulkan.
|
||||
}
|
||||
|
||||
void VulkanCommandList::resetQueryPool(const RenderQueryPool *queryPool, uint32_t queryFirstIndex, uint32_t queryCount) {
|
||||
assert(queryPool != nullptr);
|
||||
|
||||
const VulkanQueryPool *interfaceQueryPool = static_cast<const VulkanQueryPool *>(queryPool);
|
||||
vkCmdResetQueryPool(vk, interfaceQueryPool->vk, queryFirstIndex, queryCount);
|
||||
}
|
||||
|
||||
void VulkanCommandList::writeTimestamp(const RenderQueryPool *queryPool, uint32_t queryIndex) {
|
||||
assert(queryPool != nullptr);
|
||||
|
||||
const VulkanQueryPool *interfaceQueryPool = static_cast<const VulkanQueryPool *>(queryPool);
|
||||
vkCmdWriteTimestamp(vk, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, interfaceQueryPool->vk, queryIndex);
|
||||
}
|
||||
|
||||
void VulkanCommandList::checkActiveRenderPass() {
|
||||
assert(targetFramebuffer != nullptr);
|
||||
|
||||
@@ -3891,6 +3979,10 @@ namespace plume {
|
||||
return std::make_unique<VulkanFramebuffer>(this, desc);
|
||||
}
|
||||
|
||||
std::unique_ptr<RenderQueryPool> VulkanDevice::createQueryPool(uint32_t queryCount) {
|
||||
return std::make_unique<VulkanQueryPool>(this, queryCount);
|
||||
}
|
||||
|
||||
void VulkanDevice::setBottomLevelASBuildInfo(RenderBottomLevelASBuildInfo &buildInfo, const RenderBottomLevelASMesh *meshes, uint32_t meshCount, bool preferFastBuild, bool preferFastTrace) {
|
||||
assert(meshes != nullptr);
|
||||
assert(meshCount > 0);
|
||||
|
||||
@@ -271,6 +271,18 @@ namespace plume {
|
||||
bool contains(const VulkanTexture *attachment) const;
|
||||
};
|
||||
|
||||
struct VulkanQueryPool : RenderQueryPool {
|
||||
VulkanDevice *device = nullptr;
|
||||
std::vector<uint64_t> results;
|
||||
VkQueryPool vk = VK_NULL_HANDLE;
|
||||
|
||||
VulkanQueryPool(VulkanDevice *device, uint32_t queryCount);
|
||||
virtual ~VulkanQueryPool() override;
|
||||
virtual void queryResults() override;
|
||||
virtual const uint64_t *getResults() const override;
|
||||
virtual uint32_t getCount() const override;
|
||||
};
|
||||
|
||||
struct VulkanCommandList : RenderCommandList {
|
||||
VkCommandBuffer vk = VK_NULL_HANDLE;
|
||||
VkCommandPool commandPool = VK_NULL_HANDLE;
|
||||
@@ -319,6 +331,8 @@ namespace plume {
|
||||
void buildBottomLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, const RenderBottomLevelASBuildInfo &buildInfo) override;
|
||||
void buildTopLevelAS(const RenderAccelerationStructure *dstAccelerationStructure, RenderBufferReference scratchBuffer, RenderBufferReference instancesBuffer, const RenderTopLevelASBuildInfo &buildInfo) override;
|
||||
void discardTexture(const RenderTexture* texture) override;
|
||||
void resetQueryPool(const RenderQueryPool *queryPool, uint32_t queryFirstIndex, uint32_t queryCount) override;
|
||||
void writeTimestamp(const RenderQueryPool *queryPool, uint32_t queryIndex) override;
|
||||
void checkActiveRenderPass();
|
||||
void endActiveRenderPass();
|
||||
void setDescriptorSet(VkPipelineBindPoint bindPoint, const VulkanPipelineLayout *pipelineLayout, const RenderDescriptorSet *descriptorSet, uint32_t setIndex);
|
||||
@@ -409,6 +423,7 @@ namespace plume {
|
||||
std::unique_ptr<RenderCommandFence> createCommandFence() override;
|
||||
std::unique_ptr<RenderCommandSemaphore> createCommandSemaphore() override;
|
||||
std::unique_ptr<RenderFramebuffer> createFramebuffer(const RenderFramebufferDesc &desc) override;
|
||||
std::unique_ptr<RenderQueryPool> createQueryPool(uint32_t queryCount) override;
|
||||
void setBottomLevelASBuildInfo(RenderBottomLevelASBuildInfo &buildInfo, const RenderBottomLevelASMesh *meshes, uint32_t meshCount, bool preferFastBuild, bool preferFastTrace) override;
|
||||
void setTopLevelASBuildInfo(RenderTopLevelASBuildInfo &buildInfo, const RenderTopLevelASInstance *instances, uint32_t instanceCount, bool preferFastBuild, bool preferFastTrace) override;
|
||||
void setShaderBindingTableInfo(RenderShaderBindingTableInfo &tableInfo, const RenderShaderBindingGroups &groups, const RenderPipeline *pipeline, RenderDescriptorSet **descriptorSets, uint32_t descriptorSetCount) override;
|
||||
|
||||
Reference in New Issue
Block a user