From 66c20e73c9944686ff67cfaea326c3defa1c3b74 Mon Sep 17 00:00:00 2001 From: Skyth <19259897+blueskythlikesclouds@users.noreply.github.com> Date: Sun, 20 Oct 2024 18:47:26 +0300 Subject: [PATCH] Implement movie rendering & fix some validation errors. --- UnleashedRecomp/CMakeLists.txt | 2 + UnleashedRecomp/config.cpp | 1 + UnleashedRecomp/config.h | 1 + UnleashedRecomp/game.cpp | 18 ++ UnleashedRecomp/gpu/rhi/rt64_d3d12.cpp | 151 ++++++++-------- UnleashedRecomp/gpu/rhi/rt64_d3d12.h | 17 +- .../gpu/rhi/rt64_render_interface_types.h | 23 +++ UnleashedRecomp/gpu/rhi/rt64_vulkan.cpp | 30 +++- UnleashedRecomp/gpu/shader/movie_common.hlsl | 51 ++++++ UnleashedRecomp/gpu/shader/movie_ps.hlsl | 102 +++++++++++ UnleashedRecomp/gpu/shader/movie_vs.hlsl | 14 ++ UnleashedRecomp/gpu/video.cpp | 163 +++++++++++++++--- UnleashedRecomp/misc_impl.cpp | 14 -- UnleashedRecomp/res/config.toml | 3 +- UnleashedRecompLib/config/SWA.toml | 7 +- 15 files changed, 458 insertions(+), 139 deletions(-) create mode 100644 UnleashedRecomp/gpu/shader/movie_common.hlsl create mode 100644 UnleashedRecomp/gpu/shader/movie_ps.hlsl create mode 100644 UnleashedRecomp/gpu/shader/movie_vs.hlsl diff --git a/UnleashedRecomp/CMakeLists.txt b/UnleashedRecomp/CMakeLists.txt index ee697fc..c189d50 100644 --- a/UnleashedRecomp/CMakeLists.txt +++ b/UnleashedRecomp/CMakeLists.txt @@ -159,6 +159,8 @@ function(compile_pixel_shader FILE_PATH) endfunction() compile_vertex_shader(copy_vs) +compile_pixel_shader(movie_ps) +compile_vertex_shader(movie_vs) compile_pixel_shader(resolve_msaa_depth_2x) compile_pixel_shader(resolve_msaa_depth_4x) compile_pixel_shader(resolve_msaa_depth_8x) diff --git a/UnleashedRecomp/config.cpp b/UnleashedRecomp/config.cpp index 1c6da53..b4c5357 100644 --- a/UnleashedRecomp/config.cpp +++ b/UnleashedRecomp/config.cpp @@ -11,6 +11,7 @@ void Config::Load() TOML_READ_BOOLEAN(Hints); TOML_READ_BOOLEAN(UnleashOutOfControlDrain); TOML_READ_BOOLEAN(WerehogHubTransformVideo); + TOML_READ_BOOLEAN(LogoSkip); } TOML_END_SECTION(); diff --git a/UnleashedRecomp/config.h b/UnleashedRecomp/config.h index 63d59e2..d6159d9 100644 --- a/UnleashedRecomp/config.h +++ b/UnleashedRecomp/config.h @@ -52,6 +52,7 @@ public: inline static bool Hints = true; inline static bool UnleashOutOfControlDrain = true; inline static bool WerehogHubTransformVideo = true; + inline static bool LogoSkip = false; // Controls inline static bool XButtonHoming = true; diff --git a/UnleashedRecomp/game.cpp b/UnleashedRecomp/game.cpp index b3c7c5c..52ca173 100644 --- a/UnleashedRecomp/game.cpp +++ b/UnleashedRecomp/game.cpp @@ -236,4 +236,22 @@ void GetStageIDMidAsmHook(PPCRegister& r5) m_pStageID = *(xpointer*)g_memory.Translate(r5.u32); } +// Logo skip +PPC_FUNC_IMPL(__imp__sub_82547DF0); +PPC_FUNC(sub_82547DF0) +{ + if (Config::LogoSkip) + { + ctx.r4.u64 = 0; + ctx.r5.u64 = 0; + ctx.r6.u64 = 1; + ctx.r7.u64 = 0; + sub_825517C8(ctx, base); + } + else + { + __imp__sub_82547DF0(ctx, base); + } +} + #pragma endregion diff --git a/UnleashedRecomp/gpu/rhi/rt64_d3d12.cpp b/UnleashedRecomp/gpu/rhi/rt64_d3d12.cpp index bed7c9e..c8d3ad7 100644 --- a/UnleashedRecomp/gpu/rhi/rt64_d3d12.cpp +++ b/UnleashedRecomp/gpu/rhi/rt64_d3d12.cpp @@ -21,6 +21,7 @@ # define D3D12_DEBUG_LAYER_BREAK_ON_ERROR true # define D3D12_DEBUG_LAYER_BREAK_ON_WARNING false # define D3D12_DEBUG_LAYER_SUPRESS_SAMPLE_POSITIONS_ERROR // Supress error message that's been fixed in newer Agility SDK versions. +//# define D3D12_DEBUG_LAYER_GPU_BASED_VALIDATION_ENABLED #endif // Old Windows SDK versions don't provide this macro, so we workaround it by making sure it is defined. @@ -649,6 +650,37 @@ namespace RT64 { return flags; } + static UINT toD3D12(RenderSwizzle swizzle, UINT identity) { + switch (swizzle) { + case RenderSwizzle::IDENTITY: + return identity; + case RenderSwizzle::ZERO: + return D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0; + case RenderSwizzle::ONE: + return D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_1; + case RenderSwizzle::R: + return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0; + case RenderSwizzle::G: + return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1; + case RenderSwizzle::B: + return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2; + case RenderSwizzle::A: + return D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3; + default: + assert(false && "Unknown swizzle type."); + return identity; + } + } + + static UINT toD3D12(const RenderComponentMapping &componentMapping) { + return D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING( + toD3D12(componentMapping.r, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_0), + toD3D12(componentMapping.g, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_1), + toD3D12(componentMapping.b, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_2), + toD3D12(componentMapping.a, D3D12_SHADER_COMPONENT_MAPPING_FROM_MEMORY_COMPONENT_3) + ); + } + static void setObjectName(ID3D12Object *object, const std::string &name) { const std::wstring wideCharName = Utf8ToUtf16(name); object->SetName(wideCharName.c_str()); @@ -669,37 +701,29 @@ namespace RT64 { heapDesc.Type = heapType; descriptorHandleIncrement = device->d3d->GetDescriptorHandleIncrementSize(heapDesc.Type); - HRESULT res = device->d3d->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&hostHeap)); + const bool shaderVisible = (heapType == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) || (heapType == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + if (shaderVisible) { + heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + } + + HRESULT res = device->d3d->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&heap)); if (FAILED(res)) { fprintf(stderr, "CreateDescriptorHeap failed with error code 0x%lX.\n", res); return; } + + cpuDescriptorHandle = heap->GetCPUDescriptorHandleForHeapStart(); - hostCPUDescriptorHandle = hostHeap->GetCPUDescriptorHandleForHeapStart(); - - const bool shaderVisible = (heapType == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) || (heapType == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); if (shaderVisible) { - heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; - res = device->d3d->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&shaderHeap)); - if (FAILED(res)) { - fprintf(stderr, "CreateDescriptorHeap failed with error code 0x%lX.\n", res); - return; - } - - shaderCPUDescriptorHandle = shaderHeap->GetCPUDescriptorHandleForHeapStart(); - shaderGPUDescriptorHandle = shaderHeap->GetGPUDescriptorHandleForHeapStart(); + gpuDescriptorHandle = heap->GetGPUDescriptorHandleForHeapStart(); } addFreeBlock(0, heapSize); } D3D12DescriptorHeapAllocator::~D3D12DescriptorHeapAllocator() { - if (hostHeap != nullptr) { - hostHeap->Release(); - } - - if (shaderHeap != nullptr) { - shaderHeap->Release(); + if (heap != nullptr) { + heap->Release(); } } @@ -765,22 +789,16 @@ namespace RT64 { addFreeBlock(offset, size); } - D3D12_CPU_DESCRIPTOR_HANDLE D3D12DescriptorHeapAllocator::getHostCPUHandleAt(uint32_t index) const { + D3D12_CPU_DESCRIPTOR_HANDLE D3D12DescriptorHeapAllocator::getCPUHandleAt(uint32_t index) const { assert(index < heapSize); - assert(hostCPUDescriptorHandle.ptr > 0); - return { hostCPUDescriptorHandle.ptr + uint64_t(index) * descriptorHandleIncrement }; + assert(cpuDescriptorHandle.ptr > 0); + return { cpuDescriptorHandle.ptr + uint64_t(index) * descriptorHandleIncrement }; } - D3D12_CPU_DESCRIPTOR_HANDLE D3D12DescriptorHeapAllocator::getShaderCPUHandleAt(uint32_t index) const { + D3D12_GPU_DESCRIPTOR_HANDLE D3D12DescriptorHeapAllocator::getGPUHandleAt(uint32_t index) const { assert(index < heapSize); - assert(shaderCPUDescriptorHandle.ptr > 0); - return { shaderCPUDescriptorHandle.ptr + uint64_t(index) * descriptorHandleIncrement }; - } - - D3D12_GPU_DESCRIPTOR_HANDLE D3D12DescriptorHeapAllocator::getShaderGPUHandleAt(uint32_t index) const { - assert(index < heapSize); - assert(shaderGPUDescriptorHandle.ptr > 0); - return { shaderGPUDescriptorHandle.ptr + uint64_t(index) * descriptorHandleIncrement }; + assert(gpuDescriptorHandle.ptr > 0); + return { gpuDescriptorHandle.ptr + uint64_t(index) * descriptorHandleIncrement }; } // D3D12DescriptorSet @@ -1001,7 +1019,7 @@ namespace RT64 { if ((nativeResource != nullptr) && (textureView != nullptr)) { const D3D12TextureView *interfaceTextureView = static_cast(textureView); D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Shader4ComponentMapping = interfaceTextureView->shader4ComponentMapping; srvDesc.Format = interfaceTextureView->format; const bool isMSAA = (interfaceTextureView->texture->desc.multisampling.sampleCount > RenderSampleCount::COUNT_1); @@ -1096,9 +1114,8 @@ namespace RT64 { uint32_t descriptorIndexClamped = std::min(descriptorIndex, descriptorTypeMaxIndex); uint32_t descriptorIndexRelative = (descriptorIndex - descriptorIndexClamped); uint32_t descriptorHeapIndex = descriptorHeapIndices[descriptorIndexClamped]; - const D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = device->samplerHeapAllocator->getHostCPUHandleAt(samplerAllocation.offset + descriptorHeapIndex + descriptorIndexRelative); + const D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = device->samplerHeapAllocator->getCPUHandleAt(samplerAllocation.offset + descriptorHeapIndex + descriptorIndexRelative); device->d3d->CreateSampler(&interfaceSampler->samplerDesc, cpuHandle); - setHostModified(samplerAllocation, descriptorHeapIndex + descriptorIndexRelative); } } @@ -1125,9 +1142,8 @@ namespace RT64 { uint32_t descriptorIndexClamped = std::min(descriptorIndex, descriptorTypeMaxIndex); uint32_t descriptorIndexRelative = (descriptorIndex - descriptorIndexClamped); uint32_t descriptorHeapIndex = descriptorHeapIndices[descriptorIndexClamped]; - const D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = device->viewHeapAllocator->getHostCPUHandleAt(viewAllocation.offset + descriptorHeapIndex + descriptorIndexRelative); + const D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = device->viewHeapAllocator->getCPUHandleAt(viewAllocation.offset + descriptorHeapIndex + descriptorIndexRelative); device->d3d->CreateShaderResourceView(resource, viewDesc, cpuHandle); - setHostModified(viewAllocation, descriptorHeapIndex + descriptorIndexRelative); } } @@ -1136,9 +1152,8 @@ namespace RT64 { uint32_t descriptorIndexClamped = std::min(descriptorIndex, descriptorTypeMaxIndex); uint32_t descriptorIndexRelative = (descriptorIndex - descriptorIndexClamped); uint32_t descriptorHeapIndex = descriptorHeapIndices[descriptorIndexClamped]; - const D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = device->viewHeapAllocator->getHostCPUHandleAt(viewAllocation.offset + descriptorHeapIndex + descriptorIndexRelative); + const D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = device->viewHeapAllocator->getCPUHandleAt(viewAllocation.offset + descriptorHeapIndex + descriptorIndexRelative); device->d3d->CreateUnorderedAccessView(resource, nullptr, viewDesc, cpuHandle); - setHostModified(viewAllocation, descriptorHeapIndex + descriptorIndexRelative); } } @@ -1151,23 +1166,8 @@ namespace RT64 { uint32_t descriptorIndexClamped = std::min(descriptorIndex, descriptorTypeMaxIndex); uint32_t descriptorIndexRelative = (descriptorIndex - descriptorIndexClamped); uint32_t descriptorHeapIndex = descriptorHeapIndices[descriptorIndexClamped]; - const D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = device->viewHeapAllocator->getHostCPUHandleAt(viewAllocation.offset + descriptorHeapIndex + descriptorIndexRelative); + const D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = device->viewHeapAllocator->getCPUHandleAt(viewAllocation.offset + descriptorHeapIndex + descriptorIndexRelative); device->d3d->CreateConstantBufferView(&viewDesc, cpuHandle); - setHostModified(viewAllocation, descriptorHeapIndex + descriptorIndexRelative); - } - } - - void D3D12DescriptorSet::setHostModified(HeapAllocation &heapAllocation, uint32_t heapIndex) { - if (heapAllocation.hostModifiedCount == 0) { - heapAllocation.hostModifiedIndex = heapIndex; - heapAllocation.hostModifiedCount = 1; - } - else if (heapIndex < heapAllocation.hostModifiedIndex) { - heapAllocation.hostModifiedCount = heapAllocation.hostModifiedIndex + heapAllocation.hostModifiedCount - heapIndex; - heapAllocation.hostModifiedIndex = heapIndex; - } - else if (heapIndex >= (heapAllocation.hostModifiedIndex + heapAllocation.hostModifiedCount)) { - heapAllocation.hostModifiedCount = heapIndex - heapAllocation.hostModifiedIndex + 1; } } @@ -1351,7 +1351,7 @@ namespace RT64 { const D3D12Texture *interfaceTexture = static_cast(desc.colorAttachments[i]); assert((interfaceTexture->desc.flags & RenderTextureFlag::RENDER_TARGET) && "Color attachment must be a render target."); colorTargets.emplace_back(interfaceTexture); - colorHandles.emplace_back(device->colorTargetHeapAllocator->getHostCPUHandleAt(interfaceTexture->targetAllocatorOffset)); + colorHandles.emplace_back(device->colorTargetHeapAllocator->getCPUHandleAt(interfaceTexture->targetAllocatorOffset)); if (i == 0) { width = interfaceTexture->desc.width; @@ -1367,10 +1367,10 @@ namespace RT64 { // The read-only handle is on the second slot on the DSV heap. if (desc.depthAttachmentReadOnly) { - depthHandle = device->depthTargetHeapAllocator->getHostCPUHandleAt(interfaceTexture->targetAllocatorOffset + 1); + depthHandle = device->depthTargetHeapAllocator->getCPUHandleAt(interfaceTexture->targetAllocatorOffset + 1); } else { - depthHandle = device->depthTargetHeapAllocator->getHostCPUHandleAt(interfaceTexture->targetAllocatorOffset); + depthHandle = device->depthTargetHeapAllocator->getCPUHandleAt(interfaceTexture->targetAllocatorOffset); } if (desc.colorAttachmentsCount == 0) { @@ -1954,7 +1954,7 @@ namespace RT64 { void D3D12CommandList::checkDescriptorHeaps() { if (!descriptorHeapsSet) { - ID3D12DescriptorHeap *descriptorHeaps[] = { device->viewHeapAllocator->shaderHeap, device->samplerHeapAllocator->shaderHeap }; + ID3D12DescriptorHeap *descriptorHeaps[] = { device->viewHeapAllocator->heap, device->samplerHeapAllocator->heap }; d3d->SetDescriptorHeaps(std::size(descriptorHeaps), descriptorHeaps); descriptorHeapsSet = true; } @@ -2014,29 +2014,15 @@ namespace RT64 { } } - static void updateShaderVisibleSet(D3D12Device *device, D3D12DescriptorHeapAllocator *heapAllocator, D3D12DescriptorSet::HeapAllocation &heapAllocation, D3D12_DESCRIPTOR_HEAP_TYPE heapType) { - if (heapAllocation.hostModifiedCount == 0) { - return; - } - - const D3D12_CPU_DESCRIPTOR_HANDLE dstHandle = heapAllocator->getShaderCPUHandleAt(heapAllocation.offset + heapAllocation.hostModifiedIndex); - const D3D12_CPU_DESCRIPTOR_HANDLE srcHandle = heapAllocator->getHostCPUHandleAt(heapAllocation.offset + heapAllocation.hostModifiedIndex); - device->d3d->CopyDescriptorsSimple(heapAllocation.hostModifiedCount, dstHandle, srcHandle, heapType); - heapAllocation.hostModifiedIndex = 0; - heapAllocation.hostModifiedCount = 0; - } void D3D12CommandList::setDescriptorSet(const D3D12PipelineLayout *activePipelineLayout, RenderDescriptorSet *descriptorSet, uint32_t setIndex, bool setCompute) { assert(descriptorSet != nullptr); assert(activePipelineLayout != nullptr); assert(setIndex < activePipelineLayout->setCount); - // Copy descriptors if the shader visible heap is outdated. - D3D12DescriptorSet *interfaceDescriptorSet = static_cast(descriptorSet); - updateShaderVisibleSet(device, device->viewHeapAllocator.get(), interfaceDescriptorSet->viewAllocation, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - updateShaderVisibleSet(device, device->samplerHeapAllocator.get(), interfaceDescriptorSet->samplerAllocation, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); checkDescriptorHeaps(); + D3D12DescriptorSet *interfaceDescriptorSet = static_cast(descriptorSet); setRootDescriptorTable(device->viewHeapAllocator.get(), interfaceDescriptorSet->viewAllocation, activePipelineLayout->setViewRootIndices[setIndex], setCompute); setRootDescriptorTable(device->samplerHeapAllocator.get(), interfaceDescriptorSet->samplerAllocation, activePipelineLayout->setSamplerRootIndices[setIndex], setCompute); } @@ -2046,7 +2032,7 @@ namespace RT64 { return; } - const D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = heapAllocator->getShaderGPUHandleAt(heapAllocation.offset); + const D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = heapAllocator->getGPUHandleAt(heapAllocation.offset); if (setCompute) { d3d->SetComputeRootDescriptorTable(rootIndex, gpuHandle); } @@ -2337,6 +2323,7 @@ namespace RT64 { this->dimension = desc.dimension; this->mipLevels = desc.mipLevels; this->mipSlice = desc.mipSlice; + this->shader4ComponentMapping = toD3D12(desc.componentMapping); // D3D12 and Vulkan disagree on whether D32 is usable as a texture view format. We just make D3D12 use R32 instead. if (format == DXGI_FORMAT_D32_FLOAT) { @@ -2453,7 +2440,7 @@ namespace RT64 { break; } - const D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = device->colorTargetHeapAllocator->getHostCPUHandleAt(targetAllocatorOffset); + const D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = device->colorTargetHeapAllocator->getCPUHandleAt(targetAllocatorOffset); device->d3d->CreateRenderTargetView(d3d, &rtvDesc, cpuHandle); } @@ -2491,8 +2478,8 @@ namespace RT64 { break; } - const D3D12_CPU_DESCRIPTOR_HANDLE writeHandle = device->depthTargetHeapAllocator->getHostCPUHandleAt(targetAllocatorOffset); - const D3D12_CPU_DESCRIPTOR_HANDLE readOnlyHandle = device->depthTargetHeapAllocator->getHostCPUHandleAt(targetAllocatorOffset + 1); + const D3D12_CPU_DESCRIPTOR_HANDLE writeHandle = device->depthTargetHeapAllocator->getCPUHandleAt(targetAllocatorOffset); + const D3D12_CPU_DESCRIPTOR_HANDLE readOnlyHandle = device->depthTargetHeapAllocator->getCPUHandleAt(targetAllocatorOffset + 1); device->d3d->CreateDepthStencilView(d3d, &dsvDesc, writeHandle); dsvDesc.Flags = D3D12_DSV_FLAG_READ_ONLY_DEPTH; @@ -3346,6 +3333,7 @@ namespace RT64 { # ifdef D3D12_DEBUG_LAYER_SUPRESS_SAMPLE_POSITIONS_ERROR D3D12_MESSAGE_ID_SAMPLEPOSITIONS_MISMATCH_RECORDTIME_ASSUMEDFROMCLEAR, D3D12_MESSAGE_ID_SAMPLEPOSITIONS_MISMATCH_DEFERRED, + D3D12_MESSAGE_ID_RESOURCE_BARRIER_DUPLICATE_SUBRESOURCE_TRANSITIONS, # endif }; @@ -3574,12 +3562,12 @@ namespace RT64 { if (interfaceDescriptorSet != nullptr) { if (interfaceDescriptorSet->viewAllocation.count > 0) { uint32_t viewRootIndex = raytracingPipeline->pipelineLayout->setViewRootIndices[i]; - descriptorHandles[viewRootIndex] = viewHeapAllocator->getShaderGPUHandleAt(interfaceDescriptorSet->viewAllocation.offset).ptr; + descriptorHandles[viewRootIndex] = viewHeapAllocator->getGPUHandleAt(interfaceDescriptorSet->viewAllocation.offset).ptr; } if (interfaceDescriptorSet->samplerAllocation.count > 0) { uint32_t samplerRootIndex = raytracingPipeline->pipelineLayout->setSamplerRootIndices[i]; - descriptorHandles[samplerRootIndex] = samplerHeapAllocator->getShaderGPUHandleAt(interfaceDescriptorSet->samplerAllocation.offset).ptr; + descriptorHandles[samplerRootIndex] = samplerHeapAllocator->getGPUHandleAt(interfaceDescriptorSet->samplerAllocation.offset).ptr; } } } @@ -3660,9 +3648,12 @@ namespace RT64 { UINT dxgiFactoryFlags = 0; # ifdef D3D12_DEBUG_LAYER_ENABLED - ID3D12Debug *debugController; + ID3D12Debug1 *debugController; if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&debugController)))) { debugController->EnableDebugLayer(); +# ifdef D3D12_DEBUG_LAYER_GPU_BASED_VALIDATION_ENABLED + debugController->SetEnableGPUBasedValidation(TRUE); +# endif // Enable additional debug layers. dxgiFactoryFlags |= DXGI_CREATE_FACTORY_DEBUG; diff --git a/UnleashedRecomp/gpu/rhi/rt64_d3d12.h b/UnleashedRecomp/gpu/rhi/rt64_d3d12.h index fcc9f58..961d827 100644 --- a/UnleashedRecomp/gpu/rhi/rt64_d3d12.h +++ b/UnleashedRecomp/gpu/rhi/rt64_d3d12.h @@ -46,14 +46,12 @@ namespace RT64 { } }; - ID3D12DescriptorHeap *hostHeap = nullptr; - ID3D12DescriptorHeap *shaderHeap = nullptr; + ID3D12DescriptorHeap *heap = nullptr; uint32_t heapSize = 0; uint32_t freeSize = 0; D3D12Device *device = nullptr; - D3D12_CPU_DESCRIPTOR_HANDLE hostCPUDescriptorHandle = {}; - D3D12_CPU_DESCRIPTOR_HANDLE shaderCPUDescriptorHandle = {}; - D3D12_GPU_DESCRIPTOR_HANDLE shaderGPUDescriptorHandle = {}; + D3D12_CPU_DESCRIPTOR_HANDLE cpuDescriptorHandle = {}; + D3D12_GPU_DESCRIPTOR_HANDLE gpuDescriptorHandle = {}; UINT descriptorHandleIncrement = 0; OffsetFreeBlockMap offsetFreeBlockMap; SizeFreeBlockMap sizeFreeBlockMap; @@ -64,9 +62,8 @@ namespace RT64 { void addFreeBlock(uint32_t offset, uint32_t size); uint32_t allocate(uint32_t size); void free(uint32_t offset, uint32_t size); - D3D12_CPU_DESCRIPTOR_HANDLE getHostCPUHandleAt(uint32_t index) const; - D3D12_CPU_DESCRIPTOR_HANDLE getShaderCPUHandleAt(uint32_t index) const; - D3D12_GPU_DESCRIPTOR_HANDLE getShaderGPUHandleAt(uint32_t index) const; + D3D12_CPU_DESCRIPTOR_HANDLE getCPUHandleAt(uint32_t index) const; + D3D12_GPU_DESCRIPTOR_HANDLE getGPUHandleAt(uint32_t index) const; }; struct D3D12DescriptorSet : RenderDescriptorSet { @@ -75,8 +72,6 @@ namespace RT64 { struct HeapAllocation { uint32_t offset = 0; uint32_t count = 0; - uint32_t hostModifiedIndex = 0; - uint32_t hostModifiedCount = 0; }; HeapAllocation viewAllocation; @@ -94,7 +89,6 @@ namespace RT64 { void setSRV(uint32_t descriptorIndex, ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *viewDesc); void setUAV(uint32_t descriptorIndex, ID3D12Resource *resource, const D3D12_UNORDERED_ACCESS_VIEW_DESC *viewDesc); void setCBV(uint32_t descriptorIndex, ID3D12Resource *resource, uint64_t bufferSize); - void setHostModified(HeapAllocation &heapAllocation, uint32_t heapIndex); }; struct D3D12SwapChain : RenderSwapChain { @@ -289,6 +283,7 @@ namespace RT64 { RenderTextureViewDimension dimension = RenderTextureViewDimension::UNKNOWN; uint32_t mipLevels = 0; uint32_t mipSlice = 0; + uint32_t shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; D3D12TextureView(D3D12Texture *texture, const RenderTextureViewDesc &desc); ~D3D12TextureView() override; diff --git a/UnleashedRecomp/gpu/rhi/rt64_render_interface_types.h b/UnleashedRecomp/gpu/rhi/rt64_render_interface_types.h index 50d20a5..a14c2fc 100644 --- a/UnleashedRecomp/gpu/rhi/rt64_render_interface_types.h +++ b/UnleashedRecomp/gpu/rhi/rt64_render_interface_types.h @@ -921,11 +921,34 @@ namespace RT64 { } }; + enum class RenderSwizzle : uint8_t + { + IDENTITY = 0, + ZERO = 1, + ONE = 2, + R = 3, + G = 4, + B = 5, + A = 6 + }; + + struct RenderComponentMapping + { + RenderSwizzle r = RenderSwizzle::IDENTITY; + RenderSwizzle g = RenderSwizzle::IDENTITY; + RenderSwizzle b = RenderSwizzle::IDENTITY; + RenderSwizzle a = RenderSwizzle::IDENTITY; + + RenderComponentMapping() = default; + RenderComponentMapping(RenderSwizzle r, RenderSwizzle g, RenderSwizzle b, RenderSwizzle a) : r(r), g(g), b(b), a(a) {} + }; + struct RenderTextureViewDesc { RenderFormat format = RenderFormat::UNKNOWN; RenderTextureViewDimension dimension = RenderTextureViewDimension::UNKNOWN; uint32_t mipLevels = 0; uint32_t mipSlice = 0; + RenderComponentMapping componentMapping; RenderTextureViewDesc() = default; diff --git a/UnleashedRecomp/gpu/rhi/rt64_vulkan.cpp b/UnleashedRecomp/gpu/rhi/rt64_vulkan.cpp index 82978c6..1073c82 100644 --- a/UnleashedRecomp/gpu/rhi/rt64_vulkan.cpp +++ b/UnleashedRecomp/gpu/rhi/rt64_vulkan.cpp @@ -677,6 +677,28 @@ namespace RT64 { } } + static VkComponentSwizzle toVk(RenderSwizzle swizzle) { + switch (swizzle) { + case RenderSwizzle::IDENTITY: + return VK_COMPONENT_SWIZZLE_IDENTITY; + case RenderSwizzle::ZERO: + return VK_COMPONENT_SWIZZLE_ZERO; + case RenderSwizzle::ONE: + return VK_COMPONENT_SWIZZLE_ONE; + case RenderSwizzle::R: + return VK_COMPONENT_SWIZZLE_R; + case RenderSwizzle::G: + return VK_COMPONENT_SWIZZLE_G; + case RenderSwizzle::B: + return VK_COMPONENT_SWIZZLE_B; + case RenderSwizzle::A: + return VK_COMPONENT_SWIZZLE_A; + default: + assert(false && "Unknown swizzle type."); + return VK_COMPONENT_SWIZZLE_IDENTITY; + } + } + static void setObjectName(VkDevice device, VkDebugReportObjectTypeEXT objectType, uint64_t object, const std::string &name) { # ifdef VULKAN_OBJECT_NAMES_ENABLED VkDebugMarkerObjectNameInfoEXT nameInfo = {}; @@ -982,10 +1004,10 @@ namespace RT64 { viewInfo.image = texture->vk; viewInfo.viewType = toImageViewType(desc.dimension); viewInfo.format = toVk(desc.format); - viewInfo.components.r = VK_COMPONENT_SWIZZLE_IDENTITY; - viewInfo.components.g = VK_COMPONENT_SWIZZLE_IDENTITY; - viewInfo.components.b = VK_COMPONENT_SWIZZLE_IDENTITY; - viewInfo.components.a = VK_COMPONENT_SWIZZLE_IDENTITY; + viewInfo.components.r = toVk(desc.componentMapping.r); + viewInfo.components.g = toVk(desc.componentMapping.g); + viewInfo.components.b = toVk(desc.componentMapping.b); + viewInfo.components.a = toVk(desc.componentMapping.a); viewInfo.subresourceRange.aspectMask = (texture->desc.flags & RenderTextureFlag::DEPTH_TARGET) ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT; viewInfo.subresourceRange.baseMipLevel = desc.mipSlice; viewInfo.subresourceRange.levelCount = desc.mipLevels; diff --git a/UnleashedRecomp/gpu/shader/movie_common.hlsl b/UnleashedRecomp/gpu/shader/movie_common.hlsl new file mode 100644 index 0000000..9111346 --- /dev/null +++ b/UnleashedRecomp/gpu/shader/movie_common.hlsl @@ -0,0 +1,51 @@ +#include "../../../thirdparty/ShaderRecomp/ShaderRecomp/shader_common.hlsli" + +CONSTANT_BUFFER(VertexShaderConstants, b0) +{ + [[vk::offset(2880)]] float4 g_ViewportSize PACK_OFFSET(c180); +}; + +CONSTANT_BUFFER(PixelShaderConstants, b1) +{ + [[vk::offset(0)]] float fZmin PACK_OFFSET(c0); + [[vk::offset(4)]] float fZmax PACK_OFFSET(c1); +}; + +CONSTANT_BUFFER(SharedConstants, b2) +{ + [[vk::offset(0)]] uint Tex0_ResourceDescriptorIndex PACK_OFFSET(c0.x); + [[vk::offset(4)]] uint Tex1_ResourceDescriptorIndex PACK_OFFSET(c0.y); + [[vk::offset(8)]] uint Tex2_ResourceDescriptorIndex PACK_OFFSET(c0.z); + [[vk::offset(12)]] uint Tex3_ResourceDescriptorIndex PACK_OFFSET(c0.w); + [[vk::offset(16)]] uint Tex4_ResourceDescriptorIndex PACK_OFFSET(c1.x); + + [[vk::offset(64)]] uint Tex0_SamplerDescriptorIndex PACK_OFFSET(c4.x); + [[vk::offset(68)]] uint Tex1_SamplerDescriptorIndex PACK_OFFSET(c4.y); + [[vk::offset(72)]] uint Tex2_SamplerDescriptorIndex PACK_OFFSET(c4.z); + [[vk::offset(76)]] uint Tex3_SamplerDescriptorIndex PACK_OFFSET(c4.w); + [[vk::offset(80)]] uint Tex4_SamplerDescriptorIndex PACK_OFFSET(c5.x); + + SHARED_CONSTANTS; +}; + +#define bCsc (GET_SHARED_CONSTANT(g_Booleans) & (1 << (16 + 0))) +#define bAmv (GET_SHARED_CONSTANT(g_Booleans) & (1 << (16 + 1))) +#define bZmv (GET_SHARED_CONSTANT(g_Booleans) & (1 << (16 + 2))) + +struct VertexShaderInput +{ + [[vk::location(0)]] float4 ObjPos : POSITION; + [[vk::location(4)]] float2 UV : TEXCOORD; +}; + +struct Interpolators +{ + float4 ProjPos : SV_Position; + float2 UV : TEXCOORD0; +}; + +struct PixelShaderOutput +{ + float4 Color : SV_Target0; + float Depth : SV_Depth; +}; diff --git a/UnleashedRecomp/gpu/shader/movie_ps.hlsl b/UnleashedRecomp/gpu/shader/movie_ps.hlsl new file mode 100644 index 0000000..2e83756 --- /dev/null +++ b/UnleashedRecomp/gpu/shader/movie_ps.hlsl @@ -0,0 +1,102 @@ +#include "movie_common.hlsl" + +PixelShaderOutput main(in Interpolators In) +{ +#ifdef __spirv__ + PixelShaderConstants constants = vk::RawBufferLoad(g_PushConstants.PixelShaderConstants, 0x100); + SharedConstants sharedConstants = vk::RawBufferLoad(g_PushConstants.SharedConstants, 0x100); +#endif + + Texture2D Tex0 = g_Texture2DDescriptorHeap[GET_SHARED_CONSTANT(Tex0_ResourceDescriptorIndex)]; + Texture2D Tex1 = g_Texture2DDescriptorHeap[GET_SHARED_CONSTANT(Tex1_ResourceDescriptorIndex)]; + Texture2D Tex2 = g_Texture2DDescriptorHeap[GET_SHARED_CONSTANT(Tex2_ResourceDescriptorIndex)]; + Texture2D Tex3 = g_Texture2DDescriptorHeap[GET_SHARED_CONSTANT(Tex3_ResourceDescriptorIndex)]; + Texture2D Tex4 = g_Texture2DDescriptorHeap[GET_SHARED_CONSTANT(Tex4_ResourceDescriptorIndex)]; + + SamplerState Tex0_s = g_SamplerDescriptorHeap[GET_SHARED_CONSTANT(Tex0_SamplerDescriptorIndex)]; + SamplerState Tex1_s = g_SamplerDescriptorHeap[GET_SHARED_CONSTANT(Tex1_SamplerDescriptorIndex)]; + SamplerState Tex2_s = g_SamplerDescriptorHeap[GET_SHARED_CONSTANT(Tex2_SamplerDescriptorIndex)]; + SamplerState Tex3_s = g_SamplerDescriptorHeap[GET_SHARED_CONSTANT(Tex3_SamplerDescriptorIndex)]; + SamplerState Tex4_s = g_SamplerDescriptorHeap[GET_SHARED_CONSTANT(Tex4_SamplerDescriptorIndex)]; + + PixelShaderOutput Out; + float ValY = Tex0.Sample(Tex0_s, In.UV).r; + float ValU = Tex1.Sample(Tex1_s, In.UV).r - 0.5; + float ValV = Tex2.Sample(Tex2_s, In.UV).r - 0.5; + float ValA = 1.0; + float ValD = 0.0; + if (bAmv) + ValA = (Tex3.Sample(Tex3_s, In.UV).r - 0.0625) * 1.164; + if (bZmv) + { + ValD = (Tex4.Sample(Tex4_s, In.UV).r - 0.0625) * 1.164; + if (ValD < 9.0 / 255.0) + { + ValD = 0.0; + } + else if (ValD < 17.0 / 255.0) + { + ValD = GET_CONSTANT(fZmin); + } + else if (ValD < 224.0 / 255.0) + { + ValD = (ValD - 17.0 / 255.0) / (223.0 / 255.0 - 17.0 / 255.0) * (GET_CONSTANT(fZmax) - GET_CONSTANT(fZmin)) + GET_CONSTANT(fZmin); + } + else if (ValD < 240.0 / 255.0) + { + ValD = GET_CONSTANT(fZmax); + } + else + { + ValD = 1.0; + } + } + if (bCsc) + { + if (ValY < 16.0 / 255.0) + { + ValY = ValY * 3.0 / 2.0; + } + else if (ValY < 176.0 / 255.0) + { + ValY = 24.0 / 255.0 + (ValY - 16.0 / 255.0) / 2.0; + } + else if (ValY < 192.0 / 255.0) + { + ValY = 104.0 / 255.0 + (ValY - 176.0 / 255.0) / 1.0; + } + else + { + ValY = 120.0 / 255.0 + (ValY - 192.0 / 255.0) * 2.0; + } + if (abs(ValU) < 24.0 / 255.0) + { + ValU /= 3.0; + } + else + { + ValU = (8.0 / 255.0 + (abs(ValU) - 24.0 / 255.0) * (120.0 / 104.0)) * sign(ValU); + } + if (abs(ValV) < 24.0 / 255.0) + { + ValV /= 3.0; + } + else + { + ValV = (8.0 / 255.0 + (abs(ValV) - 24.0 / 255.0) * (120.0 / 104.0)) * sign(ValV); + } + Out.Color.r = ValY + ValV * 1.402; + Out.Color.g = ValY - ValU * 0.344 - ValV * 0.714; + Out.Color.b = ValY + ValU * 1.772; + } + else + { + ValY = (ValY - 0.0625) * 1.164; + Out.Color.r = ValY + ValV * 1.596; + Out.Color.g = ValY - ValU * 0.392 - ValV * 0.813; + Out.Color.b = ValY + ValU * 2.017; + } + Out.Color.a = ValA; + Out.Depth = ValD; + return Out; +} diff --git a/UnleashedRecomp/gpu/shader/movie_vs.hlsl b/UnleashedRecomp/gpu/shader/movie_vs.hlsl new file mode 100644 index 0000000..e4fc8f8 --- /dev/null +++ b/UnleashedRecomp/gpu/shader/movie_vs.hlsl @@ -0,0 +1,14 @@ +#include "movie_common.hlsl" + +Interpolators main(in VertexShaderInput In) +{ +#ifdef __spirv__ + VertexShaderConstants constants = vk::RawBufferLoad(g_PushConstants.VertexShaderConstants, 0x100); +#endif + + Interpolators Out; + Out.ProjPos = In.ObjPos; + Out.ProjPos.xy += float2(GET_CONSTANT(g_ViewportSize).z, -GET_CONSTANT(g_ViewportSize).w); + Out.UV = In.UV; + return Out; +} diff --git a/UnleashedRecomp/gpu/video.cpp b/UnleashedRecomp/gpu/video.cpp index 6d7f3b2..be476b0 100644 --- a/UnleashedRecomp/gpu/video.cpp +++ b/UnleashedRecomp/gpu/video.cpp @@ -13,6 +13,10 @@ #include "shader/copy_vs.hlsl.dxil.h" #include "shader/copy_vs.hlsl.spirv.h" +#include "shader/movie_vs.hlsl.dxil.h" +#include "shader/movie_vs.hlsl.spirv.h" +#include "shader/movie_ps.hlsl.dxil.h" +#include "shader/movie_ps.hlsl.spirv.h" #include "shader/resolve_msaa_depth_2x.hlsl.dxil.h" #include "shader/resolve_msaa_depth_2x.hlsl.spirv.h" #include "shader/resolve_msaa_depth_4x.hlsl.dxil.h" @@ -280,6 +284,8 @@ static std::vector g_tempDescriptorIndices[NUM_FRAMES]; static RenderBufferReference g_quadIndexBuffer; static uint32_t g_quadCount; +static uint32_t g_mainThreadId; + static std::vector g_barriers; static void FlushBarriers() @@ -519,6 +525,13 @@ static const std::pair g_setRenderStateFunctions[] = static std::unique_ptr g_resolveMsaaDepthPipelines[3]; +#define CREATE_SHADER(NAME) \ + g_device->createShader( \ + g_vulkan ? g_##NAME##_spirv : g_##NAME##_dxil, \ + g_vulkan ? sizeof(g_##NAME##_spirv) : sizeof(g_##NAME##_dxil), \ + "main", \ + g_vulkan ? RenderShaderFormat::SPIRV : RenderShaderFormat::DXIL) + static void CreateHostDevice() { for (uint32_t i = 0; i < 16; i++) @@ -550,6 +563,8 @@ static void CreateHostDevice() for (auto& renderSemaphore : g_renderSemaphores) renderSemaphore = g_device->createCommandSemaphore(); + g_mainThreadId = GetCurrentThreadId(); + RenderPipelineLayoutBuilder pipelineLayoutBuilder; pipelineLayoutBuilder.begin(false, true); @@ -562,6 +577,8 @@ static void CreateHostDevice() descriptorSetBuilder.end(true, TEXTURE_DESCRIPTOR_SIZE); g_textureDescriptorSet = descriptorSetBuilder.create(g_device.get()); + g_textureDescriptorSet->setTexture(0, nullptr, RenderTextureLayout::SHADER_READ); + pipelineLayoutBuilder.addDescriptorSet(descriptorSetBuilder); pipelineLayoutBuilder.addDescriptorSet(descriptorSetBuilder); pipelineLayoutBuilder.addDescriptorSet(descriptorSetBuilder); @@ -571,6 +588,11 @@ static void CreateHostDevice() descriptorSetBuilder.end(true, SAMPLER_DESCRIPTOR_SIZE); g_samplerDescriptorSet = descriptorSetBuilder.create(g_device.get()); + auto& [descriptorIndex, sampler] = g_samplerStates[XXH3_64bits(&g_samplerDescs[0], sizeof(RenderSamplerDesc))]; + descriptorIndex = 1; + sampler = g_device->createSampler(g_samplerDescs[0]); + g_samplerDescriptorSet->setSampler(0, sampler.get()); + pipelineLayoutBuilder.addDescriptorSet(descriptorSetBuilder); if (g_vulkan) @@ -588,13 +610,6 @@ static void CreateHostDevice() g_pipelineLayout = pipelineLayoutBuilder.create(g_device.get()); -#define CREATE_SHADER(NAME) \ - g_device->createShader( \ - g_vulkan ? g_##NAME##_spirv : g_##NAME##_dxil, \ - g_vulkan ? sizeof(g_##NAME##_spirv) : sizeof(g_##NAME##_dxil), \ - "main", \ - g_vulkan ? RenderShaderFormat::SPIRV : RenderShaderFormat::DXIL) - auto copyShader = CREATE_SHADER(copy_vs); for (size_t i = 0; i < std::size(g_resolveMsaaDepthPipelines); i++) @@ -623,8 +638,6 @@ static void CreateHostDevice() desc.depthTargetFormat = RenderFormat::D32_FLOAT; g_resolveMsaaDepthPipelines[i] = g_device->createGraphicsPipeline(desc); } - -#undef CREATE_SHADER } static void WaitForGPU() @@ -819,31 +832,41 @@ static void DestructResource(GuestResource* resource) static constexpr uint32_t PITCH_ALIGNMENT = 0x100; static constexpr uint32_t PLACEMENT_ALIGNMENT = 0x200; +static uint32_t ComputeTexturePitch(GuestTexture* texture) +{ + return (texture->width * RenderFormatSize(texture->format) + PITCH_ALIGNMENT - 1) & ~(PITCH_ALIGNMENT - 1); +} + static void LockTextureRect(GuestTexture* texture, uint32_t, GuestLockedRect* lockedRect) { - uint32_t pitch = (texture->width * RenderFormatSize(texture->format) + PITCH_ALIGNMENT - 1) & ~(PITCH_ALIGNMENT - 1); + uint32_t pitch = ComputeTexturePitch(texture); + uint32_t slicePitch = pitch * texture->height; if (texture->mappedMemory == nullptr) - texture->mappedMemory = g_userHeap.AllocPhysical(pitch * texture->height, 0x10); + texture->mappedMemory = g_userHeap.AllocPhysical(slicePitch, 0x10); lockedRect->pitch = pitch; lockedRect->bits = g_memory.MapVirtual(texture->mappedMemory); } -template -static void ExecuteCopyCommandList(const T& function) -{ - std::lock_guard lock(g_copyMutex); - - g_copyCommandList->begin(); - function(); - g_copyCommandList->end(); - g_copyQueue->executeCommandLists(g_copyCommandList.get(), g_copyCommandFence.get()); - g_copyQueue->waitForCommandFence(g_copyCommandFence.get()); -} - static void UnlockTextureRect(GuestTexture* texture) { + assert(GetCurrentThreadId() == g_mainThreadId); + + g_barriers.emplace_back(texture->texture.get(), RenderTextureLayout::COPY_DEST); + FlushBarriers(); + + uint32_t pitch = ComputeTexturePitch(texture); + uint32_t slicePitch = pitch * texture->height; + + auto allocation = g_uploadAllocators[g_frame].allocate(slicePitch, PLACEMENT_ALIGNMENT); + memcpy(allocation.memory, texture->mappedMemory, slicePitch); + + g_commandLists[g_frame]->copyTextureRegion( + RenderTextureCopyLocation::Subresource(texture->texture.get(), 0), + RenderTextureCopyLocation::PlacedFootprint(allocation.bufferReference.ref, texture->format, texture->width, texture->height, 1, pitch / RenderFormatSize(texture->format), allocation.bufferReference.offset)); + + texture->pendingBarrier = true; } static void* LockBuffer(GuestBuffer* buffer, uint32_t flags) @@ -861,6 +884,18 @@ static void* LockVertexBuffer(GuestBuffer* buffer, uint32_t, uint32_t, uint32_t return LockBuffer(buffer, flags); } +template +static void ExecuteCopyCommandList(const T& function) +{ + std::lock_guard lock(g_copyMutex); + + g_copyCommandList->begin(); + function(); + g_copyCommandList->end(); + g_copyQueue->executeCommandLists(g_copyCommandList.get(), g_copyCommandFence.get()); + g_copyQueue->waitForCommandFence(g_copyCommandFence.get()); +} + template static void UnlockBuffer(GuestBuffer* buffer) { @@ -1046,6 +1081,21 @@ static GuestTexture* CreateTexture(uint32_t width, uint32_t height, uint32_t dep viewDesc.format = desc.format; viewDesc.dimension = texture->type == ResourceType::VolumeTexture ? RenderTextureViewDimension::TEXTURE_3D : RenderTextureViewDimension::TEXTURE_2D; viewDesc.mipLevels = levels; + + switch (format) + { + case D3DFMT_D24FS8: + case D3DFMT_D24S8: + case D3DFMT_L8: + case D3DFMT_L8_2: + viewDesc.componentMapping = RenderComponentMapping(RenderSwizzle::R, RenderSwizzle::R, RenderSwizzle::R, RenderSwizzle::ONE); + break; + + case D3DFMT_X8R8G8B8: + viewDesc.componentMapping = RenderComponentMapping(RenderSwizzle::G, RenderSwizzle::B, RenderSwizzle::A, RenderSwizzle::ONE); + break; + } + texture->textureView = texture->texture->createTextureView(viewDesc); texture->width = width; @@ -1056,6 +1106,10 @@ static GuestTexture* CreateTexture(uint32_t width, uint32_t height, uint32_t dep g_textureDescriptorSet->setTexture(texture->descriptorIndex, texture->texture.get(), RenderTextureLayout::SHADER_READ, texture->textureView.get()); +#ifdef _DEBUG + texture->texture->setName(std::format("Texture {:X}", g_memory.MapVirtual(texture))); +#endif + return texture; } @@ -1118,6 +1172,10 @@ static GuestSurface* CreateSurface(uint32_t width, uint32_t height, uint32_t for g_textureDescriptorSet->setTexture(surface->descriptorIndex, surface->textureHolder.get(), RenderTextureLayout::SHADER_READ, surface->textureView.get()); } +#ifdef _DEBUG + surface->texture->setName(std::format("{} {:X}", desc.flags & RenderTextureFlag::RENDER_TARGET ? "Render Target" : "Depth Stencil", g_memory.MapVirtual(surface))); +#endif + return surface; } @@ -1588,10 +1646,10 @@ static void FlushRenderState(GuestDevice* device) descriptorIndex = g_samplerStates.size(); sampler = g_device->createSampler(samplerDesc); - g_samplerDescriptorSet->setSampler(descriptorIndex, sampler.get()); + g_samplerDescriptorSet->setSampler(descriptorIndex - 1, sampler.get()); } - SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.samplerIndices[i], descriptorIndex); + SetDirtyValue(g_dirtyStates.sharedConstants, g_sharedConstants.samplerIndices[i], descriptorIndex - 1); } device->dirtyFlags[3] = device->dirtyFlags[3].get() & ~mask; @@ -2337,6 +2395,8 @@ static RenderFormat ConvertDXGIFormat(ddspp::DXGIFormat format) return RenderFormat::R8G8B8A8_SINT; case ddspp::B8G8R8A8_UNORM: return RenderFormat::B8G8R8A8_UNORM; + case ddspp::B8G8R8X8_UNORM: + return RenderFormat::B8G8R8A8_UNORM; case ddspp::R16G16_TYPELESS: return RenderFormat::R16G16_TYPELESS; case ddspp::R16G16_FLOAT: @@ -2567,7 +2627,8 @@ void IndexBufferLengthMidAsmHook(PPCRegister& r3) void SetShadowResolutionMidAsmHook(PPCRegister& r11) { - r11.u64 = 4096; + if (Config::ShadowResolution > 0) + r11.u64 = Config::ShadowResolution; } void Primitive2DHalfPixelOffsetMidAsmHook(PPCRegister& f13) @@ -2577,8 +2638,8 @@ void Primitive2DHalfPixelOffsetMidAsmHook(PPCRegister& f13) static void SetResolution(be* device) { - uint32_t width = g_swapChain->getWidth(); - uint32_t height = g_swapChain->getHeight(); + uint32_t width = uint32_t(g_swapChain->getWidth() * Config::ResolutionScale); + uint32_t height = uint32_t(g_swapChain->getHeight() * Config::ResolutionScale); device[46] = width == 0 ? 880 : width; device[47] = height == 0 ? 720 : height; } @@ -2588,6 +2649,50 @@ static uint32_t StubFunction() return 0; } +static GuestShader* g_movieVertexShader; +static GuestShader* g_moviePixelShader; +static GuestVertexDeclaration* g_movieVertexDeclaration; + +static void ScreenShaderInit(be* a1, uint32_t a2, uint32_t a3, GuestVertexElement* vertexElements) +{ + if (g_moviePixelShader == nullptr) + { + g_moviePixelShader = g_userHeap.AllocPhysical(ResourceType::PixelShader); + g_moviePixelShader->shader = CREATE_SHADER(movie_ps); + } + + if (g_movieVertexShader == nullptr) + { + g_movieVertexShader = g_userHeap.AllocPhysical(ResourceType::VertexShader); + g_movieVertexShader->shader = CREATE_SHADER(movie_vs); + } + + if (g_movieVertexDeclaration == nullptr) + g_movieVertexDeclaration = CreateVertexDeclaration(vertexElements); + + g_moviePixelShader->AddRef(); + g_movieVertexShader->AddRef(); + g_movieVertexDeclaration->AddRef(); + + a1[2] = g_memory.MapVirtual(g_moviePixelShader); + a1[3] = g_memory.MapVirtual(g_movieVertexShader); + a1[4] = g_memory.MapVirtual(g_movieVertexDeclaration); +} + +void MovieRendererMidAsmHook(PPCRegister& r3) +{ + auto device = reinterpret_cast(g_memory.Translate(r3.u32)); + + // Force linear filtering & clamp addressing + for (size_t i = 0; i < 3; i++) + { + device->samplerStates[i].data[0] = (device->samplerStates[i].data[0].get() & ~0x7fc00) | 0x24800; + device->samplerStates[i].data[3] = (device->samplerStates[i].data[3].get() & ~0x1f80000) | 0x1280000; + } + + device->dirtyFlags[3] = device->dirtyFlags[3].get() | 0xe0000000ull; +} + GUEST_FUNCTION_HOOK(sub_82BD99B0, CreateDevice); GUEST_FUNCTION_HOOK(sub_82BE6230, DestructResource); @@ -2653,6 +2758,8 @@ GUEST_FUNCTION_HOOK(sub_82E43FC8, MakePictureData); GUEST_FUNCTION_HOOK(sub_82E9EE38, SetResolution); GUEST_FUNCTION_HOOK(sub_82BE77B0, StubFunction); +GUEST_FUNCTION_HOOK(sub_82AE2BF8, ScreenShaderInit); + GUEST_FUNCTION_STUB(sub_822C15D8); GUEST_FUNCTION_STUB(sub_822C1810); GUEST_FUNCTION_STUB(sub_82BD97A8); diff --git a/UnleashedRecomp/misc_impl.cpp b/UnleashedRecomp/misc_impl.cpp index 9debae2..e9d391c 100644 --- a/UnleashedRecomp/misc_impl.cpp +++ b/UnleashedRecomp/misc_impl.cpp @@ -50,17 +50,3 @@ PPC_FUNC(sub_82BD4AE8) { sub_831B1630(ctx, base); } - -// Movie player stubs -GUEST_FUNCTION_STUB(sub_82AE3638); -GUEST_FUNCTION_STUB(sub_82AE2BF8); - -// Logo skip -PPC_FUNC(sub_82547DF0) -{ - ctx.r4.u64 = 0; - ctx.r5.u64 = 0; - ctx.r6.u64 = 1; - ctx.r7.u64 = 0; - sub_825517C8(ctx, base); -} diff --git a/UnleashedRecomp/res/config.toml b/UnleashedRecomp/res/config.toml index 55a30c4..2b65155 100644 --- a/UnleashedRecomp/res/config.toml +++ b/UnleashedRecomp/res/config.toml @@ -7,6 +7,7 @@ Hints = true # Determines whether to spawn hint rings and vo UnleashOutOfControlDrain = true # Determines whether to drain Dark Gaia energy whilst the player cannot move. WerehogHubTransformVideo = true # Determines whether to play the transition video for switching time of day in the hub areas. # Setting this to false will instead play a generic transition without artificial loading times. +LogoSkip = false # Determines whether the intro logos will be skipped on boot. [Controls] XButtonHoming = true # Determines whether to decouple the homing attack from the X button. @@ -35,4 +36,4 @@ AlphaToCoverage = true Fullscreen = false VSync = false BufferCount = 3 # Double buffering = 2; Triple buffering = 3. - # Triple buffering can increase FPS and improve frame pacing at the expense of higher input latency. \ No newline at end of file + # Triple buffering can increase FPS and improve frame pacing at the expense of higher input latency. diff --git a/UnleashedRecompLib/config/SWA.toml b/UnleashedRecompLib/config/SWA.toml index d56c799..d567d2c 100644 --- a/UnleashedRecompLib/config/SWA.toml +++ b/UnleashedRecompLib/config/SWA.toml @@ -197,4 +197,9 @@ registers = ["r5"] [[midasm_hook]] name = "PostUnleashMidAsmHook" address = 0x823C6788 -registers = ["r30"] \ No newline at end of file +registers = ["r30"] + +[[midasm_hook]] +name = "MovieRendererMidAsmHook" +address = 0x82AE3200 +registers = ["r3"]