From 90c635bf783a82c28f0ed3dd0d7c23fadf425ddc Mon Sep 17 00:00:00 2001 From: PixelyIon Date: Wed, 27 Apr 2022 13:22:34 +0530 Subject: [PATCH] Coalesce subpasses with compatible attachments together We frequently encounter successive subpasses with the exact same framebuffer configuration, which we now exploit by reusing the previous subpass instead of creating a new one, avoiding the overhead involved in subpass creation. This provides significant performance boosts in cases where it drastically reduces the number of subpasses being created, while providing next to no benefit in other cases. --- .../gpu/interconnect/command_executor.cpp | 128 +++++++++++------- .../gpu/interconnect/command_executor.h | 16 ++- 2 files changed, 90 insertions(+), 54 deletions(-) diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp index f1b0e9e2..571765a2 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp @@ -11,21 +11,64 @@ namespace skyline::gpu::interconnect { cycle->Cancel(); } - bool CommandExecutor::CreateRenderPass(vk::Rect2D renderArea) { - if (renderPass && (renderPass->renderArea != renderArea || subpassCount > gpu.traits.quirks.maxSubpassCount)) { + bool CommandExecutor::CreateRenderPassWithSubpass(vk::Rect2D renderArea, span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment) { + auto addSubpass{[&] { + renderPass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment); + + lastSubpassAttachments.clear(); + auto insertAttachmentRange{[this](auto &attachments) -> std::pair { + size_t beginIndex{lastSubpassAttachments.size()}; + lastSubpassAttachments.insert(lastSubpassAttachments.end(), attachments.begin(), attachments.end()); + return {beginIndex, attachments.size()}; + }}; + + auto rangeToSpan{[this](auto &range) -> span { + return {lastSubpassAttachments.data() + 
range.first, range.second}; + }}; + + auto inputAttachmentRange{insertAttachmentRange(inputAttachments)}; + auto colorAttachmentRange{insertAttachmentRange(colorAttachments)}; + + lastSubpassInputAttachments = rangeToSpan(inputAttachmentRange); + lastSubpassColorAttachments = rangeToSpan(colorAttachmentRange); + lastSubpassDepthStencilAttachment = depthStencilAttachment; + }}; + + if (renderPass == nullptr || (renderPass && (renderPass->renderArea != renderArea || subpassCount > gpu.traits.quirks.maxSubpassCount))) { + // We need to create a render pass if one doesn't already exist or the current one isn't compatible + if (renderPass != nullptr) + nodes.emplace_back(std::in_place_type_t()); + renderPass = &std::get(nodes.emplace_back(std::in_place_type_t(), renderArea)); + addSubpass(); + subpassCount = 0; + return false; + } else { + if (ranges::equal(lastSubpassInputAttachments, inputAttachments) && + ranges::equal(lastSubpassColorAttachments, colorAttachments) && + lastSubpassDepthStencilAttachment == depthStencilAttachment) { + // The last subpass had the same attachments, so we can reuse them + return false; + } else { + // The last subpass had different attachments, so we need to create a new one + addSubpass(); + subpassCount++; + return true; + } + } + } + + void CommandExecutor::FinishRenderPass() { + if (renderPass) { nodes.emplace_back(std::in_place_type_t()); + renderPass = nullptr; subpassCount = 0; + + lastSubpassAttachments.clear(); + lastSubpassInputAttachments = nullptr; + lastSubpassColorAttachments = nullptr; + lastSubpassDepthStencilAttachment = nullptr; } - - bool newRenderPass{renderPass == nullptr}; - if (newRenderPass) - // We need to create a render pass if one doesn't already exist or the current one isn't compatible - renderPass = &std::get(nodes.emplace_back(std::in_place_type_t(), renderArea)); - else - subpassCount++; - - return newRenderPass; } void CommandExecutor::AttachTexture(TextureView *view) { @@ -52,39 +95,27 @@ namespace 
skyline::gpu::interconnect { } void CommandExecutor::AddSubpass(std::function &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment, bool exclusiveSubpass) { - if (exclusiveSubpass && renderPass) { - nodes.emplace_back(std::in_place_type_t()); - renderPass = nullptr; - subpassCount = 0; - } + if (exclusiveSubpass) + FinishRenderPass(); - bool newRenderPass{CreateRenderPass(renderArea)}; - renderPass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment ? &*depthStencilAttachment : nullptr); - if (newRenderPass) - nodes.emplace_back(std::in_place_type_t(), std::forward(function)); - else + bool gotoNext{CreateRenderPassWithSubpass(renderArea, inputAttachments, colorAttachments, depthStencilAttachment ? &*depthStencilAttachment : nullptr)}; + if (gotoNext) nodes.emplace_back(std::in_place_type_t(), std::forward(function)); + else + nodes.emplace_back(std::in_place_type_t(), std::forward(function)); } void CommandExecutor::AddOutsideRpCommand(std::function &, GPU &)> &&function) { - if (renderPass) { - // End render pass, if we're in one - nodes.emplace_back(std::in_place_type_t()); - renderPass = nullptr; - subpassCount = 0; - } + if (renderPass) + FinishRenderPass(); nodes.emplace_back(std::in_place_type_t(), std::forward(function)); } void CommandExecutor::AddClearColorSubpass(TextureView *attachment, const vk::ClearColorValue &value) { - bool newRenderPass{CreateRenderPass(vk::Rect2D{ - .extent = attachment->texture->dimensions, - })}; - renderPass->AddSubpass({}, attachment, nullptr); - + bool gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, attachment, nullptr)}; if (renderPass->ClearColorAttachment(0, value)) { - if (!newRenderPass) + if (gotoNext) nodes.emplace_back(std::in_place_type_t()); } else { auto function{[scissor = attachment->texture->dimensions, value](vk::raii::CommandBuffer &commandBuffer, 
const std::shared_ptr &, GPU &, vk::RenderPass, u32) { @@ -99,21 +130,17 @@ namespace skyline::gpu::interconnect { }); }}; - if (newRenderPass) - nodes.emplace_back(std::in_place_type_t(), function); - else + if (gotoNext) nodes.emplace_back(std::in_place_type_t(), function); + else + nodes.emplace_back(std::in_place_type_t(), function); } } void CommandExecutor::AddClearDepthStencilSubpass(TextureView *attachment, const vk::ClearDepthStencilValue &value) { - bool newRenderPass{CreateRenderPass(vk::Rect2D{ - .extent = attachment->texture->dimensions, - })}; - renderPass->AddSubpass({}, {}, attachment); - + bool gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, {}, attachment)}; if (renderPass->ClearDepthStencilAttachment(value)) { - if (!newRenderPass) + if (gotoNext) nodes.emplace_back(std::in_place_type_t()); } else { auto function{[aspect = attachment->format->vkAspect, extent = attachment->texture->dimensions, value](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &, GPU &, vk::RenderPass, u32) { @@ -127,10 +154,10 @@ namespace skyline::gpu::interconnect { }); }}; - if (newRenderPass) - nodes.emplace_back(std::in_place_type_t(), function); - else + if (gotoNext) nodes.emplace_back(std::in_place_type_t(), function); + else + nodes.emplace_back(std::in_place_type_t(), function); } } @@ -138,11 +165,8 @@ namespace skyline::gpu::interconnect { if (!nodes.empty()) { TRACE_EVENT("gpu", "CommandExecutor::Execute"); - if (renderPass) { - nodes.emplace_back(std::in_place_type_t()); - renderPass = nullptr; - subpassCount = 0; - } + if (renderPass) + FinishRenderPass(); { auto &commandBuffer{*activeCommandBuffer}; @@ -155,7 +179,7 @@ namespace skyline::gpu::interconnect { texture->MarkGpuDirty(); } - for (const auto& delegate : attachedBuffers) + for (const auto &delegate : attachedBuffers) delegate->usageCallback = nullptr; vk::RenderPass lRenderPass; @@ -187,7 +211,7 @@ namespace skyline::gpu::interconnect { 
commandBuffer.end(); gpu.scheduler.SubmitCommandBuffer(commandBuffer, activeCommandBuffer.GetFence()); - for (const auto& delegate : attachedBuffers) + for (const auto &delegate : attachedBuffers) delegate->buffer->InvalidateMegaBuffer(); nodes.clear(); diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h index 4a8245f7..6d778c69 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h @@ -24,10 +24,22 @@ namespace skyline::gpu::interconnect { using SharedBufferDelegate = std::shared_ptr; std::unordered_set attachedBuffers; //!< All buffers that are attached to the current execution + std::vector lastSubpassAttachments; //!< The storage backing for attachments used in the last subpass + span lastSubpassInputAttachments; //!< The set of input attachments used in the last subpass + span lastSubpassColorAttachments; //!< The set of color attachments used in the last subpass + TextureView* lastSubpassDepthStencilAttachment{}; //!< The depth stencil attachment used in the last subpass + /** - * @return If a new render pass was created by the function or the current one was reused as it was compatible + * @brief Create a new render pass and subpass with the specified attachments, if one doesn't already exist or the current one isn't compatible + * @note This also checks for subpass coalescing and will merge the new subpass with the previous one when possible + * @return If the next subpass must be started prior to issuing any commands */ - bool CreateRenderPass(vk::Rect2D renderArea); + bool CreateRenderPassWithSubpass(vk::Rect2D renderArea, span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment); + + /** + * @brief Ends a render pass if one is currently active and resets all corresponding state + */ + void FinishRenderPass(); public: std::shared_ptr cycle; //!< The fence cycle 
that this command executor uses to wait for the GPU to finish executing commands