mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-11-22 10:09:24 +01:00
Coalesce subpasses with compatible attachments together
We frequently run into successive subpasses that use the exact same framebuffer configuration. We now exploit this by reusing the previous subpass instead of creating a new one, which avoids the overhead involved in subpass creation. This provides a significant performance boost in cases where it drastically reduces the number of subpasses being created, while providing next to no benefit in other cases.
This commit is contained in:
parent
a947933bf0
commit
90c635bf78
@ -11,21 +11,64 @@ namespace skyline::gpu::interconnect {
|
||||
cycle->Cancel();
|
||||
}
|
||||
|
||||
bool CommandExecutor::CreateRenderPass(vk::Rect2D renderArea) {
|
||||
if (renderPass && (renderPass->renderArea != renderArea || subpassCount > gpu.traits.quirks.maxSubpassCount)) {
|
||||
bool CommandExecutor::CreateRenderPassWithSubpass(vk::Rect2D renderArea, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment) {
|
||||
auto addSubpass{[&] {
|
||||
renderPass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment);
|
||||
|
||||
lastSubpassAttachments.clear();
|
||||
auto insertAttachmentRange{[this](auto &attachments) -> std::pair<size_t, size_t> {
|
||||
size_t beginIndex{lastSubpassAttachments.size()};
|
||||
lastSubpassAttachments.insert(lastSubpassAttachments.end(), attachments.begin(), attachments.end());
|
||||
return {beginIndex, attachments.size()};
|
||||
}};
|
||||
|
||||
auto rangeToSpan{[this](auto &range) -> span<TextureView *> {
|
||||
return {lastSubpassAttachments.data() + range.first, range.second};
|
||||
}};
|
||||
|
||||
auto inputAttachmentRange{insertAttachmentRange(inputAttachments)};
|
||||
auto colorAttachmentRange{insertAttachmentRange(colorAttachments)};
|
||||
|
||||
lastSubpassInputAttachments = rangeToSpan(inputAttachmentRange);
|
||||
lastSubpassColorAttachments = rangeToSpan(colorAttachmentRange);
|
||||
lastSubpassDepthStencilAttachment = depthStencilAttachment;
|
||||
}};
|
||||
|
||||
if (renderPass == nullptr || (renderPass && (renderPass->renderArea != renderArea || subpassCount > gpu.traits.quirks.maxSubpassCount))) {
|
||||
// We need to create a render pass if one doesn't already exist or the current one isn't compatible
|
||||
if (renderPass != nullptr)
|
||||
nodes.emplace_back(std::in_place_type_t<node::RenderPassEndNode>());
|
||||
renderPass = &std::get<node::RenderPassNode>(nodes.emplace_back(std::in_place_type_t<node::RenderPassNode>(), renderArea));
|
||||
addSubpass();
|
||||
subpassCount = 0;
|
||||
return false;
|
||||
} else {
|
||||
if (ranges::equal(lastSubpassInputAttachments, inputAttachments) &&
|
||||
ranges::equal(lastSubpassColorAttachments, colorAttachments) &&
|
||||
lastSubpassDepthStencilAttachment == depthStencilAttachment) {
|
||||
// The last subpass had the same attachments, so we can reuse them
|
||||
return false;
|
||||
} else {
|
||||
// The last subpass had different attachments, so we need to create a new one
|
||||
addSubpass();
|
||||
subpassCount++;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CommandExecutor::FinishRenderPass() {
|
||||
if (renderPass) {
|
||||
nodes.emplace_back(std::in_place_type_t<node::RenderPassEndNode>());
|
||||
|
||||
renderPass = nullptr;
|
||||
subpassCount = 0;
|
||||
|
||||
lastSubpassAttachments.clear();
|
||||
lastSubpassInputAttachments = nullptr;
|
||||
lastSubpassColorAttachments = nullptr;
|
||||
lastSubpassDepthStencilAttachment = nullptr;
|
||||
}
|
||||
|
||||
bool newRenderPass{renderPass == nullptr};
|
||||
if (newRenderPass)
|
||||
// We need to create a render pass if one doesn't already exist or the current one isn't compatible
|
||||
renderPass = &std::get<node::RenderPassNode>(nodes.emplace_back(std::in_place_type_t<node::RenderPassNode>(), renderArea));
|
||||
else
|
||||
subpassCount++;
|
||||
|
||||
return newRenderPass;
|
||||
}
|
||||
|
||||
void CommandExecutor::AttachTexture(TextureView *view) {
|
||||
@ -52,39 +95,27 @@ namespace skyline::gpu::interconnect {
|
||||
}
|
||||
|
||||
void CommandExecutor::AddSubpass(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, bool exclusiveSubpass) {
|
||||
if (exclusiveSubpass && renderPass) {
|
||||
nodes.emplace_back(std::in_place_type_t<node::RenderPassEndNode>());
|
||||
renderPass = nullptr;
|
||||
subpassCount = 0;
|
||||
}
|
||||
if (exclusiveSubpass)
|
||||
FinishRenderPass();
|
||||
|
||||
bool newRenderPass{CreateRenderPass(renderArea)};
|
||||
renderPass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment ? &*depthStencilAttachment : nullptr);
|
||||
if (newRenderPass)
|
||||
nodes.emplace_back(std::in_place_type_t<node::SubpassFunctionNode>(), std::forward<decltype(function)>(function));
|
||||
else
|
||||
bool gotoNext{CreateRenderPassWithSubpass(renderArea, inputAttachments, colorAttachments, depthStencilAttachment ? &*depthStencilAttachment : nullptr)};
|
||||
if (gotoNext)
|
||||
nodes.emplace_back(std::in_place_type_t<node::NextSubpassFunctionNode>(), std::forward<decltype(function)>(function));
|
||||
else
|
||||
nodes.emplace_back(std::in_place_type_t<node::SubpassFunctionNode>(), std::forward<decltype(function)>(function));
|
||||
}
|
||||
|
||||
void CommandExecutor::AddOutsideRpCommand(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &)> &&function) {
|
||||
if (renderPass) {
|
||||
// End render pass, if we're in one
|
||||
nodes.emplace_back(std::in_place_type_t<node::RenderPassEndNode>());
|
||||
renderPass = nullptr;
|
||||
subpassCount = 0;
|
||||
}
|
||||
if (renderPass)
|
||||
FinishRenderPass();
|
||||
|
||||
nodes.emplace_back(std::in_place_type_t<node::FunctionNode>(), std::forward<decltype(function)>(function));
|
||||
}
|
||||
|
||||
void CommandExecutor::AddClearColorSubpass(TextureView *attachment, const vk::ClearColorValue &value) {
|
||||
bool newRenderPass{CreateRenderPass(vk::Rect2D{
|
||||
.extent = attachment->texture->dimensions,
|
||||
})};
|
||||
renderPass->AddSubpass({}, attachment, nullptr);
|
||||
|
||||
bool gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, attachment, nullptr)};
|
||||
if (renderPass->ClearColorAttachment(0, value)) {
|
||||
if (!newRenderPass)
|
||||
if (gotoNext)
|
||||
nodes.emplace_back(std::in_place_type_t<node::NextSubpassNode>());
|
||||
} else {
|
||||
auto function{[scissor = attachment->texture->dimensions, value](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32) {
|
||||
@ -99,21 +130,17 @@ namespace skyline::gpu::interconnect {
|
||||
});
|
||||
}};
|
||||
|
||||
if (newRenderPass)
|
||||
nodes.emplace_back(std::in_place_type_t<node::SubpassFunctionNode>(), function);
|
||||
else
|
||||
if (gotoNext)
|
||||
nodes.emplace_back(std::in_place_type_t<node::NextSubpassFunctionNode>(), function);
|
||||
else
|
||||
nodes.emplace_back(std::in_place_type_t<node::SubpassFunctionNode>(), function);
|
||||
}
|
||||
}
|
||||
|
||||
void CommandExecutor::AddClearDepthStencilSubpass(TextureView *attachment, const vk::ClearDepthStencilValue &value) {
|
||||
bool newRenderPass{CreateRenderPass(vk::Rect2D{
|
||||
.extent = attachment->texture->dimensions,
|
||||
})};
|
||||
renderPass->AddSubpass({}, {}, attachment);
|
||||
|
||||
bool gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, {}, attachment)};
|
||||
if (renderPass->ClearDepthStencilAttachment(value)) {
|
||||
if (!newRenderPass)
|
||||
if (gotoNext)
|
||||
nodes.emplace_back(std::in_place_type_t<node::NextSubpassNode>());
|
||||
} else {
|
||||
auto function{[aspect = attachment->format->vkAspect, extent = attachment->texture->dimensions, value](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32) {
|
||||
@ -127,10 +154,10 @@ namespace skyline::gpu::interconnect {
|
||||
});
|
||||
}};
|
||||
|
||||
if (newRenderPass)
|
||||
nodes.emplace_back(std::in_place_type_t<node::SubpassFunctionNode>(), function);
|
||||
else
|
||||
if (gotoNext)
|
||||
nodes.emplace_back(std::in_place_type_t<node::NextSubpassFunctionNode>(), function);
|
||||
else
|
||||
nodes.emplace_back(std::in_place_type_t<node::SubpassFunctionNode>(), function);
|
||||
}
|
||||
}
|
||||
|
||||
@ -138,11 +165,8 @@ namespace skyline::gpu::interconnect {
|
||||
if (!nodes.empty()) {
|
||||
TRACE_EVENT("gpu", "CommandExecutor::Execute");
|
||||
|
||||
if (renderPass) {
|
||||
nodes.emplace_back(std::in_place_type_t<node::RenderPassEndNode>());
|
||||
renderPass = nullptr;
|
||||
subpassCount = 0;
|
||||
}
|
||||
if (renderPass)
|
||||
FinishRenderPass();
|
||||
|
||||
{
|
||||
auto &commandBuffer{*activeCommandBuffer};
|
||||
@ -155,7 +179,7 @@ namespace skyline::gpu::interconnect {
|
||||
texture->MarkGpuDirty();
|
||||
}
|
||||
|
||||
for (const auto& delegate : attachedBuffers)
|
||||
for (const auto &delegate : attachedBuffers)
|
||||
delegate->usageCallback = nullptr;
|
||||
|
||||
vk::RenderPass lRenderPass;
|
||||
@ -187,7 +211,7 @@ namespace skyline::gpu::interconnect {
|
||||
commandBuffer.end();
|
||||
gpu.scheduler.SubmitCommandBuffer(commandBuffer, activeCommandBuffer.GetFence());
|
||||
|
||||
for (const auto& delegate : attachedBuffers)
|
||||
for (const auto &delegate : attachedBuffers)
|
||||
delegate->buffer->InvalidateMegaBuffer();
|
||||
|
||||
nodes.clear();
|
||||
|
@ -24,10 +24,22 @@ namespace skyline::gpu::interconnect {
|
||||
using SharedBufferDelegate = std::shared_ptr<Buffer::BufferDelegate>;
|
||||
std::unordered_set<SharedBufferDelegate> attachedBuffers; //!< All buffers that are attached to the current execution
|
||||
|
||||
std::vector<TextureView*> lastSubpassAttachments; //!< The storage backing for attachments used in the last subpass
|
||||
span<TextureView*> lastSubpassInputAttachments; //!< The set of input attachments used in the last subpass
|
||||
span<TextureView*> lastSubpassColorAttachments; //!< The set of color attachments used in the last subpass
|
||||
TextureView* lastSubpassDepthStencilAttachment{}; //!< The depth stencil attachment used in the last subpass
|
||||
|
||||
/**
|
||||
* @return If a new render pass was created by the function or the current one was reused as it was compatible
|
||||
* @brief Create a new render pass and subpass with the specified attachments, if one doesn't already exist or the current one isn't compatible
|
||||
* @note This also checks for subpass coalescing and will merge the new subpass with the previous one when possible
|
||||
* @return If the next subpass must be started prior to issuing any commands
|
||||
*/
|
||||
bool CreateRenderPass(vk::Rect2D renderArea);
|
||||
bool CreateRenderPassWithSubpass(vk::Rect2D renderArea, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment);
|
||||
|
||||
/**
|
||||
* @brief Ends a render pass if one is currently active and resets all corresponding state
|
||||
*/
|
||||
void FinishRenderPass();
|
||||
|
||||
public:
|
||||
std::shared_ptr<FenceCycle> cycle; //!< The fence cycle that this command executor uses to wait for the GPU to finish executing commands
|
||||
|
Loading…
Reference in New Issue
Block a user