Implement Optimized LoadOp Clears for Depth/Stencil Attachments

Implements `AddClearDepthStencilSubpass` in `CommandExecutor`, which is similar to `ClearColorAttachment` in that it uses `VK_ATTACHMENT_LOAD_OP_CLEAR` for the clear; this is far more efficient than using `VK_ATTACHMENT_LOAD_OP_LOAD` and then performing the clear.
This commit is contained in:
PixelyIon 2021-12-08 02:01:36 +05:30
parent 6f6413f02d
commit bf89f96bf5
4 changed files with 68 additions and 2 deletions

View File

@ -89,6 +89,36 @@ namespace skyline::gpu::interconnect {
}
}
/**
 * @brief Records a subpass that clears the whole of the supplied depth/stencil attachment
 * @param attachment The texture view to clear, its texture is attached to the executor before anything is recorded
 * @param value The depth/stencil value to clear the attachment with
 * @note The clear is folded into the render pass when RenderPassNode::ClearDepthStencilAttachment accepts it, otherwise an explicit clearAttachments command is recorded inside the subpass
 */
void CommandExecutor::AddClearDepthStencilSubpass(TextureView *attachment, const vk::ClearDepthStencilValue &value) {
    AttachTexture(attachment->texture);

    // The render area spans the full dimensions of the texture backing the attachment
    const bool startedRenderPass{CreateRenderPass(vk::Rect2D{
        .extent = attachment->texture->dimensions,
    })};
    renderPass->AddSubpass({}, {}, attachment);

    if (renderPass->ClearDepthStencilAttachment(value)) {
        // The render pass itself performs the clear, only a subpass transition has to be recorded when appending to an existing render pass
        if (!startedRenderPass)
            nodes.emplace_back(std::in_place_type_t<node::NextSubpassNode>());
        return;
    }

    // The render pass could not take the clear, so it is recorded as an explicit command within the subpass
    auto clearCommand{[clearAspect = attachment->format->vkAspect, clearExtent = attachment->texture->dimensions, value](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32) {
        const vk::ClearAttachment clearAttachment{
            .aspectMask = clearAspect,
            .clearValue = value,
        };
        const vk::ClearRect clearRect{
            .rect.extent = clearExtent,
            .baseArrayLayer = 0,
            .layerCount = 1,
        };
        commandBuffer.clearAttachments(clearAttachment, clearRect);
    }};

    if (!startedRenderPass)
        nodes.emplace_back(std::in_place_type_t<node::NextSubpassFunctionNode>(), clearCommand);
    else
        nodes.emplace_back(std::in_place_type_t<node::SubpassFunctionNode>(), clearCommand);
}
void CommandExecutor::Execute() {
if (!nodes.empty()) {
TRACE_EVENT("gpu", "CommandExecutor::Execute");

View File

@ -52,11 +52,17 @@ namespace skyline::gpu::interconnect {
void AddSubpass(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span<TextureView *> inputAttachments = {}, span<TextureView *> colorAttachments = {}, TextureView *depthStencilAttachment = {});
/**
 * @brief Adds a subpass that clears the entirety of the specified attachment with a color value, it may utilize VK_ATTACHMENT_LOAD_OP_CLEAR for a more efficient clear when possible
 * @note Any texture supplied to this **must** be locked by the calling thread, it should also undergo no persistent layout transitions till execution
 */
void AddClearColorSubpass(TextureView *attachment, const vk::ClearColorValue &value);
/**
 * @brief Adds a subpass that clears the entirety of the specified attachment with a depth/stencil value, it may utilize VK_ATTACHMENT_LOAD_OP_CLEAR for a more efficient clear when possible
 * @param attachment The depth/stencil texture view that should be cleared
 * @param value The depth/stencil value to clear the attachment with
 * @note Any texture supplied to this **must** be locked by the calling thread, it should also undergo no persistent layout transitions till execution
 */
void AddClearDepthStencilSubpass(TextureView *attachment, const vk::ClearDepthStencilValue &value);
/**
* @brief Execute all the nodes and submit the resulting command buffer to the GPU
*/

View File

@ -166,6 +166,29 @@ namespace skyline::gpu::interconnect::node {
return false;
}
/**
 * @brief Attempts to clear the depth/stencil attachment of the most recently added subpass by switching its load op to VK_ATTACHMENT_LOAD_OP_CLEAR
 * @param value The depth/stencil value to clear the attachment with
 * @return true if the render pass will clear the attachment with the supplied value (either set up here or already set up with an equal value), false if the clear has to be performed by other means
 */
bool RenderPassNode::ClearDepthStencilAttachment(const vk::ClearDepthStencilValue &value) {
    auto attachmentReference{RebasePointer(attachmentReferences, subpassDescriptions.back().pDepthStencilAttachment)};
    auto attachmentIndex{attachmentReference->attachment};

    // Any other reference to the same attachment within this render pass is treated as a conflict
    for (const auto &reference : attachmentReferences)
        if (reference.attachment == attachmentIndex && &reference != attachmentReference)
            return false;

    auto &attachmentDescription{attachmentDescriptions.at(attachmentIndex)};
    if (attachmentDescription.loadOp == vk::AttachmentLoadOp::eLoad) {
        attachmentDescription.loadOp = vk::AttachmentLoadOp::eClear;

        // Only ever grow the clear value list, an unconditional resize would discard the clear values of any higher-indexed attachment
        if (clearValues.size() <= attachmentIndex)
            clearValues.resize(attachmentIndex + 1);
        clearValues[attachmentIndex].depthStencil = value;

        return true;
    } else if (attachmentDescription.loadOp == vk::AttachmentLoadOp::eClear && clearValues[attachmentIndex].depthStencil == value) {
        // The attachment is already being cleared to the exact same value, nothing further needs to be done
        return true;
    }

    return false;
}
vk::RenderPass RenderPassNode::operator()(vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &gpu) {
storage->device = &gpu.vkDevice;

View File

@ -79,13 +79,20 @@ namespace skyline::gpu::interconnect::node {
void AddSubpass(span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment);
/**
 * @brief Clears a color attachment in the current subpass with VK_ATTACHMENT_LOAD_OP_CLEAR
 * @param colorAttachment The index of the attachment in the attachments bound to the current subpass
 * @return If the attachment could be cleared or not due to conflicts with other operations
 * @note We require a subpass to be attached during this as the clear will not take place unless it's referenced by a subpass
 */
bool ClearColorAttachment(u32 colorAttachment, const vk::ClearColorValue &value);
/**
 * @brief Clears the depth/stencil attachment in the current subpass with VK_ATTACHMENT_LOAD_OP_CLEAR
 * @param value The depth/stencil value the attachment should be cleared to
 * @return If the attachment could be cleared or not due to conflicts with other operations
 * @note We require a subpass to be attached during this as the clear will not take place unless it's referenced by a subpass
 */
bool ClearDepthStencilAttachment(const vk::ClearDepthStencilValue &value);
vk::RenderPass operator()(vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &gpu);
};