From 42573170c6db1a0ecf7940b55bb35fc171777390 Mon Sep 17 00:00:00 2001 From: PixelyIon Date: Sun, 1 May 2022 18:06:55 +0530 Subject: [PATCH] Implement Framebuffer Cache Implements a cache for storing `VkFramebuffer` objects with a special path on devices with `VK_KHR_imageless_framebuffer` to allow for more cache hits due to an abstract image rather than a specific one. Caching framebuffers is a fairly crucial optimization due to the cost of creating framebuffers on TBDRs since it involves calculating tiling memory allocations and in the case of Adreno's proprietary driver involves several kernel calls for mapping and allocating the corresponding framebuffer memory. --- app/CMakeLists.txt | 1 + app/src/main/cpp/skyline/gpu.cpp | 6 +- app/src/main/cpp/skyline/gpu.h | 2 + .../skyline/gpu/cache/framebuffer_cache.cpp | 152 ++++++++++++++++++ .../cpp/skyline/gpu/cache/framebuffer_cache.h | 78 +++++++++ .../gpu/interconnect/command_executor.cpp | 6 +- .../gpu/interconnect/command_executor.h | 10 +- .../gpu/interconnect/command_nodes.cpp | 91 +++++++---- .../skyline/gpu/interconnect/command_nodes.h | 21 +-- .../gpu/interconnect/graphics_context.h | 7 +- .../cpp/skyline/gpu/presentation_engine.cpp | 2 +- .../main/cpp/skyline/gpu/texture/texture.cpp | 12 +- .../main/cpp/skyline/gpu/texture/texture.h | 4 +- .../main/cpp/skyline/gpu/trait_manager.cpp | 13 +- app/src/main/cpp/skyline/gpu/trait_manager.h | 4 +- 15 files changed, 336 insertions(+), 73 deletions(-) create mode 100644 app/src/main/cpp/skyline/gpu/cache/framebuffer_cache.cpp create mode 100644 app/src/main/cpp/skyline/gpu/cache/framebuffer_cache.h diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index 35ae069a..d45a1570 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -171,6 +171,7 @@ add_library(skyline SHARED ${source_DIR}/skyline/gpu/shader_manager.cpp ${source_DIR}/skyline/gpu/cache/graphics_pipeline_cache.cpp ${source_DIR}/skyline/gpu/cache/renderpass_cache.cpp + 
${source_DIR}/skyline/gpu/cache/framebuffer_cache.cpp ${source_DIR}/skyline/gpu/interconnect/command_executor.cpp ${source_DIR}/skyline/gpu/interconnect/command_nodes.cpp ${source_DIR}/skyline/gpu/interconnect/conversion/quads.cpp diff --git a/app/src/main/cpp/skyline/gpu.cpp b/app/src/main/cpp/skyline/gpu.cpp index 574115b2..7b4364e7 100644 --- a/app/src/main/cpp/skyline/gpu.cpp +++ b/app/src/main/cpp/skyline/gpu.cpp @@ -140,7 +140,8 @@ namespace skyline::gpu { vk::PhysicalDeviceUniformBufferStandardLayoutFeatures, vk::PhysicalDeviceShaderDrawParametersFeatures, vk::PhysicalDeviceProvokingVertexFeaturesEXT, - vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT>()}; + vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT, + vk::PhysicalDeviceImagelessFramebufferFeatures>()}; decltype(deviceFeatures2) enabledFeatures2{}; // We only want to enable features we required due to potential overhead from unused features #define FEAT_REQ(structName, feature) \ @@ -279,5 +280,6 @@ namespace skyline::gpu { descriptor(*this), shader(state, *this), graphicsPipelineCache(*this), - renderPassCache(*this) {} + renderPassCache(*this), + framebufferCache(*this) {} } diff --git a/app/src/main/cpp/skyline/gpu.h b/app/src/main/cpp/skyline/gpu.h index dea96159..c446c8c0 100644 --- a/app/src/main/cpp/skyline/gpu.h +++ b/app/src/main/cpp/skyline/gpu.h @@ -13,6 +13,7 @@ #include "gpu/shader_manager.h" #include "gpu/cache/graphics_pipeline_cache.h" #include "gpu/cache/renderpass_cache.h" +#include "gpu/cache/framebuffer_cache.h" namespace skyline::gpu { static constexpr u32 VkApiVersion{VK_API_VERSION_1_1}; //!< The version of core Vulkan that we require @@ -49,6 +50,7 @@ namespace skyline::gpu { cache::GraphicsPipelineCache graphicsPipelineCache; cache::RenderPassCache renderPassCache; + cache::FramebufferCache framebufferCache; GPU(const DeviceState &state); }; diff --git a/app/src/main/cpp/skyline/gpu/cache/framebuffer_cache.cpp 
b/app/src/main/cpp/skyline/gpu/cache/framebuffer_cache.cpp new file mode 100644 index 00000000..2114986a --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/cache/framebuffer_cache.cpp @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include +#include +#include "framebuffer_cache.h" + +namespace skyline::gpu::cache { + FramebufferCache::FramebufferCache(GPU &gpu) : gpu(gpu) {} + + FramebufferCache::FramebufferImagelessAttachment::FramebufferImagelessAttachment(const vk::FramebufferAttachmentImageInfo &info) : flags(info.flags), usage(info.usage), width(info.width), height(info.height), layers(info.layerCount), format(*info.pViewFormats) {} + + FramebufferCache::FramebufferCacheKey::FramebufferCacheKey(const FramebufferCreateInfo &createInfo) { + auto &info{createInfo.get()}; + flags = info.flags; + renderPass = info.renderPass; + width = info.width; + height = info.height; + layers = info.layers; + + if (createInfo.isLinked()) { + auto &attachmentInfo{createInfo.get()}; + std::vector imagelessAttachments; + imagelessAttachments.reserve(attachmentInfo.attachmentImageInfoCount); + for (const auto &image : span(attachmentInfo.pAttachmentImageInfos, attachmentInfo.attachmentImageInfoCount)) + imagelessAttachments.emplace_back(image); + attachments.emplace>(std::move(imagelessAttachments)); + } else { + std::vector imageViews; + imageViews.reserve(info.attachmentCount); + for (const auto &image : span(info.pAttachments, info.attachmentCount)) + imageViews.emplace_back(image); + attachments.emplace>(std::move(imageViews)); + } + } + + #define HASH(x) boost::hash_combine(hash, x) + + size_t FramebufferCache::FramebufferHash::operator()(const FramebufferCacheKey &key) const { + size_t hash{}; + + HASH(static_cast(key.flags)); + HASH(static_cast(key.renderPass)); + HASH(key.width); + HASH(key.height); + HASH(key.layers); + + std::visit(VariantVisitor{ + [&hash](const std::vector 
&attachments) { + HASH(attachments.size()); + for (const auto &attachment : attachments) { + HASH(static_cast(attachment.flags)); + HASH(static_cast(attachment.usage)); + HASH(attachment.width); + HASH(attachment.height); + HASH(attachment.layers); + HASH(attachment.format); + } + }, + [&hash](const std::vector &attachments) { + HASH(attachments.size()); + for (const auto &attachment : attachments) + HASH(static_cast(attachment)); + } + }, key.attachments); + + return hash; + } + + /** + * @note This has to fold in the same fields in the same order as the FramebufferCacheKey overload, transparent lookup only finds an existing entry when both overloads agree on the hash + */ + size_t FramebufferCache::FramebufferHash::operator()(const FramebufferCreateInfo &key) const { + size_t hash{}; + + auto &info{key.get()}; + + HASH(static_cast(info.flags)); + HASH(static_cast(info.renderPass)); + HASH(info.width); + HASH(info.height); + HASH(info.layers); + + if (info.flags & vk::FramebufferCreateFlagBits::eImageless) { + auto &attachmentInfo{key.get()}; + // The attachment count is hashed before the attachments by the FramebufferCacheKey overload, omitting it here would make every imageless lookup miss + HASH(attachmentInfo.attachmentImageInfoCount); + for (const vk::FramebufferAttachmentImageInfo &image : span(attachmentInfo.pAttachmentImageInfos, attachmentInfo.attachmentImageInfoCount)) { + HASH(static_cast(image.flags)); + HASH(static_cast(image.usage)); + HASH(image.width); + HASH(image.height); + HASH(image.layerCount); + HASH(*image.pViewFormats); + } + } else { + HASH(info.attachmentCount); + for (const auto &view : span(info.pAttachments, info.attachmentCount)) + HASH(static_cast(view)); + } + + return hash; + } + + #undef HASH + + bool FramebufferCache::FramebufferEqual::operator()(const FramebufferCacheKey &lhs, const FramebufferCacheKey &rhs) const { + return lhs == rhs; + } + + bool FramebufferCache::FramebufferEqual::operator()(const FramebufferCacheKey &lhs, const FramebufferCreateInfo &rhs) const { + #define RETF(condition) if (condition) { return false; } + + auto &rhsInfo{rhs.get()}; + + RETF(lhs.flags != rhsInfo.flags) + RETF(lhs.renderPass != rhsInfo.renderPass) + RETF(lhs.width != rhsInfo.width) + RETF(lhs.height != rhsInfo.height) + RETF(lhs.layers != rhsInfo.layers) + + if (lhs.flags & vk::FramebufferCreateFlagBits::eImageless) { + auto 
&lhsAttachments{std::get>(lhs.attachments)}; + auto &rhsAttachments{rhs.get()}; + + RETF(lhsAttachments.size() != rhsAttachments.attachmentImageInfoCount) + const vk::FramebufferAttachmentImageInfo *rhsAttachmentInfo{rhsAttachments.pAttachmentImageInfos}; + for (const auto &attachment : lhsAttachments) { + RETF(attachment.flags != rhsAttachmentInfo->flags) + RETF(attachment.usage != rhsAttachmentInfo->usage) + RETF(attachment.width != rhsAttachmentInfo->width) + RETF(attachment.height != rhsAttachmentInfo->height) + RETF(attachment.layers != rhsAttachmentInfo->layerCount) + RETF(attachment.format != *rhsAttachmentInfo->pViewFormats) + rhsAttachmentInfo++; + } + } else { + auto &lhsAttachments{std::get>(lhs.attachments)}; + span rhsAttachments{rhsInfo.pAttachments, rhsInfo.attachmentCount}; + RETF(!std::equal(lhsAttachments.begin(), lhsAttachments.end(), rhsAttachments.begin(), rhsAttachments.end())) + } + + #undef RETF + + return true; + } + + vk::Framebuffer FramebufferCache::GetFramebuffer(const FramebufferCreateInfo &createInfo) { + std::scoped_lock lock{mutex}; + auto it{framebufferCache.find(createInfo)}; + if (it != framebufferCache.end()) + return *it->second; + + auto entryIt{framebufferCache.try_emplace(FramebufferCacheKey{createInfo}, gpu.vkDevice, createInfo.get())}; + return *entryIt.first->second; + } +} diff --git a/app/src/main/cpp/skyline/gpu/cache/framebuffer_cache.h b/app/src/main/cpp/skyline/gpu/cache/framebuffer_cache.h new file mode 100644 index 00000000..da57b481 --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/cache/framebuffer_cache.h @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#pragma once + +#include "common.h" + +namespace skyline::gpu::cache { + using FramebufferCreateInfo = vk::StructureChain; + + /** + * @brief A cache for Vulkan framebuffers to avoid unnecessary recreation, optimized for both fixed image and imageless attachments + * 
@note It is generally expensive to create a framebuffer on TBDRs since it involves calculating tiling memory allocations and in the case of Adreno's proprietary driver involves several kernel calls for mapping and allocating the corresponding framebuffer memory + */ + class FramebufferCache { + private: + GPU &gpu; + std::mutex mutex; //!< Synchronizes access to the cache + + private: + /** + * @brief An equivalent to VkFramebufferAttachmentImageInfo with more suitable semantics for storage + * @url https://www.khronos.org/registry/vulkan/specs/1.3-extensions/man/html/VkFramebufferAttachmentImageInfo.html + */ + struct FramebufferImagelessAttachment { + vk::ImageCreateFlags flags; + vk::ImageUsageFlags usage; + u32 width; + u32 height; + u32 layers; + vk::Format format; + + FramebufferImagelessAttachment(const vk::FramebufferAttachmentImageInfo &info); + + bool operator==(const FramebufferImagelessAttachment &other) const = default; + }; + + struct FramebufferCacheKey { + vk::FramebufferCreateFlags flags; + vk::RenderPass renderPass; + u32 width; + u32 height; + u32 layers; + std::variant, std::vector> attachments; + + FramebufferCacheKey(const FramebufferCreateInfo &createInfo); + + bool operator==(const FramebufferCacheKey &other) const = default; + }; + + struct FramebufferHash { + using is_transparent = std::true_type; + + size_t operator()(const FramebufferCacheKey &key) const; + + size_t operator()(const FramebufferCreateInfo &key) const; + }; + + struct FramebufferEqual { + using is_transparent = std::true_type; + + bool operator()(const FramebufferCacheKey &lhs, const FramebufferCacheKey &rhs) const; + + bool operator()(const FramebufferCacheKey &lhs, const FramebufferCreateInfo &rhs) const; + }; + + std::unordered_map framebufferCache; + + public: + FramebufferCache(GPU &gpu); + + /** + * @note When using imageless framebuffer attachments, VkFramebufferAttachmentImageInfo **must** have a single view format + * @note When using image framebuffer 
attachments, it is expected that the supplied image handle will remain stable for the cache to function + */ + vk::Framebuffer GetFramebuffer(const FramebufferCreateInfo &createInfo); + }; +} diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp index 571765a2..9fccbdfc 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp @@ -13,7 +13,7 @@ namespace skyline::gpu::interconnect { bool CommandExecutor::CreateRenderPassWithSubpass(vk::Rect2D renderArea, span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment) { auto addSubpass{[&] { - renderPass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment); + renderPass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment, gpu); lastSubpassAttachments.clear(); auto insertAttachmentRange{[this](auto &attachments) -> std::pair { @@ -114,7 +114,7 @@ namespace skyline::gpu::interconnect { void CommandExecutor::AddClearColorSubpass(TextureView *attachment, const vk::ClearColorValue &value) { bool gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, attachment, nullptr)}; - if (renderPass->ClearColorAttachment(0, value)) { + if (renderPass->ClearColorAttachment(0, value, gpu)) { if (gotoNext) nodes.emplace_back(std::in_place_type_t()); } else { @@ -139,7 +139,7 @@ namespace skyline::gpu::interconnect { void CommandExecutor::AddClearDepthStencilSubpass(TextureView *attachment, const vk::ClearDepthStencilValue &value) { bool gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, {}, attachment)}; - if (renderPass->ClearDepthStencilAttachment(value)) { + if (renderPass->ClearDepthStencilAttachment(value, gpu)) { if (gotoNext) nodes.emplace_back(std::in_place_type_t()); } else { diff --git 
a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h index 6d778c69..474bd4c7 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h @@ -19,15 +19,15 @@ namespace skyline::gpu::interconnect { boost::container::stable_vector nodes; node::RenderPassNode *renderPass{}; size_t subpassCount{}; //!< The number of subpasses in the current render pass - std::unordered_set attachedTextures; //!< All textures that need to be synced prior to and after execution + std::unordered_set attachedTextures; //!< All textures that need to be synced prior to and after execution using SharedBufferDelegate = std::shared_ptr; std::unordered_set attachedBuffers; //!< All buffers that are attached to the current execution - std::vector lastSubpassAttachments; //!< The storage backing for attachments used in the last subpass - span lastSubpassInputAttachments; //!< The set of input attachments used in the last subpass - span lastSubpassColorAttachments; //!< The set of color attachments used in the last subpass - TextureView* lastSubpassDepthStencilAttachment{}; //!< The depth stencil attachment used in the last subpass + std::vector lastSubpassAttachments; //!< The storage backing for attachments used in the last subpass + span lastSubpassInputAttachments; //!< The set of input attachments used in the last subpass + span lastSubpassColorAttachments; //!< The set of color attachments used in the last subpass + TextureView *lastSubpassDepthStencilAttachment{}; //!< The depth stencil attachment used in the last subpass /** * @brief Create a new render pass and subpass with the specified attachments, if one doesn't already exist or the current one isn't compatible diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.cpp index 52a3bf40..1ee64fef 100644 --- 
a/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.cpp @@ -16,20 +16,26 @@ namespace skyline::gpu::interconnect::node { .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, } } - ), storage(std::make_shared()), renderArea(renderArea) {} + ), renderArea(renderArea) {} - RenderPassNode::Storage::~Storage() { - if (device) - if (framebuffer) - (**device).destroy(framebuffer, nullptr, *device->getDispatcher()); - } - - u32 RenderPassNode::AddAttachment(TextureView *view) { + u32 RenderPassNode::AddAttachment(TextureView *view, GPU &gpu) { auto vkView{view->GetView()}; auto attachment{std::find(attachments.begin(), attachments.end(), vkView)}; if (attachment == attachments.end()) { // If we cannot find any matches for the specified attachment, we add it as a new one attachments.push_back(vkView); + + if (gpu.traits.supportsImagelessFramebuffers) + attachmentInfo.push_back(vk::FramebufferAttachmentImageInfo{ + .flags = view->texture->flags, + .usage = view->texture->usage, + .width = view->texture->dimensions.width, + .height = view->texture->dimensions.height, + .layerCount = view->texture->layerCount, + .viewFormatCount = 1, + .pViewFormats = &view->format->vkFormat, + }); + attachmentDescriptions.push_back(vk::AttachmentDescription{ .format = *view->format, .initialLayout = view->texture->layout, @@ -109,13 +115,13 @@ namespace skyline::gpu::interconnect::node { } } - void RenderPassNode::AddSubpass(span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment) { + void RenderPassNode::AddSubpass(span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment, GPU& gpu) { attachmentReferences.reserve(attachmentReferences.size() + inputAttachments.size() + colorAttachments.size() + (depthStencilAttachment ? 
1 : 0)); auto inputAttachmentsOffset{attachmentReferences.size() * sizeof(vk::AttachmentReference)}; for (auto &attachment : inputAttachments) { attachmentReferences.push_back(vk::AttachmentReference{ - .attachment = AddAttachment(attachment), + .attachment = AddAttachment(attachment, gpu), .layout = attachment->texture->layout, }); } @@ -123,7 +129,7 @@ namespace skyline::gpu::interconnect::node { auto colorAttachmentsOffset{attachmentReferences.size() * sizeof(vk::AttachmentReference)}; // Calculate new base offset as it has changed since we pushed the input attachments for (auto &attachment : colorAttachments) { attachmentReferences.push_back(vk::AttachmentReference{ - .attachment = AddAttachment(attachment), + .attachment = AddAttachment(attachment, gpu), .layout = attachment->texture->layout, }); } @@ -131,7 +137,7 @@ namespace skyline::gpu::interconnect::node { auto depthStencilAttachmentOffset{attachmentReferences.size() * sizeof(vk::AttachmentReference)}; if (depthStencilAttachment) { attachmentReferences.push_back(vk::AttachmentReference{ - .attachment = AddAttachment(depthStencilAttachment), + .attachment = AddAttachment(depthStencilAttachment, gpu), .layout = depthStencilAttachment->texture->layout, }); } @@ -149,7 +155,7 @@ namespace skyline::gpu::interconnect::node { }); } - bool RenderPassNode::ClearColorAttachment(u32 colorAttachment, const vk::ClearColorValue &value) { + bool RenderPassNode::ClearColorAttachment(u32 colorAttachment, const vk::ClearColorValue &value, GPU& gpu) { auto attachmentReference{RebasePointer(attachmentReferences, subpassDescriptions.back().pColorAttachments) + colorAttachment}; auto attachmentIndex{attachmentReference->attachment}; @@ -172,7 +178,7 @@ namespace skyline::gpu::interconnect::node { return false; } - bool RenderPassNode::ClearDepthStencilAttachment(const vk::ClearDepthStencilValue &value) { + bool RenderPassNode::ClearDepthStencilAttachment(const vk::ClearDepthStencilValue &value, GPU& gpu) { auto 
attachmentReference{RebasePointer(attachmentReferences, subpassDescriptions.back().pDepthStencilAttachment)}; auto attachmentIndex{attachmentReference->attachment}; @@ -196,8 +202,6 @@ namespace skyline::gpu::interconnect::node { } vk::RenderPass RenderPassNode::operator()(vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &cycle, GPU &gpu) { - storage->device = &gpu.vkDevice; - auto preserveAttachmentIt{preserveAttachmentReferences.begin()}; for (auto &subpassDescription : subpassDescriptions) { subpassDescription.pInputAttachments = RebasePointer(attachmentReferences, subpassDescription.pInputAttachments); @@ -223,25 +227,46 @@ namespace skyline::gpu::interconnect::node { .pDependencies = subpassDependencies.data(), })}; - auto framebuffer{(*gpu.vkDevice).createFramebuffer(vk::FramebufferCreateInfo{ - .renderPass = renderPass, - .attachmentCount = static_cast(attachments.size()), - .pAttachments = attachments.data(), - .width = renderArea.extent.width, - .height = renderArea.extent.height, - .layers = 1, - }, nullptr, *gpu.vkDevice.getDispatcher())}; - storage->framebuffer = framebuffer; + auto useImagelessFramebuffer{gpu.traits.supportsImagelessFramebuffers}; + cache::FramebufferCreateInfo framebufferCreateInfo{ + vk::FramebufferCreateInfo{ + .flags = useImagelessFramebuffer ? 
vk::FramebufferCreateFlagBits::eImageless : vk::FramebufferCreateFlags{}, + .renderPass = renderPass, + .attachmentCount = static_cast(attachments.size()), + .pAttachments = attachments.data(), + .width = renderArea.extent.width, + .height = renderArea.extent.height, + .layers = 1, + }, + vk::FramebufferAttachmentsCreateInfo{ + .attachmentImageInfoCount = static_cast(attachmentInfo.size()), + .pAttachmentImageInfos = attachmentInfo.data(), + } + }; - commandBuffer.beginRenderPass(vk::RenderPassBeginInfo{ - .renderPass = renderPass, - .framebuffer = framebuffer, - .renderArea = renderArea, - .clearValueCount = static_cast(clearValues.size()), - .pClearValues = clearValues.data(), - }, vk::SubpassContents::eInline); + if (!useImagelessFramebuffer) + framebufferCreateInfo.unlink(); - cycle->AttachObject(storage); + auto framebuffer{gpu.framebufferCache.GetFramebuffer(framebufferCreateInfo)}; + + vk::StructureChain renderPassBeginInfo{ + vk::RenderPassBeginInfo{ + .renderPass = renderPass, + .framebuffer = framebuffer, + .renderArea = renderArea, + .clearValueCount = static_cast(clearValues.size()), + .pClearValues = clearValues.data(), + }, + vk::RenderPassAttachmentBeginInfo{ + .attachmentCount = static_cast(attachments.size()), + .pAttachments = attachments.data(), + } + }; + + if (!useImagelessFramebuffer) + renderPassBeginInfo.unlink(); + + commandBuffer.beginRenderPass(renderPassBeginInfo.get(), vk::SubpassContents::eInline); return renderPass; } diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.h b/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.h index feca8ba5..e74b235d 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.h @@ -28,19 +28,8 @@ namespace skyline::gpu::interconnect::node { */ struct RenderPassNode { private: - /** - * @brief Storage for all resources in the VkRenderPass that have their lifetimes bond to the completion fence - */ - struct 
Storage : public FenceCycleDependency { - vk::raii::Device *device{}; - vk::Framebuffer framebuffer{}; - - ~Storage(); - }; - - std::shared_ptr storage; - std::vector attachments; + std::vector attachmentInfo; std::vector attachmentDescriptions; std::vector attachmentReferences; @@ -69,12 +58,12 @@ namespace skyline::gpu::interconnect::node { * @note Any preservation of attachments from previous subpasses is automatically handled by this * @return The index of the attachment in the render pass which can be utilized with VkAttachmentReference */ - u32 AddAttachment(TextureView *view); + u32 AddAttachment(TextureView *view, GPU& gpu); /** * @brief Creates a subpass with the attachments bound in the specified order */ - void AddSubpass(span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment); + void AddSubpass(span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment, GPU& gpu); /** * @brief Clears a color attachment in the current subpass with VK_ATTACHMENT_LOAD_OP_CLEAR @@ -82,14 +71,14 @@ namespace skyline::gpu::interconnect::node { * @return If the attachment could be cleared or not due to conflicts with other operations * @note We require a subpass to be attached during this as the clear will not take place unless it's referenced by a subpass */ - bool ClearColorAttachment(u32 colorAttachment, const vk::ClearColorValue &value); + bool ClearColorAttachment(u32 colorAttachment, const vk::ClearColorValue &value, GPU& gpu); /** * @brief Clears the depth/stencil attachment in the current subpass with VK_ATTACHMENT_LOAD_OP_CLEAR * @return If the attachment could be cleared or not due to conflicts with other operations * @note We require a subpass to be attached during this as the clear will not take place unless it's referenced by a subpass */ - bool ClearDepthStencilAttachment(const vk::ClearDepthStencilValue &value); + bool ClearDepthStencilAttachment(const vk::ClearDepthStencilValue &value, GPU& gpu); vk::RenderPass 
operator()(vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &cycle, GPU &gpu); }; diff --git a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h index 3d6e0d50..3462ca83 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h @@ -77,9 +77,12 @@ namespace skyline::gpu::interconnect { constexpr texture::Dimensions NullImageDimensions{1, 1, 1}; constexpr vk::ImageLayout NullImageInitialLayout{vk::ImageLayout::eUndefined}; constexpr vk::ImageTiling NullImageTiling{vk::ImageTiling::eOptimal}; + constexpr vk::ImageCreateFlags NullImageFlags{}; + constexpr vk::ImageUsageFlags NullImageUsage{vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eSampled}; auto vkImage{gpu.memory.AllocateImage( { + .flags = NullImageFlags, .imageType = vk::ImageType::e2D, .format = NullImageFormat->vkFormat, .extent = NullImageDimensions, @@ -87,7 +90,7 @@ namespace skyline::gpu::interconnect { .arrayLayers = 1, .samples = vk::SampleCountFlagBits::e1, .tiling = NullImageTiling, - .usage = vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eSampled, + .usage = NullImageUsage, .sharingMode = vk::SharingMode::eExclusive, .queueFamilyIndexCount = 1, .pQueueFamilyIndices = &gpu.vkQueueFamilyIndex, @@ -95,7 +98,7 @@ namespace skyline::gpu::interconnect { } )}; - auto nullTexture{std::make_shared(gpu, std::move(vkImage), NullImageDimensions, NullImageFormat, NullImageInitialLayout, NullImageTiling)}; + auto nullTexture{std::make_shared(gpu, std::move(vkImage), NullImageDimensions, NullImageFormat, NullImageInitialLayout, NullImageTiling, NullImageFlags, NullImageUsage)}; nullTexture->TransitionLayout(vk::ImageLayout::eGeneral); nullTextureView = nullTexture->GetView(vk::ImageViewType::e2D, vk::ImageSubresourceRange{ .aspectMask = vk::ImageAspectFlagBits::eColor, diff --git 
a/app/src/main/cpp/skyline/gpu/presentation_engine.cpp b/app/src/main/cpp/skyline/gpu/presentation_engine.cpp index 7557e4bb..bcf8a7e4 100644 --- a/app/src/main/cpp/skyline/gpu/presentation_engine.cpp +++ b/app/src/main/cpp/skyline/gpu/presentation_engine.cpp @@ -160,7 +160,7 @@ namespace skyline::gpu { for (size_t index{}; index < vkImages.size(); index++) { auto &slot{images[index]}; - slot = std::make_shared(*state.gpu, vkImages[index], extent, format, vk::ImageLayout::eUndefined, vk::ImageTiling::eOptimal); + slot = std::make_shared(*state.gpu, vkImages[index], extent, format, vk::ImageLayout::eUndefined, vk::ImageTiling::eOptimal, vk::ImageCreateFlags{}, presentUsage); slot->TransitionLayout(vk::ImageLayout::ePresentSrcKHR); } for (size_t index{vkImages.size()}; index < MaxSwapchainImageCount; index++) diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.cpp b/app/src/main/cpp/skyline/gpu/texture/texture.cpp index b4926bbd..5f8d73dd 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.cpp +++ b/app/src/main/cpp/skyline/gpu/texture/texture.cpp @@ -277,13 +277,15 @@ namespace skyline::gpu { texture->CopyToGuest(stagingBuffer ? 
stagingBuffer->data() : std::get(texture->backing).data()); } - Texture::Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout layout, vk::ImageTiling tiling, u32 mipLevels, u32 layerCount, vk::SampleCountFlagBits sampleCount) + Texture::Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ImageCreateFlags flags, vk::ImageUsageFlags usage, u32 mipLevels, u32 layerCount, vk::SampleCountFlagBits sampleCount) : gpu(gpu), backing(std::move(backing)), dimensions(dimensions), format(format), layout(layout), tiling(tiling), + flags(flags), + usage(usage), mipLevels(mipLevels), layerCount(layerCount), sampleCount(sampleCount) {} @@ -297,8 +299,9 @@ namespace skyline::gpu { tiling(vk::ImageTiling::eOptimal), // Force Optimal due to not adhering to host subresource layout during Linear synchronization mipLevels(1), layerCount(guest->layerCount), - sampleCount(vk::SampleCountFlagBits::e1) { - vk::ImageUsageFlags usage{vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled}; + sampleCount(vk::SampleCountFlagBits::e1), + flags(gpu.traits.quirks.vkImageMutableFormatCostly ? vk::ImageCreateFlags{} : vk::ImageCreateFlagBits::eMutableFormat), + usage(vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled) { if ((format->vkAspect & vk::ImageAspectFlagBits::eColor) && !format->IsCompressed()) usage |= vk::ImageUsageFlagBits::eColorAttachment; if (format->vkAspect & (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) @@ -319,14 +322,11 @@ namespace skyline::gpu { } } - vk::ImageCreateFlags flags{gpu.traits.quirks.vkImageMutableFormatCostly ? 
vk::ImageCreateFlags{} : vk::ImageCreateFlagBits::eMutableFormat}; - if (imageType == vk::ImageType::e2D && dimensions.width == dimensions.height && layerCount >= 6) flags |= vk::ImageCreateFlagBits::eCubeCompatible; else if (imageType == vk::ImageType::e3D) flags |= vk::ImageCreateFlagBits::e2DArrayCompatible; - vk::ImageCreateInfo imageCreateInfo{ .flags = flags, .imageType = imageType, diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.h b/app/src/main/cpp/skyline/gpu/texture/texture.h index bc58d6e3..632c345a 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.h +++ b/app/src/main/cpp/skyline/gpu/texture/texture.h @@ -413,6 +413,8 @@ namespace skyline::gpu { texture::Format format; vk::ImageLayout layout; vk::ImageTiling tiling; + vk::ImageCreateFlags flags; + vk::ImageUsageFlags usage; u32 mipLevels; u32 layerCount; //!< The amount of array layers in the image, utilized for efficient binding (Not to be confused with the depth or faces in a cubemap) vk::SampleCountFlagBits sampleCount; @@ -421,7 +423,7 @@ namespace skyline::gpu { * @brief Creates a texture object wrapping the supplied backing with the supplied attributes * @param layout The initial layout of the texture, it **must** be eUndefined or ePreinitialized */ - Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout layout, vk::ImageTiling tiling, u32 mipLevels = 1, u32 layerCount = 1, vk::SampleCountFlagBits sampleCount = vk::SampleCountFlagBits::e1); + Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ImageCreateFlags flags, vk::ImageUsageFlags usage, u32 mipLevels = 1, u32 layerCount = 1, vk::SampleCountFlagBits sampleCount = vk::SampleCountFlagBits::e1); /** * @brief Creates a texture object wrapping the guest texture with a backing that can represent the guest texture data diff --git a/app/src/main/cpp/skyline/gpu/trait_manager.cpp 
b/app/src/main/cpp/skyline/gpu/trait_manager.cpp index 91e6c7d1..55d644f3 100644 --- a/app/src/main/cpp/skyline/gpu/trait_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/trait_manager.cpp @@ -6,7 +6,7 @@ namespace skyline::gpu { TraitManager::TraitManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector &deviceExtensions, std::vector> &enabledExtensions, const DeviceProperties2 &deviceProperties2) : quirks(deviceProperties2.get().properties, deviceProperties2.get()) { - bool hasCustomBorderColorExt{}, hasShaderAtomicInt64Ext{}, hasShaderFloat16Int8Ext{}, hasShaderDemoteToHelperExt{}, hasVertexAttributeDivisorExt{}, hasProvokingVertexExt{}, hasPrimitiveTopologyListRestartExt{}; + bool hasCustomBorderColorExt{}, hasShaderAtomicInt64Ext{}, hasShaderFloat16Int8Ext{}, hasShaderDemoteToHelperExt{}, hasVertexAttributeDivisorExt{}, hasProvokingVertexExt{}, hasPrimitiveTopologyListRestartExt{}, hasImagelessFramebuffersExt{}; bool supportsUniformBufferStandardLayout{}; // We require VK_KHR_uniform_buffer_standard_layout but assume it is implicitly supported even when not present for (auto &extension : deviceExtensions) { @@ -36,6 +36,7 @@ namespace skyline::gpu { EXT_SET("VK_EXT_provoking_vertex", hasProvokingVertexExt); EXT_SET("VK_EXT_vertex_attribute_divisor", hasVertexAttributeDivisorExt); EXT_SET("VK_KHR_push_descriptor", supportsPushDescriptors); + EXT_SET("VK_KHR_imageless_framebuffer", hasImagelessFramebuffersExt); EXT_SET("VK_EXT_global_priority", supportsGlobalPriority); EXT_SET("VK_EXT_shader_viewport_index_layer", supportsShaderViewportIndexLayer); EXT_SET("VK_KHR_spirv_1_4", supportsSpirv14); @@ -120,6 +121,12 @@ namespace skyline::gpu { enabledFeatures2.unlink(); } + if (hasImagelessFramebuffersExt) { + FEAT_SET(vk::PhysicalDeviceImagelessFramebufferFeatures, imagelessFramebuffer, supportsImagelessFramebuffers) + } else { + enabledFeatures2.unlink(); + } + #undef FEAT_SET if (supportsFloatControls) @@ -132,8 +139,8 @@ 
namespace skyline::gpu { std::string TraitManager::Summary() { return fmt::format( - "\n* Supports U8 Indices: {}\n* Supports Sampler Mirror Clamp To Edge: {}\n* Supports Sampler Reduction Mode: {}\n* Supports Custom Border Color (Without Format): {}\n* Supports Last Provoking Vertex: {}\n* Supports Logical Operations: {}\n* Supports Vertex Attribute Divisor: {}\n* Supports Vertex Attribute Zero Divisor: {}\n* Supports Push Descriptors: {}\n* Supports Global Priority: {}\n* Supports Multiple Viewports: {}\n* Supports Shader Viewport Index: {}\n* Supports SPIR-V 1.4: {}\n* Supports Shader Invocation Demotion: {}\n* Supports 16-bit FP: {}\n* Supports 8-bit Integers: {}\n* Supports 16-bit Integers: {}\n* Supports 64-bit Integers: {}\n* Supports Atomic 64-bit Integers: {}\n* Supports Floating Point Behavior Control: {}\n* Supports Image Read Without Format: {}\n* Supports List Primitive Topology Restart: {}\n* Supports Patch List Primitive Topology Restart: {}\n* Supports Subgroup Vote: {}\n* Subgroup Size: {}", - supportsUint8Indices, supportsSamplerMirrorClampToEdge, supportsSamplerReductionMode, supportsCustomBorderColor, supportsLastProvokingVertex, supportsLogicOp, supportsVertexAttributeDivisor, supportsVertexAttributeZeroDivisor, supportsPushDescriptors, supportsGlobalPriority, supportsMultipleViewports, supportsShaderViewportIndexLayer, supportsSpirv14, supportsShaderDemoteToHelper, supportsFloat16, supportsInt8, supportsInt16, supportsInt64, supportsAtomicInt64, supportsFloatControls, supportsImageReadWithoutFormat, supportsTopologyListRestart, supportsTopologyPatchListRestart, supportsSubgroupVote, subgroupSize + "\n* Supports U8 Indices: {}\n* Supports Sampler Mirror Clamp To Edge: {}\n* Supports Sampler Reduction Mode: {}\n* Supports Custom Border Color (Without Format): {}\n* Supports Last Provoking Vertex: {}\n* Supports Logical Operations: {}\n* Supports Vertex Attribute Divisor: {}\n* Supports Vertex Attribute Zero Divisor: {}\n* Supports Push 
Descriptors: {}\n* Supports Imageless Framebuffers: {}\n* Supports Global Priority: {}\n* Supports Multiple Viewports: {}\n* Supports Shader Viewport Index: {}\n* Supports SPIR-V 1.4: {}\n* Supports Shader Invocation Demotion: {}\n* Supports 16-bit FP: {}\n* Supports 8-bit Integers: {}\n* Supports 16-bit Integers: {}\n* Supports 64-bit Integers: {}\n* Supports Atomic 64-bit Integers: {}\n* Supports Floating Point Behavior Control: {}\n* Supports Image Read Without Format: {}\n* Supports List Primitive Topology Restart: {}\n* Supports Patch List Primitive Topology Restart: {}\n* Supports Subgroup Vote: {}\n* Subgroup Size: {}", + supportsUint8Indices, supportsSamplerMirrorClampToEdge, supportsSamplerReductionMode, supportsCustomBorderColor, supportsLastProvokingVertex, supportsLogicOp, supportsVertexAttributeDivisor, supportsVertexAttributeZeroDivisor, supportsPushDescriptors, supportsImagelessFramebuffers, supportsGlobalPriority, supportsMultipleViewports, supportsShaderViewportIndexLayer, supportsSpirv14, supportsShaderDemoteToHelper, supportsFloat16, supportsInt8, supportsInt16, supportsInt64, supportsAtomicInt64, supportsFloatControls, supportsImageReadWithoutFormat, supportsTopologyListRestart, supportsTopologyPatchListRestart, supportsSubgroupVote, subgroupSize ); } diff --git a/app/src/main/cpp/skyline/gpu/trait_manager.h b/app/src/main/cpp/skyline/gpu/trait_manager.h index f32d912a..3bbec62a 100644 --- a/app/src/main/cpp/skyline/gpu/trait_manager.h +++ b/app/src/main/cpp/skyline/gpu/trait_manager.h @@ -21,6 +21,7 @@ namespace skyline::gpu { bool supportsVertexAttributeDivisor{}; //!< If the device supports a divisor for instance-rate vertex attributes (with VK_EXT_vertex_attribute_divisor) bool supportsVertexAttributeZeroDivisor{}; //!< If the device supports a zero divisor for instance-rate vertex attributes (with VK_EXT_vertex_attribute_divisor) bool supportsPushDescriptors{}; //!< If the device supports push descriptors (with VK_KHR_push_descriptor) + 
bool supportsImagelessFramebuffers{}; //!< If the device supports imageless framebuffers (with VK_KHR_imageless_framebuffer) bool supportsGlobalPriority{}; //!< If the device supports global priorities for queues (with VK_EXT_global_priority) bool supportsMultipleViewports{}; //!< If the device supports more than one viewport bool supportsShaderViewportIndexLayer{}; //!< If the device supports retrieving the viewport index in shaders (with VK_EXT_shader_viewport_index_layer) @@ -79,7 +80,8 @@ namespace skyline::gpu { vk::PhysicalDeviceUniformBufferStandardLayoutFeatures, vk::PhysicalDeviceShaderDrawParametersFeatures, vk::PhysicalDeviceProvokingVertexFeaturesEXT, - vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT>; + vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT, + vk::PhysicalDeviceImagelessFramebufferFeatures>; TraitManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector &deviceExtensions, std::vector> &enabledExtensions, const DeviceProperties2 &deviceProperties2);