diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index 35ae069a..d45a1570 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -171,6 +171,7 @@ add_library(skyline SHARED ${source_DIR}/skyline/gpu/shader_manager.cpp ${source_DIR}/skyline/gpu/cache/graphics_pipeline_cache.cpp ${source_DIR}/skyline/gpu/cache/renderpass_cache.cpp + ${source_DIR}/skyline/gpu/cache/framebuffer_cache.cpp ${source_DIR}/skyline/gpu/interconnect/command_executor.cpp ${source_DIR}/skyline/gpu/interconnect/command_nodes.cpp ${source_DIR}/skyline/gpu/interconnect/conversion/quads.cpp diff --git a/app/src/main/cpp/skyline/gpu.cpp b/app/src/main/cpp/skyline/gpu.cpp index 574115b2..7b4364e7 100644 --- a/app/src/main/cpp/skyline/gpu.cpp +++ b/app/src/main/cpp/skyline/gpu.cpp @@ -140,7 +140,8 @@ namespace skyline::gpu { vk::PhysicalDeviceUniformBufferStandardLayoutFeatures, vk::PhysicalDeviceShaderDrawParametersFeatures, vk::PhysicalDeviceProvokingVertexFeaturesEXT, - vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT>()}; + vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT, + vk::PhysicalDeviceImagelessFramebufferFeatures>()}; decltype(deviceFeatures2) enabledFeatures2{}; // We only want to enable features we required due to potential overhead from unused features #define FEAT_REQ(structName, feature) \ @@ -279,5 +280,6 @@ namespace skyline::gpu { descriptor(*this), shader(state, *this), graphicsPipelineCache(*this), - renderPassCache(*this) {} + renderPassCache(*this), + framebufferCache(*this) {} } diff --git a/app/src/main/cpp/skyline/gpu.h b/app/src/main/cpp/skyline/gpu.h index dea96159..c446c8c0 100644 --- a/app/src/main/cpp/skyline/gpu.h +++ b/app/src/main/cpp/skyline/gpu.h @@ -13,6 +13,7 @@ #include "gpu/shader_manager.h" #include "gpu/cache/graphics_pipeline_cache.h" #include "gpu/cache/renderpass_cache.h" +#include "gpu/cache/framebuffer_cache.h" namespace skyline::gpu { static constexpr u32 VkApiVersion{VK_API_VERSION_1_1}; //!< The version of core Vulkan that 
we require @@ -49,6 +50,7 @@ namespace skyline::gpu { cache::GraphicsPipelineCache graphicsPipelineCache; cache::RenderPassCache renderPassCache; + cache::FramebufferCache framebufferCache; GPU(const DeviceState &state); }; diff --git a/app/src/main/cpp/skyline/gpu/cache/framebuffer_cache.cpp b/app/src/main/cpp/skyline/gpu/cache/framebuffer_cache.cpp new file mode 100644 index 00000000..2114986a --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/cache/framebuffer_cache.cpp @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include +#include +#include "framebuffer_cache.h" + +namespace skyline::gpu::cache { + FramebufferCache::FramebufferCache(GPU &gpu) : gpu(gpu) {} + + FramebufferCache::FramebufferImagelessAttachment::FramebufferImagelessAttachment(const vk::FramebufferAttachmentImageInfo &info) : flags(info.flags), usage(info.usage), width(info.width), height(info.height), layers(info.layerCount), format(*info.pViewFormats) {} + + FramebufferCache::FramebufferCacheKey::FramebufferCacheKey(const FramebufferCreateInfo &createInfo) { + auto &info{createInfo.get()}; + flags = info.flags; + renderPass = info.renderPass; + width = info.width; + height = info.height; + layers = info.layers; + + if (createInfo.isLinked()) { + auto &attachmentInfo{createInfo.get()}; + std::vector imagelessAttachments; + imagelessAttachments.reserve(attachmentInfo.attachmentImageInfoCount); + for (const auto &image : span(attachmentInfo.pAttachmentImageInfos, attachmentInfo.attachmentImageInfoCount)) + imagelessAttachments.emplace_back(image); + attachments.emplace>(std::move(imagelessAttachments)); + } else { + std::vector imageViews; + imageViews.reserve(info.attachmentCount); + for (const auto &image : span(info.pAttachments, info.attachmentCount)) + imageViews.emplace_back(image); + attachments.emplace>(std::move(imageViews)); + } + } + + #define HASH(x) boost::hash_combine(hash, x) + + size_t
FramebufferCache::FramebufferHash::operator()(const FramebufferCacheKey &key) const { + size_t hash{}; + + HASH(static_cast(key.flags)); + HASH(static_cast(key.renderPass)); + HASH(key.width); + HASH(key.height); + HASH(key.layers); + + std::visit(VariantVisitor{ + [&hash](const std::vector &attachments) { + HASH(attachments.size()); + for (const auto &attachment : attachments) { + HASH(static_cast(attachment.flags)); + HASH(static_cast(attachment.usage)); + HASH(attachment.width); + HASH(attachment.height); + HASH(attachment.layers); + HASH(attachment.format); + } + }, + [&hash](const std::vector &attachments) { + HASH(attachments.size()); + for (const auto &attachment : attachments) + HASH(static_cast(attachment)); + } + }, key.attachments); + + return hash; + } + + size_t FramebufferCache::FramebufferHash::operator()(const FramebufferCreateInfo &key) const { + size_t hash{}; + + auto &info{key.get()}; + + HASH(static_cast(info.flags)); + HASH(static_cast(info.renderPass)); + HASH(info.width); + HASH(info.height); + HASH(info.layers); + + if (info.flags & vk::FramebufferCreateFlagBits::eImageless) { + auto &attachmentInfo{key.get()}; + HASH(attachmentInfo.attachmentImageInfoCount); // Must mirror HASH(attachments.size()) in the FramebufferCacheKey overload, transparent lookup only hits when both overloads produce the same hash + for (const vk::FramebufferAttachmentImageInfo &image : span(attachmentInfo.pAttachmentImageInfos, attachmentInfo.attachmentImageInfoCount)) { + HASH(static_cast(image.flags)); + HASH(static_cast(image.usage)); + HASH(image.width); + HASH(image.height); + HASH(image.layerCount); + HASH(*image.pViewFormats); + } + } else { + HASH(info.attachmentCount); + for (const auto &view : span(info.pAttachments, info.attachmentCount)) + HASH(static_cast(view)); + } + + return hash; + } + + #undef HASH + + bool FramebufferCache::FramebufferEqual::operator()(const FramebufferCacheKey &lhs, const FramebufferCacheKey &rhs) const { + return lhs == rhs; + } + + bool FramebufferCache::FramebufferEqual::operator()(const FramebufferCacheKey &lhs, const FramebufferCreateInfo &rhs) const { + #define RETF(condition) if (condition) { return false; } + +
auto &rhsInfo{rhs.get()}; + + RETF(lhs.flags != rhsInfo.flags) + RETF(lhs.renderPass != rhsInfo.renderPass) + RETF(lhs.width != rhsInfo.width) + RETF(lhs.height != rhsInfo.height) + RETF(lhs.layers != rhsInfo.layers) + + if (lhs.flags & vk::FramebufferCreateFlagBits::eImageless) { + auto &lhsAttachments{std::get>(lhs.attachments)}; + auto &rhsAttachments{rhs.get()}; + + RETF(lhsAttachments.size() != rhsAttachments.attachmentImageInfoCount) + const vk::FramebufferAttachmentImageInfo *rhsAttachmentInfo{rhsAttachments.pAttachmentImageInfos}; + for (const auto &attachment : lhsAttachments) { + RETF(attachment.flags != rhsAttachmentInfo->flags) + RETF(attachment.usage != rhsAttachmentInfo->usage) + RETF(attachment.width != rhsAttachmentInfo->width) + RETF(attachment.height != rhsAttachmentInfo->height) + RETF(attachment.layers != rhsAttachmentInfo->layerCount) + RETF(attachment.format != *rhsAttachmentInfo->pViewFormats) + rhsAttachmentInfo++; + } + } else { + auto &lhsAttachments{std::get>(lhs.attachments)}; + span rhsAttachments{rhsInfo.pAttachments, rhsInfo.attachmentCount}; + RETF(!std::equal(lhsAttachments.begin(), lhsAttachments.end(), rhsAttachments.begin(), rhsAttachments.end())) + } + + #undef RETF + + return true; + } + + vk::Framebuffer FramebufferCache::GetFramebuffer(const FramebufferCreateInfo &createInfo) { + std::scoped_lock lock{mutex}; + auto it{framebufferCache.find(createInfo)}; + if (it != framebufferCache.end()) + return *it->second; + + auto entryIt{framebufferCache.try_emplace(FramebufferCacheKey{createInfo}, gpu.vkDevice, createInfo.get())}; + return *entryIt.first->second; + } +} diff --git a/app/src/main/cpp/skyline/gpu/cache/framebuffer_cache.h b/app/src/main/cpp/skyline/gpu/cache/framebuffer_cache.h new file mode 100644 index 00000000..da57b481 --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/cache/framebuffer_cache.h @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2022 Skyline Team and Contributors 
(https://github.com/skyline-emu/) + +#pragma once + +#include "common.h" + +namespace skyline::gpu::cache { + using FramebufferCreateInfo = vk::StructureChain; + + /** + * @brief A cache for Vulkan framebuffers to avoid unnecessary recreation, optimized for both fixed image and imageless attachments + * @note It is generally expensive to create a framebuffer on TBDRs since it involves calculating tiling memory allocations and in the case of Adreno's proprietary driver involves several kernel calls for mapping and allocating the corresponding framebuffer memory + */ + class FramebufferCache { + private: + GPU &gpu; + std::mutex mutex; //!< Synchronizes access to the cache + + private: + /** + * @brief An equivalent to VkFramebufferAttachmentImageInfo with more suitable semantics for storage + * @url https://www.khronos.org/registry/vulkan/specs/1.3-extensions/man/html/VkFramebufferAttachmentImageInfo.html + */ + struct FramebufferImagelessAttachment { + vk::ImageCreateFlags flags; + vk::ImageUsageFlags usage; + u32 width; + u32 height; + u32 layers; + vk::Format format; + + FramebufferImagelessAttachment(const vk::FramebufferAttachmentImageInfo &info); + + bool operator==(const FramebufferImagelessAttachment &other) const = default; + }; + + struct FramebufferCacheKey { + vk::FramebufferCreateFlags flags; + vk::RenderPass renderPass; + u32 width; + u32 height; + u32 layers; + std::variant, std::vector> attachments; + + FramebufferCacheKey(const FramebufferCreateInfo &createInfo); + + bool operator==(const FramebufferCacheKey &other) const = default; + }; + + struct FramebufferHash { + using is_transparent = std::true_type; + + size_t operator()(const FramebufferCacheKey &key) const; + + size_t operator()(const FramebufferCreateInfo &key) const; + }; + + struct FramebufferEqual { + using is_transparent = std::true_type; + + bool operator()(const FramebufferCacheKey &lhs, const FramebufferCacheKey &rhs) const; + + bool operator()(const FramebufferCacheKey &lhs, const 
FramebufferCreateInfo &rhs) const; + }; + + std::unordered_map framebufferCache; + + public: + FramebufferCache(GPU &gpu); + + /** + * @note When using imageless framebuffer attachments, VkFramebufferAttachmentImageInfo **must** have a single view format + * @note When using image framebuffer attachments, it is expected that the supplied image handle will remain stable for the cache to function + */ + vk::Framebuffer GetFramebuffer(const FramebufferCreateInfo &createInfo); + }; +} diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp index 571765a2..9fccbdfc 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp @@ -13,7 +13,7 @@ namespace skyline::gpu::interconnect { bool CommandExecutor::CreateRenderPassWithSubpass(vk::Rect2D renderArea, span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment) { auto addSubpass{[&] { - renderPass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment); + renderPass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment, gpu); lastSubpassAttachments.clear(); auto insertAttachmentRange{[this](auto &attachments) -> std::pair { @@ -114,7 +114,7 @@ namespace skyline::gpu::interconnect { void CommandExecutor::AddClearColorSubpass(TextureView *attachment, const vk::ClearColorValue &value) { bool gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, attachment, nullptr)}; - if (renderPass->ClearColorAttachment(0, value)) { + if (renderPass->ClearColorAttachment(0, value, gpu)) { if (gotoNext) nodes.emplace_back(std::in_place_type_t()); } else { @@ -139,7 +139,7 @@ namespace skyline::gpu::interconnect { void CommandExecutor::AddClearDepthStencilSubpass(TextureView *attachment, const vk::ClearDepthStencilValue &value) { bool 
gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, {}, attachment)}; - if (renderPass->ClearDepthStencilAttachment(value)) { + if (renderPass->ClearDepthStencilAttachment(value, gpu)) { if (gotoNext) nodes.emplace_back(std::in_place_type_t()); } else { diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h index 6d778c69..474bd4c7 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h @@ -19,15 +19,15 @@ namespace skyline::gpu::interconnect { boost::container::stable_vector nodes; node::RenderPassNode *renderPass{}; size_t subpassCount{}; //!< The number of subpasses in the current render pass - std::unordered_set attachedTextures; //!< All textures that need to be synced prior to and after execution + std::unordered_set attachedTextures; //!< All textures that need to be synced prior to and after execution using SharedBufferDelegate = std::shared_ptr; std::unordered_set attachedBuffers; //!< All buffers that are attached to the current execution - std::vector lastSubpassAttachments; //!< The storage backing for attachments used in the last subpass - span lastSubpassInputAttachments; //!< The set of input attachments used in the last subpass - span lastSubpassColorAttachments; //!< The set of color attachments used in the last subpass - TextureView* lastSubpassDepthStencilAttachment{}; //!< The depth stencil attachment used in the last subpass + std::vector lastSubpassAttachments; //!< The storage backing for attachments used in the last subpass + span lastSubpassInputAttachments; //!< The set of input attachments used in the last subpass + span lastSubpassColorAttachments; //!< The set of color attachments used in the last subpass + TextureView *lastSubpassDepthStencilAttachment{}; //!< The depth stencil attachment used in the last subpass /** * @brief 
Create a new render pass and subpass with the specified attachments, if one doesn't already exist or the current one isn't compatible diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.cpp index 52a3bf40..1ee64fef 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.cpp @@ -16,20 +16,26 @@ namespace skyline::gpu::interconnect::node { .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, } } - ), storage(std::make_shared()), renderArea(renderArea) {} + ), renderArea(renderArea) {} - RenderPassNode::Storage::~Storage() { - if (device) - if (framebuffer) - (**device).destroy(framebuffer, nullptr, *device->getDispatcher()); - } - - u32 RenderPassNode::AddAttachment(TextureView *view) { + u32 RenderPassNode::AddAttachment(TextureView *view, GPU &gpu) { auto vkView{view->GetView()}; auto attachment{std::find(attachments.begin(), attachments.end(), vkView)}; if (attachment == attachments.end()) { // If we cannot find any matches for the specified attachment, we add it as a new one attachments.push_back(vkView); + + if (gpu.traits.supportsImagelessFramebuffers) + attachmentInfo.push_back(vk::FramebufferAttachmentImageInfo{ + .flags = view->texture->flags, + .usage = view->texture->usage, + .width = view->texture->dimensions.width, + .height = view->texture->dimensions.height, + .layerCount = view->texture->layerCount, + .viewFormatCount = 1, + .pViewFormats = &view->format->vkFormat, + }); + attachmentDescriptions.push_back(vk::AttachmentDescription{ .format = *view->format, .initialLayout = view->texture->layout, @@ -109,13 +115,13 @@ namespace skyline::gpu::interconnect::node { } } - void RenderPassNode::AddSubpass(span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment) { + void RenderPassNode::AddSubpass(span inputAttachments, span colorAttachments, 
TextureView *depthStencilAttachment, GPU& gpu) { attachmentReferences.reserve(attachmentReferences.size() + inputAttachments.size() + colorAttachments.size() + (depthStencilAttachment ? 1 : 0)); auto inputAttachmentsOffset{attachmentReferences.size() * sizeof(vk::AttachmentReference)}; for (auto &attachment : inputAttachments) { attachmentReferences.push_back(vk::AttachmentReference{ - .attachment = AddAttachment(attachment), + .attachment = AddAttachment(attachment, gpu), .layout = attachment->texture->layout, }); } @@ -123,7 +129,7 @@ namespace skyline::gpu::interconnect::node { auto colorAttachmentsOffset{attachmentReferences.size() * sizeof(vk::AttachmentReference)}; // Calculate new base offset as it has changed since we pushed the input attachments for (auto &attachment : colorAttachments) { attachmentReferences.push_back(vk::AttachmentReference{ - .attachment = AddAttachment(attachment), + .attachment = AddAttachment(attachment, gpu), .layout = attachment->texture->layout, }); } @@ -131,7 +137,7 @@ namespace skyline::gpu::interconnect::node { auto depthStencilAttachmentOffset{attachmentReferences.size() * sizeof(vk::AttachmentReference)}; if (depthStencilAttachment) { attachmentReferences.push_back(vk::AttachmentReference{ - .attachment = AddAttachment(depthStencilAttachment), + .attachment = AddAttachment(depthStencilAttachment, gpu), .layout = depthStencilAttachment->texture->layout, }); } @@ -149,7 +155,7 @@ namespace skyline::gpu::interconnect::node { }); } - bool RenderPassNode::ClearColorAttachment(u32 colorAttachment, const vk::ClearColorValue &value) { + bool RenderPassNode::ClearColorAttachment(u32 colorAttachment, const vk::ClearColorValue &value, GPU& gpu) { auto attachmentReference{RebasePointer(attachmentReferences, subpassDescriptions.back().pColorAttachments) + colorAttachment}; auto attachmentIndex{attachmentReference->attachment}; @@ -172,7 +178,7 @@ namespace skyline::gpu::interconnect::node { return false; } - bool 
RenderPassNode::ClearDepthStencilAttachment(const vk::ClearDepthStencilValue &value) { + bool RenderPassNode::ClearDepthStencilAttachment(const vk::ClearDepthStencilValue &value, GPU& gpu) { auto attachmentReference{RebasePointer(attachmentReferences, subpassDescriptions.back().pDepthStencilAttachment)}; auto attachmentIndex{attachmentReference->attachment}; @@ -196,8 +202,6 @@ namespace skyline::gpu::interconnect::node { } vk::RenderPass RenderPassNode::operator()(vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &cycle, GPU &gpu) { - storage->device = &gpu.vkDevice; - auto preserveAttachmentIt{preserveAttachmentReferences.begin()}; for (auto &subpassDescription : subpassDescriptions) { subpassDescription.pInputAttachments = RebasePointer(attachmentReferences, subpassDescription.pInputAttachments); @@ -223,25 +227,46 @@ namespace skyline::gpu::interconnect::node { .pDependencies = subpassDependencies.data(), })}; - auto framebuffer{(*gpu.vkDevice).createFramebuffer(vk::FramebufferCreateInfo{ - .renderPass = renderPass, - .attachmentCount = static_cast(attachments.size()), - .pAttachments = attachments.data(), - .width = renderArea.extent.width, - .height = renderArea.extent.height, - .layers = 1, - }, nullptr, *gpu.vkDevice.getDispatcher())}; - storage->framebuffer = framebuffer; + auto useImagelessFramebuffer{gpu.traits.supportsImagelessFramebuffers}; + cache::FramebufferCreateInfo framebufferCreateInfo{ + vk::FramebufferCreateInfo{ + .flags = useImagelessFramebuffer ? 
vk::FramebufferCreateFlagBits::eImageless : vk::FramebufferCreateFlags{}, + .renderPass = renderPass, + .attachmentCount = static_cast(attachments.size()), + .pAttachments = attachments.data(), + .width = renderArea.extent.width, + .height = renderArea.extent.height, + .layers = 1, + }, + vk::FramebufferAttachmentsCreateInfo{ + .attachmentImageInfoCount = static_cast(attachmentInfo.size()), + .pAttachmentImageInfos = attachmentInfo.data(), + } + }; - commandBuffer.beginRenderPass(vk::RenderPassBeginInfo{ - .renderPass = renderPass, - .framebuffer = framebuffer, - .renderArea = renderArea, - .clearValueCount = static_cast(clearValues.size()), - .pClearValues = clearValues.data(), - }, vk::SubpassContents::eInline); + if (!useImagelessFramebuffer) + framebufferCreateInfo.unlink(); - cycle->AttachObject(storage); + auto framebuffer{gpu.framebufferCache.GetFramebuffer(framebufferCreateInfo)}; + + vk::StructureChain renderPassBeginInfo{ + vk::RenderPassBeginInfo{ + .renderPass = renderPass, + .framebuffer = framebuffer, + .renderArea = renderArea, + .clearValueCount = static_cast(clearValues.size()), + .pClearValues = clearValues.data(), + }, + vk::RenderPassAttachmentBeginInfo{ + .attachmentCount = static_cast(attachments.size()), + .pAttachments = attachments.data(), + } + }; + + if (!useImagelessFramebuffer) + renderPassBeginInfo.unlink(); + + commandBuffer.beginRenderPass(renderPassBeginInfo.get(), vk::SubpassContents::eInline); return renderPass; } diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.h b/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.h index feca8ba5..e74b235d 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_nodes.h @@ -28,19 +28,8 @@ namespace skyline::gpu::interconnect::node { */ struct RenderPassNode { private: - /** - * @brief Storage for all resources in the VkRenderPass that have their lifetimes bond to the completion fence - */ - struct 
Storage : public FenceCycleDependency { - vk::raii::Device *device{}; - vk::Framebuffer framebuffer{}; - - ~Storage(); - }; - - std::shared_ptr storage; - std::vector attachments; + std::vector attachmentInfo; std::vector attachmentDescriptions; std::vector attachmentReferences; @@ -69,12 +58,12 @@ namespace skyline::gpu::interconnect::node { * @note Any preservation of attachments from previous subpasses is automatically handled by this * @return The index of the attachment in the render pass which can be utilized with VkAttachmentReference */ - u32 AddAttachment(TextureView *view); + u32 AddAttachment(TextureView *view, GPU& gpu); /** * @brief Creates a subpass with the attachments bound in the specified order */ - void AddSubpass(span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment); + void AddSubpass(span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment, GPU& gpu); /** * @brief Clears a color attachment in the current subpass with VK_ATTACHMENT_LOAD_OP_CLEAR @@ -82,14 +71,14 @@ namespace skyline::gpu::interconnect::node { * @return If the attachment could be cleared or not due to conflicts with other operations * @note We require a subpass to be attached during this as the clear will not take place unless it's referenced by a subpass */ - bool ClearColorAttachment(u32 colorAttachment, const vk::ClearColorValue &value); + bool ClearColorAttachment(u32 colorAttachment, const vk::ClearColorValue &value, GPU& gpu); /** * @brief Clears the depth/stencil attachment in the current subpass with VK_ATTACHMENT_LOAD_OP_CLEAR * @return If the attachment could be cleared or not due to conflicts with other operations * @note We require a subpass to be attached during this as the clear will not take place unless it's referenced by a subpass */ - bool ClearDepthStencilAttachment(const vk::ClearDepthStencilValue &value); + bool ClearDepthStencilAttachment(const vk::ClearDepthStencilValue &value, GPU& gpu); vk::RenderPass 
operator()(vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &cycle, GPU &gpu); }; diff --git a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h index 3d6e0d50..3462ca83 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h @@ -77,9 +77,12 @@ namespace skyline::gpu::interconnect { constexpr texture::Dimensions NullImageDimensions{1, 1, 1}; constexpr vk::ImageLayout NullImageInitialLayout{vk::ImageLayout::eUndefined}; constexpr vk::ImageTiling NullImageTiling{vk::ImageTiling::eOptimal}; + constexpr vk::ImageCreateFlags NullImageFlags{}; + constexpr vk::ImageUsageFlags NullImageUsage{vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eSampled}; auto vkImage{gpu.memory.AllocateImage( { + .flags = NullImageFlags, .imageType = vk::ImageType::e2D, .format = NullImageFormat->vkFormat, .extent = NullImageDimensions, @@ -87,7 +90,7 @@ namespace skyline::gpu::interconnect { .arrayLayers = 1, .samples = vk::SampleCountFlagBits::e1, .tiling = NullImageTiling, - .usage = vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eSampled, + .usage = NullImageUsage, .sharingMode = vk::SharingMode::eExclusive, .queueFamilyIndexCount = 1, .pQueueFamilyIndices = &gpu.vkQueueFamilyIndex, @@ -95,7 +98,7 @@ namespace skyline::gpu::interconnect { } )}; - auto nullTexture{std::make_shared(gpu, std::move(vkImage), NullImageDimensions, NullImageFormat, NullImageInitialLayout, NullImageTiling)}; + auto nullTexture{std::make_shared(gpu, std::move(vkImage), NullImageDimensions, NullImageFormat, NullImageInitialLayout, NullImageTiling, NullImageFlags, NullImageUsage)}; nullTexture->TransitionLayout(vk::ImageLayout::eGeneral); nullTextureView = nullTexture->GetView(vk::ImageViewType::e2D, vk::ImageSubresourceRange{ .aspectMask = vk::ImageAspectFlagBits::eColor, diff --git 
a/app/src/main/cpp/skyline/gpu/presentation_engine.cpp b/app/src/main/cpp/skyline/gpu/presentation_engine.cpp index 7557e4bb..bcf8a7e4 100644 --- a/app/src/main/cpp/skyline/gpu/presentation_engine.cpp +++ b/app/src/main/cpp/skyline/gpu/presentation_engine.cpp @@ -160,7 +160,7 @@ namespace skyline::gpu { for (size_t index{}; index < vkImages.size(); index++) { auto &slot{images[index]}; - slot = std::make_shared(*state.gpu, vkImages[index], extent, format, vk::ImageLayout::eUndefined, vk::ImageTiling::eOptimal); + slot = std::make_shared(*state.gpu, vkImages[index], extent, format, vk::ImageLayout::eUndefined, vk::ImageTiling::eOptimal, vk::ImageCreateFlags{}, presentUsage); slot->TransitionLayout(vk::ImageLayout::ePresentSrcKHR); } for (size_t index{vkImages.size()}; index < MaxSwapchainImageCount; index++) diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.cpp b/app/src/main/cpp/skyline/gpu/texture/texture.cpp index b4926bbd..5f8d73dd 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.cpp +++ b/app/src/main/cpp/skyline/gpu/texture/texture.cpp @@ -277,13 +277,15 @@ namespace skyline::gpu { texture->CopyToGuest(stagingBuffer ? 
stagingBuffer->data() : std::get(texture->backing).data()); } - Texture::Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout layout, vk::ImageTiling tiling, u32 mipLevels, u32 layerCount, vk::SampleCountFlagBits sampleCount) + Texture::Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ImageCreateFlags flags, vk::ImageUsageFlags usage, u32 mipLevels, u32 layerCount, vk::SampleCountFlagBits sampleCount) : gpu(gpu), backing(std::move(backing)), dimensions(dimensions), format(format), layout(layout), tiling(tiling), + flags(flags), + usage(usage), mipLevels(mipLevels), layerCount(layerCount), sampleCount(sampleCount) {} @@ -297,8 +299,9 @@ namespace skyline::gpu { tiling(vk::ImageTiling::eOptimal), // Force Optimal due to not adhering to host subresource layout during Linear synchronization mipLevels(1), layerCount(guest->layerCount), - sampleCount(vk::SampleCountFlagBits::e1) { - vk::ImageUsageFlags usage{vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled}; + sampleCount(vk::SampleCountFlagBits::e1), + flags(gpu.traits.quirks.vkImageMutableFormatCostly ? vk::ImageCreateFlags{} : vk::ImageCreateFlagBits::eMutableFormat), + usage(vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled) { if ((format->vkAspect & vk::ImageAspectFlagBits::eColor) && !format->IsCompressed()) usage |= vk::ImageUsageFlagBits::eColorAttachment; if (format->vkAspect & (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) @@ -319,14 +322,11 @@ namespace skyline::gpu { } } - vk::ImageCreateFlags flags{gpu.traits.quirks.vkImageMutableFormatCostly ? 
vk::ImageCreateFlags{} : vk::ImageCreateFlagBits::eMutableFormat}; - if (imageType == vk::ImageType::e2D && dimensions.width == dimensions.height && layerCount >= 6) flags |= vk::ImageCreateFlagBits::eCubeCompatible; else if (imageType == vk::ImageType::e3D) flags |= vk::ImageCreateFlagBits::e2DArrayCompatible; - vk::ImageCreateInfo imageCreateInfo{ .flags = flags, .imageType = imageType, diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.h b/app/src/main/cpp/skyline/gpu/texture/texture.h index bc58d6e3..632c345a 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.h +++ b/app/src/main/cpp/skyline/gpu/texture/texture.h @@ -413,6 +413,8 @@ namespace skyline::gpu { texture::Format format; vk::ImageLayout layout; vk::ImageTiling tiling; + vk::ImageCreateFlags flags; + vk::ImageUsageFlags usage; u32 mipLevels; u32 layerCount; //!< The amount of array layers in the image, utilized for efficient binding (Not to be confused with the depth or faces in a cubemap) vk::SampleCountFlagBits sampleCount; @@ -421,7 +423,7 @@ namespace skyline::gpu { * @brief Creates a texture object wrapping the supplied backing with the supplied attributes * @param layout The initial layout of the texture, it **must** be eUndefined or ePreinitialized */ - Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout layout, vk::ImageTiling tiling, u32 mipLevels = 1, u32 layerCount = 1, vk::SampleCountFlagBits sampleCount = vk::SampleCountFlagBits::e1); + Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ImageCreateFlags flags, vk::ImageUsageFlags usage, u32 mipLevels = 1, u32 layerCount = 1, vk::SampleCountFlagBits sampleCount = vk::SampleCountFlagBits::e1); /** * @brief Creates a texture object wrapping the guest texture with a backing that can represent the guest texture data diff --git a/app/src/main/cpp/skyline/gpu/trait_manager.cpp 
b/app/src/main/cpp/skyline/gpu/trait_manager.cpp index 91e6c7d1..55d644f3 100644 --- a/app/src/main/cpp/skyline/gpu/trait_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/trait_manager.cpp @@ -6,7 +6,7 @@ namespace skyline::gpu { TraitManager::TraitManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector &deviceExtensions, std::vector> &enabledExtensions, const DeviceProperties2 &deviceProperties2) : quirks(deviceProperties2.get().properties, deviceProperties2.get()) { - bool hasCustomBorderColorExt{}, hasShaderAtomicInt64Ext{}, hasShaderFloat16Int8Ext{}, hasShaderDemoteToHelperExt{}, hasVertexAttributeDivisorExt{}, hasProvokingVertexExt{}, hasPrimitiveTopologyListRestartExt{}; + bool hasCustomBorderColorExt{}, hasShaderAtomicInt64Ext{}, hasShaderFloat16Int8Ext{}, hasShaderDemoteToHelperExt{}, hasVertexAttributeDivisorExt{}, hasProvokingVertexExt{}, hasPrimitiveTopologyListRestartExt{}, hasImagelessFramebuffersExt{}; bool supportsUniformBufferStandardLayout{}; // We require VK_KHR_uniform_buffer_standard_layout but assume it is implicitly supported even when not present for (auto &extension : deviceExtensions) { @@ -36,6 +36,7 @@ namespace skyline::gpu { EXT_SET("VK_EXT_provoking_vertex", hasProvokingVertexExt); EXT_SET("VK_EXT_vertex_attribute_divisor", hasVertexAttributeDivisorExt); EXT_SET("VK_KHR_push_descriptor", supportsPushDescriptors); + EXT_SET("VK_KHR_imageless_framebuffer", hasImagelessFramebuffersExt); EXT_SET("VK_EXT_global_priority", supportsGlobalPriority); EXT_SET("VK_EXT_shader_viewport_index_layer", supportsShaderViewportIndexLayer); EXT_SET("VK_KHR_spirv_1_4", supportsSpirv14); @@ -120,6 +121,12 @@ namespace skyline::gpu { enabledFeatures2.unlink(); } + if (hasImagelessFramebuffersExt) { + FEAT_SET(vk::PhysicalDeviceImagelessFramebufferFeatures, imagelessFramebuffer, supportsImagelessFramebuffers) + } else { + enabledFeatures2.unlink(); + } + #undef FEAT_SET if (supportsFloatControls) @@ -132,8 +139,8 @@ 
namespace skyline::gpu { std::string TraitManager::Summary() { return fmt::format( - "\n* Supports U8 Indices: {}\n* Supports Sampler Mirror Clamp To Edge: {}\n* Supports Sampler Reduction Mode: {}\n* Supports Custom Border Color (Without Format): {}\n* Supports Last Provoking Vertex: {}\n* Supports Logical Operations: {}\n* Supports Vertex Attribute Divisor: {}\n* Supports Vertex Attribute Zero Divisor: {}\n* Supports Push Descriptors: {}\n* Supports Global Priority: {}\n* Supports Multiple Viewports: {}\n* Supports Shader Viewport Index: {}\n* Supports SPIR-V 1.4: {}\n* Supports Shader Invocation Demotion: {}\n* Supports 16-bit FP: {}\n* Supports 8-bit Integers: {}\n* Supports 16-bit Integers: {}\n* Supports 64-bit Integers: {}\n* Supports Atomic 64-bit Integers: {}\n* Supports Floating Point Behavior Control: {}\n* Supports Image Read Without Format: {}\n* Supports List Primitive Topology Restart: {}\n* Supports Patch List Primitive Topology Restart: {}\n* Supports Subgroup Vote: {}\n* Subgroup Size: {}", - supportsUint8Indices, supportsSamplerMirrorClampToEdge, supportsSamplerReductionMode, supportsCustomBorderColor, supportsLastProvokingVertex, supportsLogicOp, supportsVertexAttributeDivisor, supportsVertexAttributeZeroDivisor, supportsPushDescriptors, supportsGlobalPriority, supportsMultipleViewports, supportsShaderViewportIndexLayer, supportsSpirv14, supportsShaderDemoteToHelper, supportsFloat16, supportsInt8, supportsInt16, supportsInt64, supportsAtomicInt64, supportsFloatControls, supportsImageReadWithoutFormat, supportsTopologyListRestart, supportsTopologyPatchListRestart, supportsSubgroupVote, subgroupSize + "\n* Supports U8 Indices: {}\n* Supports Sampler Mirror Clamp To Edge: {}\n* Supports Sampler Reduction Mode: {}\n* Supports Custom Border Color (Without Format): {}\n* Supports Last Provoking Vertex: {}\n* Supports Logical Operations: {}\n* Supports Vertex Attribute Divisor: {}\n* Supports Vertex Attribute Zero Divisor: {}\n* Supports Push 
Descriptors: {}\n* Supports Imageless Framebuffers: {}\n* Supports Global Priority: {}\n* Supports Multiple Viewports: {}\n* Supports Shader Viewport Index: {}\n* Supports SPIR-V 1.4: {}\n* Supports Shader Invocation Demotion: {}\n* Supports 16-bit FP: {}\n* Supports 8-bit Integers: {}\n* Supports 16-bit Integers: {}\n* Supports 64-bit Integers: {}\n* Supports Atomic 64-bit Integers: {}\n* Supports Floating Point Behavior Control: {}\n* Supports Image Read Without Format: {}\n* Supports List Primitive Topology Restart: {}\n* Supports Patch List Primitive Topology Restart: {}\n* Supports Subgroup Vote: {}\n* Subgroup Size: {}", + supportsUint8Indices, supportsSamplerMirrorClampToEdge, supportsSamplerReductionMode, supportsCustomBorderColor, supportsLastProvokingVertex, supportsLogicOp, supportsVertexAttributeDivisor, supportsVertexAttributeZeroDivisor, supportsPushDescriptors, supportsImagelessFramebuffers, supportsGlobalPriority, supportsMultipleViewports, supportsShaderViewportIndexLayer, supportsSpirv14, supportsShaderDemoteToHelper, supportsFloat16, supportsInt8, supportsInt16, supportsInt64, supportsAtomicInt64, supportsFloatControls, supportsImageReadWithoutFormat, supportsTopologyListRestart, supportsTopologyPatchListRestart, supportsSubgroupVote, subgroupSize ); } diff --git a/app/src/main/cpp/skyline/gpu/trait_manager.h b/app/src/main/cpp/skyline/gpu/trait_manager.h index f32d912a..3bbec62a 100644 --- a/app/src/main/cpp/skyline/gpu/trait_manager.h +++ b/app/src/main/cpp/skyline/gpu/trait_manager.h @@ -21,6 +21,7 @@ namespace skyline::gpu { bool supportsVertexAttributeDivisor{}; //!< If the device supports a divisor for instance-rate vertex attributes (with VK_EXT_vertex_attribute_divisor) bool supportsVertexAttributeZeroDivisor{}; //!< If the device supports a zero divisor for instance-rate vertex attributes (with VK_EXT_vertex_attribute_divisor) bool supportsPushDescriptors{}; //!< If the device supports push descriptors (with VK_KHR_push_descriptor) + 
bool supportsImagelessFramebuffers{}; //!< If the device supports imageless framebuffers (with VK_KHR_imageless_framebuffer) bool supportsGlobalPriority{}; //!< If the device supports global priorities for queues (with VK_EXT_global_priority) bool supportsMultipleViewports{}; //!< If the device supports more than one viewport bool supportsShaderViewportIndexLayer{}; //!< If the device supports retrieving the viewport index in shaders (with VK_EXT_shader_viewport_index_layer) @@ -79,7 +80,8 @@ namespace skyline::gpu { vk::PhysicalDeviceUniformBufferStandardLayoutFeatures, vk::PhysicalDeviceShaderDrawParametersFeatures, vk::PhysicalDeviceProvokingVertexFeaturesEXT, - vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT>; + vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT, + vk::PhysicalDeviceImagelessFramebufferFeatures>; TraitManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector &deviceExtensions, std::vector> &enabledExtensions, const DeviceProperties2 &deviceProperties2);