Implement Framebuffer Cache

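Implements a cache for storing `VkFramebuffer` objects, with a special path on devices that support `VK_KHR_imageless_framebuffer`: there the cache key describes each attachment abstractly (creation flags, usage, dimensions, layer count and format) rather than naming a specific image view, which allows for more cache hits.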

Caching framebuffers is a fairly crucial optimization because creating framebuffers on TBDRs is costly: it involves calculating tiling memory allocations and, in the case of Adreno's proprietary driver, several kernel calls for mapping and allocating the corresponding framebuffer memory.
This commit is contained in:
PixelyIon 2022-05-01 18:06:55 +05:30
parent af7f0c301e
commit 42573170c6
15 changed files with 336 additions and 73 deletions

View File

@ -171,6 +171,7 @@ add_library(skyline SHARED
${source_DIR}/skyline/gpu/shader_manager.cpp
${source_DIR}/skyline/gpu/cache/graphics_pipeline_cache.cpp
${source_DIR}/skyline/gpu/cache/renderpass_cache.cpp
${source_DIR}/skyline/gpu/cache/framebuffer_cache.cpp
${source_DIR}/skyline/gpu/interconnect/command_executor.cpp
${source_DIR}/skyline/gpu/interconnect/command_nodes.cpp
${source_DIR}/skyline/gpu/interconnect/conversion/quads.cpp

View File

@ -140,7 +140,8 @@ namespace skyline::gpu {
vk::PhysicalDeviceUniformBufferStandardLayoutFeatures,
vk::PhysicalDeviceShaderDrawParametersFeatures,
vk::PhysicalDeviceProvokingVertexFeaturesEXT,
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT>()};
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
vk::PhysicalDeviceImagelessFramebufferFeatures>()};
decltype(deviceFeatures2) enabledFeatures2{}; // We only want to enable features we required due to potential overhead from unused features
#define FEAT_REQ(structName, feature) \
@ -279,5 +280,6 @@ namespace skyline::gpu {
descriptor(*this),
shader(state, *this),
graphicsPipelineCache(*this),
renderPassCache(*this) {}
renderPassCache(*this),
framebufferCache(*this) {}
}

View File

@ -13,6 +13,7 @@
#include "gpu/shader_manager.h"
#include "gpu/cache/graphics_pipeline_cache.h"
#include "gpu/cache/renderpass_cache.h"
#include "gpu/cache/framebuffer_cache.h"
namespace skyline::gpu {
static constexpr u32 VkApiVersion{VK_API_VERSION_1_1}; //!< The version of core Vulkan that we require
@ -49,6 +50,7 @@ namespace skyline::gpu {
cache::GraphicsPipelineCache graphicsPipelineCache;
cache::RenderPassCache renderPassCache;
cache::FramebufferCache framebufferCache;
GPU(const DeviceState &state);
};

View File

@ -0,0 +1,152 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <boost/functional/hash.hpp>
#include <gpu.h>
#include "framebuffer_cache.h"
namespace skyline::gpu::cache {
/**
 * @brief Creates an empty framebuffer cache bound to the supplied GPU
 * @param gpu The GPU instance that is stored by reference for later framebuffer creation
 */
FramebufferCache::FramebufferCache(GPU &gpu)
    : gpu(gpu) {}
/**
 * @brief Copies the identifying fields out of a VkFramebufferAttachmentImageInfo so they can be stored by value
 * @note Only the first entry of pViewFormats is read, the supplied structure must therefore point to at least one view format
 */
FramebufferCache::FramebufferImagelessAttachment::FramebufferImagelessAttachment(const vk::FramebufferAttachmentImageInfo &info)
    : flags(info.flags),
      usage(info.usage),
      width(info.width),
      height(info.height),
      layers(info.layerCount),
      format(info.pViewFormats[0]) {}
/**
 * @brief Builds an owning cache key out of a framebuffer creation structure chain
 * @param createInfo The structure chain describing the framebuffer, all attachment data it points to is copied into the key
 * @note The attachment representation is selected by the eImageless creation flag, this is the same discriminator that FramebufferHash and FramebufferEqual use for a FramebufferCreateInfo so the key can never hold a different alternative than those expect
 */
FramebufferCache::FramebufferCacheKey::FramebufferCacheKey(const FramebufferCreateInfo &createInfo) {
    auto &info{createInfo.get<vk::FramebufferCreateInfo>()};
    flags = info.flags;
    renderPass = info.renderPass;
    width = info.width;
    height = info.height;
    layers = info.layers;

    if (info.flags & vk::FramebufferCreateFlagBits::eImageless) {
        // Imageless framebuffers are identified by the properties of their attachments rather than by concrete image views
        auto &attachmentInfo{createInfo.get<vk::FramebufferAttachmentsCreateInfo>()};
        auto &imagelessAttachments{attachments.emplace<std::vector<FramebufferImagelessAttachment>>()};
        imagelessAttachments.reserve(attachmentInfo.attachmentImageInfoCount);
        for (const auto &image : span<const vk::FramebufferAttachmentImageInfo>(attachmentInfo.pAttachmentImageInfos, attachmentInfo.attachmentImageInfoCount))
            imagelessAttachments.emplace_back(image);
    } else {
        // Regular framebuffers are identified by the exact image view handles that they are bound to
        span<const vk::ImageView> imageViews{info.pAttachments, info.attachmentCount};
        attachments.emplace<std::vector<vk::ImageView>>(imageViews.begin(), imageViews.end());
    }
}
#define HASH(x) boost::hash_combine(hash, x)
/**
 * @brief Computes the hash of an owning cache key
 * @note The fields combined here (and their order) have to stay in sync with the FramebufferCreateInfo overload, transparent lookups only work when both overloads agree on the hash of the same framebuffer
 */
size_t FramebufferCache::FramebufferHash::operator()(const FramebufferCacheKey &key) const {
    size_t hash{};

    // Properties shared by both kinds of framebuffer
    HASH(static_cast<VkFramebufferCreateFlags>(key.flags));
    HASH(static_cast<VkRenderPass>(key.renderPass));
    HASH(key.width);
    HASH(key.height);
    HASH(key.layers);

    using ImagelessAttachments = std::vector<FramebufferImagelessAttachment>;
    if (std::holds_alternative<ImagelessAttachments>(key.attachments)) {
        const auto &imageless{std::get<ImagelessAttachments>(key.attachments)};
        HASH(imageless.size());
        for (const auto &entry : imageless) {
            HASH(static_cast<VkImageCreateFlags>(entry.flags));
            HASH(static_cast<VkImageUsageFlags>(entry.usage));
            HASH(entry.width);
            HASH(entry.height);
            HASH(entry.layers);
            HASH(entry.format);
        }
    } else {
        const auto &views{std::get<std::vector<vk::ImageView>>(key.attachments)};
        HASH(views.size());
        for (const auto &view : views)
            HASH(static_cast<VkImageView>(view));
    }

    return hash;
}
/**
 * @brief Computes the hash of a framebuffer creation structure chain without building a cache key from it
 * @note This must return exactly the hash that the FramebufferCacheKey overload returns for a key built from the same chain, otherwise the transparent lookup in GetFramebuffer can never hit
 */
size_t FramebufferCache::FramebufferHash::operator()(const FramebufferCreateInfo &key) const {
    size_t hash{};
    auto &info{key.get<vk::FramebufferCreateInfo>()};

    HASH(static_cast<VkFramebufferCreateFlags>(info.flags));
    HASH(static_cast<VkRenderPass>(info.renderPass));
    HASH(info.width);
    HASH(info.height);
    HASH(info.layers);

    if (info.flags & vk::FramebufferCreateFlagBits::eImageless) {
        auto &attachmentInfo{key.get<vk::FramebufferAttachmentsCreateInfo>()};

        // The key overload hashes the attachment count (as a size_t) before the attachments, the same has to be done here to end up with an identical hash
        HASH(static_cast<size_t>(attachmentInfo.attachmentImageInfoCount));
        for (const vk::FramebufferAttachmentImageInfo &image : span<const vk::FramebufferAttachmentImageInfo>(attachmentInfo.pAttachmentImageInfos, attachmentInfo.attachmentImageInfoCount)) {
            HASH(static_cast<VkImageCreateFlags>(image.flags));
            HASH(static_cast<VkImageUsageFlags>(image.usage));
            HASH(image.width);
            HASH(image.height);
            HASH(image.layerCount);
            HASH(*image.pViewFormats);
        }
    } else {
        // Cast to size_t so the hashed type matches the vector size hashed by the key overload
        HASH(static_cast<size_t>(info.attachmentCount));
        for (const auto &view : span<const vk::ImageView>(info.pAttachments, info.attachmentCount))
            HASH(static_cast<VkImageView>(view));
    }

    return hash;
}
#undef HASH
/**
 * @brief Compares two owning cache keys member by member using the defaulted equality operator of the key
 */
bool FramebufferCache::FramebufferEqual::operator()(const FramebufferCacheKey &lhs, const FramebufferCacheKey &rhs) const {
    const bool keysMatch{lhs == rhs};
    return keysMatch;
}
/**
 * @brief Compares a stored cache key against a framebuffer creation structure chain without converting the chain into a key
 * @param lhs The key stored in the cache
 * @param rhs The structure chain that is being looked up
 * @return If both describe the same framebuffer
 */
bool FramebufferCache::FramebufferEqual::operator()(const FramebufferCacheKey &lhs, const FramebufferCreateInfo &rhs) const {
    const auto &createInfo{rhs.get<vk::FramebufferCreateInfo>()};

    // Properties shared by both kinds of framebuffer are checked first, in the same order as they are declared in the key
    if (!(lhs.flags == createInfo.flags))
        return false;
    if (!(lhs.renderPass == createInfo.renderPass))
        return false;
    if (lhs.width != createInfo.width || lhs.height != createInfo.height || lhs.layers != createInfo.layers)
        return false;

    if (lhs.flags & vk::FramebufferCreateFlagBits::eImageless) {
        const auto &storedAttachments{std::get<std::vector<FramebufferImagelessAttachment>>(lhs.attachments)};
        const auto &chainedAttachments{rhs.get<vk::FramebufferAttachmentsCreateInfo>()};
        if (storedAttachments.size() != chainedAttachments.attachmentImageInfoCount)
            return false;

        // Both sequences have the same length at this point so they can be walked with a common index
        for (size_t index{}; index < storedAttachments.size(); index++) {
            const auto &stored{storedAttachments[index]};
            const auto &candidate{chainedAttachments.pAttachmentImageInfos[index]};

            if (stored.flags != candidate.flags)
                return false;
            if (stored.usage != candidate.usage)
                return false;
            if (stored.width != candidate.width || stored.height != candidate.height || stored.layers != candidate.layerCount)
                return false;
            if (stored.format != *candidate.pViewFormats)
                return false;
        }

        return true;
    }

    const auto &storedViews{std::get<std::vector<vk::ImageView>>(lhs.attachments)};
    span<const vk::ImageView> suppliedViews{createInfo.pAttachments, createInfo.attachmentCount};
    return std::equal(storedViews.begin(), storedViews.end(), suppliedViews.begin(), suppliedViews.end());
}
/**
 * @brief Retrieves a framebuffer matching the supplied creation info, creating and caching it when no match exists yet
 * @param createInfo The structure chain describing the desired framebuffer
 * @return A non-owning handle to the framebuffer, the cache retains ownership of the underlying object
 */
vk::Framebuffer FramebufferCache::GetFramebuffer(const FramebufferCreateInfo &createInfo) {
    std::scoped_lock guard{mutex};

    // Look the creation info up directly so that a hit doesn't have to construct an owning key
    if (auto cached{framebufferCache.find(createInfo)}; cached != framebufferCache.end())
        return *cached->second;

    // Only a miss pays for copying the attachments into a key and for creating the framebuffer
    auto &created{framebufferCache.try_emplace(FramebufferCacheKey{createInfo}, gpu.vkDevice, createInfo.get<vk::FramebufferCreateInfo>()).first->second};
    return *created;
}
}

View File

@ -0,0 +1,78 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include "common.h"
namespace skyline::gpu::cache {
using FramebufferCreateInfo = vk::StructureChain<vk::FramebufferCreateInfo, vk::FramebufferAttachmentsCreateInfo>;
/**
 * @brief A cache for Vulkan framebuffers to avoid unnecessary recreation, optimized for both fixed image and imageless attachments
 * @note It is generally expensive to create a framebuffer on TBDRs since it involves calculating tiling memory allocations and in the case of Adreno's proprietary driver involves several kernel calls for mapping and allocating the corresponding framebuffer memory
 */
class FramebufferCache {
  private:
    GPU &gpu;
    std::mutex mutex; //!< Synchronizes access to the cache

    /**
     * @brief An equivalent to VkFramebufferAttachmentImageInfo with more suitable semantics for storage
     * @url https://www.khronos.org/registry/vulkan/specs/1.3-extensions/man/html/VkFramebufferAttachmentImageInfo.html
     */
    struct FramebufferImagelessAttachment {
        vk::ImageCreateFlags flags;
        vk::ImageUsageFlags usage;
        u32 width;
        u32 height;
        u32 layers;
        vk::Format format; //!< The single view format of the attachment

        explicit FramebufferImagelessAttachment(const vk::FramebufferAttachmentImageInfo &info);

        bool operator==(const FramebufferImagelessAttachment &other) const = default;
    };

    /**
     * @brief An owning description of a framebuffer which is used as the key of the cache
     */
    struct FramebufferCacheKey {
        vk::FramebufferCreateFlags flags;
        vk::RenderPass renderPass;
        u32 width;
        u32 height;
        u32 layers;
        std::variant<std::vector<vk::ImageView>, std::vector<FramebufferImagelessAttachment>> attachments; //!< Either the concrete image views or the abstract attachment descriptions of an imageless framebuffer

        explicit FramebufferCacheKey(const FramebufferCreateInfo &createInfo);

        bool operator==(const FramebufferCacheKey &other) const = default;
    };

    /**
     * @brief A transparent hasher which allows looking up a FramebufferCreateInfo without converting it into a key
     */
    struct FramebufferHash {
        using is_transparent = std::true_type;

        size_t operator()(const FramebufferCacheKey &key) const;

        size_t operator()(const FramebufferCreateInfo &key) const;
    };

    /**
     * @brief A transparent equality predicate which allows comparing a key with a FramebufferCreateInfo directly
     */
    struct FramebufferEqual {
        using is_transparent = std::true_type;

        bool operator()(const FramebufferCacheKey &lhs, const FramebufferCacheKey &rhs) const;

        bool operator()(const FramebufferCacheKey &lhs, const FramebufferCreateInfo &rhs) const;

        /**
         * @note Heterogeneous lookup requires the predicate to be callable with the arguments in either order, which order is used depends on the standard library implementation
         */
        bool operator()(const FramebufferCreateInfo &lhs, const FramebufferCacheKey &rhs) const {
            return (*this)(rhs, lhs);
        }
    };

    std::unordered_map<FramebufferCacheKey, vk::raii::Framebuffer, FramebufferHash, FramebufferEqual> framebufferCache;

  public:
    explicit FramebufferCache(GPU &gpu);

    /**
     * @note When using imageless framebuffer attachments, VkFramebufferAttachmentImageInfo **must** have a single view format
     * @note When using image framebuffer attachments, it is expected that the supplied image handle will remain stable for the cache to function
     */
    vk::Framebuffer GetFramebuffer(const FramebufferCreateInfo &createInfo);
};
}

View File

@ -13,7 +13,7 @@ namespace skyline::gpu::interconnect {
bool CommandExecutor::CreateRenderPassWithSubpass(vk::Rect2D renderArea, span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment) {
auto addSubpass{[&] {
renderPass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment);
renderPass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment, gpu);
lastSubpassAttachments.clear();
auto insertAttachmentRange{[this](auto &attachments) -> std::pair<size_t, size_t> {
@ -114,7 +114,7 @@ namespace skyline::gpu::interconnect {
void CommandExecutor::AddClearColorSubpass(TextureView *attachment, const vk::ClearColorValue &value) {
bool gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, attachment, nullptr)};
if (renderPass->ClearColorAttachment(0, value)) {
if (renderPass->ClearColorAttachment(0, value, gpu)) {
if (gotoNext)
nodes.emplace_back(std::in_place_type_t<node::NextSubpassNode>());
} else {
@ -139,7 +139,7 @@ namespace skyline::gpu::interconnect {
void CommandExecutor::AddClearDepthStencilSubpass(TextureView *attachment, const vk::ClearDepthStencilValue &value) {
bool gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, {}, attachment)};
if (renderPass->ClearDepthStencilAttachment(value)) {
if (renderPass->ClearDepthStencilAttachment(value, gpu)) {
if (gotoNext)
nodes.emplace_back(std::in_place_type_t<node::NextSubpassNode>());
} else {

View File

@ -19,15 +19,15 @@ namespace skyline::gpu::interconnect {
boost::container::stable_vector<node::NodeVariant> nodes;
node::RenderPassNode *renderPass{};
size_t subpassCount{}; //!< The number of subpasses in the current render pass
std::unordered_set<Texture *> attachedTextures; //!< All textures that need to be synced prior to and after execution
std::unordered_set<Texture *> attachedTextures; //!< All textures that need to be synced prior to and after execution
using SharedBufferDelegate = std::shared_ptr<Buffer::BufferDelegate>;
std::unordered_set<SharedBufferDelegate> attachedBuffers; //!< All buffers that are attached to the current execution
std::vector<TextureView*> lastSubpassAttachments; //!< The storage backing for attachments used in the last subpass
span<TextureView*> lastSubpassInputAttachments; //!< The set of input attachments used in the last subpass
span<TextureView*> lastSubpassColorAttachments; //!< The set of color attachments used in the last subpass
TextureView* lastSubpassDepthStencilAttachment{}; //!< The depth stencil attachment used in the last subpass
std::vector<TextureView *> lastSubpassAttachments; //!< The storage backing for attachments used in the last subpass
span<TextureView *> lastSubpassInputAttachments; //!< The set of input attachments used in the last subpass
span<TextureView *> lastSubpassColorAttachments; //!< The set of color attachments used in the last subpass
TextureView *lastSubpassDepthStencilAttachment{}; //!< The depth stencil attachment used in the last subpass
/**
* @brief Create a new render pass and subpass with the specified attachments, if one doesn't already exist or the current one isn't compatible

View File

@ -16,20 +16,26 @@ namespace skyline::gpu::interconnect::node {
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
}
}
), storage(std::make_shared<Storage>()), renderArea(renderArea) {}
), renderArea(renderArea) {}
RenderPassNode::Storage::~Storage() {
if (device)
if (framebuffer)
(**device).destroy(framebuffer, nullptr, *device->getDispatcher());
}
u32 RenderPassNode::AddAttachment(TextureView *view) {
u32 RenderPassNode::AddAttachment(TextureView *view, GPU &gpu) {
auto vkView{view->GetView()};
auto attachment{std::find(attachments.begin(), attachments.end(), vkView)};
if (attachment == attachments.end()) {
// If we cannot find any matches for the specified attachment, we add it as a new one
attachments.push_back(vkView);
if (gpu.traits.supportsImagelessFramebuffers)
attachmentInfo.push_back(vk::FramebufferAttachmentImageInfo{
.flags = view->texture->flags,
.usage = view->texture->usage,
.width = view->texture->dimensions.width,
.height = view->texture->dimensions.height,
.layerCount = view->texture->layerCount,
.viewFormatCount = 1,
.pViewFormats = &view->format->vkFormat,
});
attachmentDescriptions.push_back(vk::AttachmentDescription{
.format = *view->format,
.initialLayout = view->texture->layout,
@ -109,13 +115,13 @@ namespace skyline::gpu::interconnect::node {
}
}
void RenderPassNode::AddSubpass(span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment) {
void RenderPassNode::AddSubpass(span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, GPU& gpu) {
attachmentReferences.reserve(attachmentReferences.size() + inputAttachments.size() + colorAttachments.size() + (depthStencilAttachment ? 1 : 0));
auto inputAttachmentsOffset{attachmentReferences.size() * sizeof(vk::AttachmentReference)};
for (auto &attachment : inputAttachments) {
attachmentReferences.push_back(vk::AttachmentReference{
.attachment = AddAttachment(attachment),
.attachment = AddAttachment(attachment, gpu),
.layout = attachment->texture->layout,
});
}
@ -123,7 +129,7 @@ namespace skyline::gpu::interconnect::node {
auto colorAttachmentsOffset{attachmentReferences.size() * sizeof(vk::AttachmentReference)}; // Calculate new base offset as it has changed since we pushed the input attachments
for (auto &attachment : colorAttachments) {
attachmentReferences.push_back(vk::AttachmentReference{
.attachment = AddAttachment(attachment),
.attachment = AddAttachment(attachment, gpu),
.layout = attachment->texture->layout,
});
}
@ -131,7 +137,7 @@ namespace skyline::gpu::interconnect::node {
auto depthStencilAttachmentOffset{attachmentReferences.size() * sizeof(vk::AttachmentReference)};
if (depthStencilAttachment) {
attachmentReferences.push_back(vk::AttachmentReference{
.attachment = AddAttachment(depthStencilAttachment),
.attachment = AddAttachment(depthStencilAttachment, gpu),
.layout = depthStencilAttachment->texture->layout,
});
}
@ -149,7 +155,7 @@ namespace skyline::gpu::interconnect::node {
});
}
bool RenderPassNode::ClearColorAttachment(u32 colorAttachment, const vk::ClearColorValue &value) {
bool RenderPassNode::ClearColorAttachment(u32 colorAttachment, const vk::ClearColorValue &value, GPU& gpu) {
auto attachmentReference{RebasePointer(attachmentReferences, subpassDescriptions.back().pColorAttachments) + colorAttachment};
auto attachmentIndex{attachmentReference->attachment};
@ -172,7 +178,7 @@ namespace skyline::gpu::interconnect::node {
return false;
}
bool RenderPassNode::ClearDepthStencilAttachment(const vk::ClearDepthStencilValue &value) {
bool RenderPassNode::ClearDepthStencilAttachment(const vk::ClearDepthStencilValue &value, GPU& gpu) {
auto attachmentReference{RebasePointer(attachmentReferences, subpassDescriptions.back().pDepthStencilAttachment)};
auto attachmentIndex{attachmentReference->attachment};
@ -196,8 +202,6 @@ namespace skyline::gpu::interconnect::node {
}
vk::RenderPass RenderPassNode::operator()(vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &gpu) {
storage->device = &gpu.vkDevice;
auto preserveAttachmentIt{preserveAttachmentReferences.begin()};
for (auto &subpassDescription : subpassDescriptions) {
subpassDescription.pInputAttachments = RebasePointer(attachmentReferences, subpassDescription.pInputAttachments);
@ -223,25 +227,46 @@ namespace skyline::gpu::interconnect::node {
.pDependencies = subpassDependencies.data(),
})};
auto framebuffer{(*gpu.vkDevice).createFramebuffer(vk::FramebufferCreateInfo{
auto useImagelessFramebuffer{gpu.traits.supportsImagelessFramebuffers};
cache::FramebufferCreateInfo framebufferCreateInfo{
vk::FramebufferCreateInfo{
.flags = useImagelessFramebuffer ? vk::FramebufferCreateFlagBits::eImageless : vk::FramebufferCreateFlags{},
.renderPass = renderPass,
.attachmentCount = static_cast<u32>(attachments.size()),
.pAttachments = attachments.data(),
.width = renderArea.extent.width,
.height = renderArea.extent.height,
.layers = 1,
}, nullptr, *gpu.vkDevice.getDispatcher())};
storage->framebuffer = framebuffer;
},
vk::FramebufferAttachmentsCreateInfo{
.attachmentImageInfoCount = static_cast<u32>(attachmentInfo.size()),
.pAttachmentImageInfos = attachmentInfo.data(),
}
};
commandBuffer.beginRenderPass(vk::RenderPassBeginInfo{
if (!useImagelessFramebuffer)
framebufferCreateInfo.unlink<vk::FramebufferAttachmentsCreateInfo>();
auto framebuffer{gpu.framebufferCache.GetFramebuffer(framebufferCreateInfo)};
vk::StructureChain<vk::RenderPassBeginInfo, vk::RenderPassAttachmentBeginInfo> renderPassBeginInfo{
vk::RenderPassBeginInfo{
.renderPass = renderPass,
.framebuffer = framebuffer,
.renderArea = renderArea,
.clearValueCount = static_cast<u32>(clearValues.size()),
.pClearValues = clearValues.data(),
}, vk::SubpassContents::eInline);
},
vk::RenderPassAttachmentBeginInfo{
.attachmentCount = static_cast<u32>(attachments.size()),
.pAttachments = attachments.data(),
}
};
cycle->AttachObject(storage);
if (!useImagelessFramebuffer)
renderPassBeginInfo.unlink<vk::RenderPassAttachmentBeginInfo>();
commandBuffer.beginRenderPass(renderPassBeginInfo.get<vk::RenderPassBeginInfo>(), vk::SubpassContents::eInline);
return renderPass;
}

View File

@ -28,19 +28,8 @@ namespace skyline::gpu::interconnect::node {
*/
struct RenderPassNode {
private:
/**
* @brief Storage for all resources in the VkRenderPass that have their lifetimes bond to the completion fence
*/
struct Storage : public FenceCycleDependency {
vk::raii::Device *device{};
vk::Framebuffer framebuffer{};
~Storage();
};
std::shared_ptr<Storage> storage;
std::vector<vk::ImageView> attachments;
std::vector<vk::FramebufferAttachmentImageInfo> attachmentInfo;
std::vector<vk::AttachmentDescription> attachmentDescriptions;
std::vector<vk::AttachmentReference> attachmentReferences;
@ -69,12 +58,12 @@ namespace skyline::gpu::interconnect::node {
* @note Any preservation of attachments from previous subpasses is automatically handled by this
* @return The index of the attachment in the render pass which can be utilized with VkAttachmentReference
*/
u32 AddAttachment(TextureView *view);
u32 AddAttachment(TextureView *view, GPU& gpu);
/**
* @brief Creates a subpass with the attachments bound in the specified order
*/
void AddSubpass(span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment);
void AddSubpass(span<TextureView *> inputAttachments, span<TextureView *> colorAttachments, TextureView *depthStencilAttachment, GPU& gpu);
/**
* @brief Clears a color attachment in the current subpass with VK_ATTACHMENT_LOAD_OP_CLEAR
@ -82,14 +71,14 @@ namespace skyline::gpu::interconnect::node {
* @return If the attachment could be cleared or not due to conflicts with other operations
* @note We require a subpass to be attached during this as the clear will not take place unless it's referenced by a subpass
*/
bool ClearColorAttachment(u32 colorAttachment, const vk::ClearColorValue &value);
bool ClearColorAttachment(u32 colorAttachment, const vk::ClearColorValue &value, GPU& gpu);
/**
* @brief Clears the depth/stencil attachment in the current subpass with VK_ATTACHMENT_LOAD_OP_CLEAR
* @return If the attachment could be cleared or not due to conflicts with other operations
* @note We require a subpass to be attached during this as the clear will not take place unless it's referenced by a subpass
*/
bool ClearDepthStencilAttachment(const vk::ClearDepthStencilValue &value);
bool ClearDepthStencilAttachment(const vk::ClearDepthStencilValue &value, GPU& gpu);
vk::RenderPass operator()(vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &gpu);
};

View File

@ -77,9 +77,12 @@ namespace skyline::gpu::interconnect {
constexpr texture::Dimensions NullImageDimensions{1, 1, 1};
constexpr vk::ImageLayout NullImageInitialLayout{vk::ImageLayout::eUndefined};
constexpr vk::ImageTiling NullImageTiling{vk::ImageTiling::eOptimal};
constexpr vk::ImageCreateFlags NullImageFlags{};
constexpr vk::ImageUsageFlags NullImageUsage{vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eSampled};
auto vkImage{gpu.memory.AllocateImage(
{
.flags = NullImageFlags,
.imageType = vk::ImageType::e2D,
.format = NullImageFormat->vkFormat,
.extent = NullImageDimensions,
@ -87,7 +90,7 @@ namespace skyline::gpu::interconnect {
.arrayLayers = 1,
.samples = vk::SampleCountFlagBits::e1,
.tiling = NullImageTiling,
.usage = vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eSampled,
.usage = NullImageUsage,
.sharingMode = vk::SharingMode::eExclusive,
.queueFamilyIndexCount = 1,
.pQueueFamilyIndices = &gpu.vkQueueFamilyIndex,
@ -95,7 +98,7 @@ namespace skyline::gpu::interconnect {
}
)};
auto nullTexture{std::make_shared<Texture>(gpu, std::move(vkImage), NullImageDimensions, NullImageFormat, NullImageInitialLayout, NullImageTiling)};
auto nullTexture{std::make_shared<Texture>(gpu, std::move(vkImage), NullImageDimensions, NullImageFormat, NullImageInitialLayout, NullImageTiling, NullImageFlags, NullImageUsage)};
nullTexture->TransitionLayout(vk::ImageLayout::eGeneral);
nullTextureView = nullTexture->GetView(vk::ImageViewType::e2D, vk::ImageSubresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,

View File

@ -160,7 +160,7 @@ namespace skyline::gpu {
for (size_t index{}; index < vkImages.size(); index++) {
auto &slot{images[index]};
slot = std::make_shared<Texture>(*state.gpu, vkImages[index], extent, format, vk::ImageLayout::eUndefined, vk::ImageTiling::eOptimal);
slot = std::make_shared<Texture>(*state.gpu, vkImages[index], extent, format, vk::ImageLayout::eUndefined, vk::ImageTiling::eOptimal, vk::ImageCreateFlags{}, presentUsage);
slot->TransitionLayout(vk::ImageLayout::ePresentSrcKHR);
}
for (size_t index{vkImages.size()}; index < MaxSwapchainImageCount; index++)

View File

@ -277,13 +277,15 @@ namespace skyline::gpu {
texture->CopyToGuest(stagingBuffer ? stagingBuffer->data() : std::get<memory::Image>(texture->backing).data());
}
Texture::Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout layout, vk::ImageTiling tiling, u32 mipLevels, u32 layerCount, vk::SampleCountFlagBits sampleCount)
Texture::Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ImageCreateFlags flags, vk::ImageUsageFlags usage, u32 mipLevels, u32 layerCount, vk::SampleCountFlagBits sampleCount)
: gpu(gpu),
backing(std::move(backing)),
dimensions(dimensions),
format(format),
layout(layout),
tiling(tiling),
flags(flags),
usage(usage),
mipLevels(mipLevels),
layerCount(layerCount),
sampleCount(sampleCount) {}
@ -297,8 +299,9 @@ namespace skyline::gpu {
tiling(vk::ImageTiling::eOptimal), // Force Optimal due to not adhering to host subresource layout during Linear synchronization
mipLevels(1),
layerCount(guest->layerCount),
sampleCount(vk::SampleCountFlagBits::e1) {
vk::ImageUsageFlags usage{vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled};
sampleCount(vk::SampleCountFlagBits::e1),
flags(gpu.traits.quirks.vkImageMutableFormatCostly ? vk::ImageCreateFlags{} : vk::ImageCreateFlagBits::eMutableFormat),
usage(vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled) {
if ((format->vkAspect & vk::ImageAspectFlagBits::eColor) && !format->IsCompressed())
usage |= vk::ImageUsageFlagBits::eColorAttachment;
if (format->vkAspect & (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil))
@ -319,14 +322,11 @@ namespace skyline::gpu {
}
}
vk::ImageCreateFlags flags{gpu.traits.quirks.vkImageMutableFormatCostly ? vk::ImageCreateFlags{} : vk::ImageCreateFlagBits::eMutableFormat};
if (imageType == vk::ImageType::e2D && dimensions.width == dimensions.height && layerCount >= 6)
flags |= vk::ImageCreateFlagBits::eCubeCompatible;
else if (imageType == vk::ImageType::e3D)
flags |= vk::ImageCreateFlagBits::e2DArrayCompatible;
vk::ImageCreateInfo imageCreateInfo{
.flags = flags,
.imageType = imageType,

View File

@ -413,6 +413,8 @@ namespace skyline::gpu {
texture::Format format;
vk::ImageLayout layout;
vk::ImageTiling tiling;
vk::ImageCreateFlags flags;
vk::ImageUsageFlags usage;
u32 mipLevels;
u32 layerCount; //!< The amount of array layers in the image, utilized for efficient binding (Not to be confused with the depth or faces in a cubemap)
vk::SampleCountFlagBits sampleCount;
@ -421,7 +423,7 @@ namespace skyline::gpu {
* @brief Creates a texture object wrapping the supplied backing with the supplied attributes
* @param layout The initial layout of the texture, it **must** be eUndefined or ePreinitialized
*/
Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout layout, vk::ImageTiling tiling, u32 mipLevels = 1, u32 layerCount = 1, vk::SampleCountFlagBits sampleCount = vk::SampleCountFlagBits::e1);
Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ImageCreateFlags flags, vk::ImageUsageFlags usage, u32 mipLevels = 1, u32 layerCount = 1, vk::SampleCountFlagBits sampleCount = vk::SampleCountFlagBits::e1);
/**
* @brief Creates a texture object wrapping the guest texture with a backing that can represent the guest texture data

View File

@ -6,7 +6,7 @@
namespace skyline::gpu {
TraitManager::TraitManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector<vk::ExtensionProperties> &deviceExtensions, std::vector<std::array<char, VK_MAX_EXTENSION_NAME_SIZE>> &enabledExtensions, const DeviceProperties2 &deviceProperties2) : quirks(deviceProperties2.get<vk::PhysicalDeviceProperties2>().properties, deviceProperties2.get<vk::PhysicalDeviceDriverProperties>()) {
bool hasCustomBorderColorExt{}, hasShaderAtomicInt64Ext{}, hasShaderFloat16Int8Ext{}, hasShaderDemoteToHelperExt{}, hasVertexAttributeDivisorExt{}, hasProvokingVertexExt{}, hasPrimitiveTopologyListRestartExt{};
bool hasCustomBorderColorExt{}, hasShaderAtomicInt64Ext{}, hasShaderFloat16Int8Ext{}, hasShaderDemoteToHelperExt{}, hasVertexAttributeDivisorExt{}, hasProvokingVertexExt{}, hasPrimitiveTopologyListRestartExt{}, hasImagelessFramebuffersExt{};
bool supportsUniformBufferStandardLayout{}; // We require VK_KHR_uniform_buffer_standard_layout but assume it is implicitly supported even when not present
for (auto &extension : deviceExtensions) {
@ -36,6 +36,7 @@ namespace skyline::gpu {
EXT_SET("VK_EXT_provoking_vertex", hasProvokingVertexExt);
EXT_SET("VK_EXT_vertex_attribute_divisor", hasVertexAttributeDivisorExt);
EXT_SET("VK_KHR_push_descriptor", supportsPushDescriptors);
EXT_SET("VK_KHR_imageless_framebuffer", hasImagelessFramebuffersExt);
EXT_SET("VK_EXT_global_priority", supportsGlobalPriority);
EXT_SET("VK_EXT_shader_viewport_index_layer", supportsShaderViewportIndexLayer);
EXT_SET("VK_KHR_spirv_1_4", supportsSpirv14);
@ -120,6 +121,12 @@ namespace skyline::gpu {
enabledFeatures2.unlink<vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT>();
}
if (hasImagelessFramebuffersExt) {
FEAT_SET(vk::PhysicalDeviceImagelessFramebufferFeatures, imagelessFramebuffer, supportsImagelessFramebuffers)
} else {
enabledFeatures2.unlink<vk::PhysicalDeviceImagelessFramebufferFeatures>();
}
#undef FEAT_SET
if (supportsFloatControls)
@ -132,8 +139,8 @@ namespace skyline::gpu {
std::string TraitManager::Summary() {
return fmt::format(
"\n* Supports U8 Indices: {}\n* Supports Sampler Mirror Clamp To Edge: {}\n* Supports Sampler Reduction Mode: {}\n* Supports Custom Border Color (Without Format): {}\n* Supports Last Provoking Vertex: {}\n* Supports Logical Operations: {}\n* Supports Vertex Attribute Divisor: {}\n* Supports Vertex Attribute Zero Divisor: {}\n* Supports Push Descriptors: {}\n* Supports Global Priority: {}\n* Supports Multiple Viewports: {}\n* Supports Shader Viewport Index: {}\n* Supports SPIR-V 1.4: {}\n* Supports Shader Invocation Demotion: {}\n* Supports 16-bit FP: {}\n* Supports 8-bit Integers: {}\n* Supports 16-bit Integers: {}\n* Supports 64-bit Integers: {}\n* Supports Atomic 64-bit Integers: {}\n* Supports Floating Point Behavior Control: {}\n* Supports Image Read Without Format: {}\n* Supports List Primitive Topology Restart: {}\n* Supports Patch List Primitive Topology Restart: {}\n* Supports Subgroup Vote: {}\n* Subgroup Size: {}",
supportsUint8Indices, supportsSamplerMirrorClampToEdge, supportsSamplerReductionMode, supportsCustomBorderColor, supportsLastProvokingVertex, supportsLogicOp, supportsVertexAttributeDivisor, supportsVertexAttributeZeroDivisor, supportsPushDescriptors, supportsGlobalPriority, supportsMultipleViewports, supportsShaderViewportIndexLayer, supportsSpirv14, supportsShaderDemoteToHelper, supportsFloat16, supportsInt8, supportsInt16, supportsInt64, supportsAtomicInt64, supportsFloatControls, supportsImageReadWithoutFormat, supportsTopologyListRestart, supportsTopologyPatchListRestart, supportsSubgroupVote, subgroupSize
"\n* Supports U8 Indices: {}\n* Supports Sampler Mirror Clamp To Edge: {}\n* Supports Sampler Reduction Mode: {}\n* Supports Custom Border Color (Without Format): {}\n* Supports Last Provoking Vertex: {}\n* Supports Logical Operations: {}\n* Supports Vertex Attribute Divisor: {}\n* Supports Vertex Attribute Zero Divisor: {}\n* Supports Push Descriptors: {}\n* Supports Imageless Framebuffers: {}\n* Supports Global Priority: {}\n* Supports Multiple Viewports: {}\n* Supports Shader Viewport Index: {}\n* Supports SPIR-V 1.4: {}\n* Supports Shader Invocation Demotion: {}\n* Supports 16-bit FP: {}\n* Supports 8-bit Integers: {}\n* Supports 16-bit Integers: {}\n* Supports 64-bit Integers: {}\n* Supports Atomic 64-bit Integers: {}\n* Supports Floating Point Behavior Control: {}\n* Supports Image Read Without Format: {}\n* Supports List Primitive Topology Restart: {}\n* Supports Patch List Primitive Topology Restart: {}\n* Supports Subgroup Vote: {}\n* Subgroup Size: {}",
supportsUint8Indices, supportsSamplerMirrorClampToEdge, supportsSamplerReductionMode, supportsCustomBorderColor, supportsLastProvokingVertex, supportsLogicOp, supportsVertexAttributeDivisor, supportsVertexAttributeZeroDivisor, supportsPushDescriptors, supportsImagelessFramebuffers, supportsGlobalPriority, supportsMultipleViewports, supportsShaderViewportIndexLayer, supportsSpirv14, supportsShaderDemoteToHelper, supportsFloat16, supportsInt8, supportsInt16, supportsInt64, supportsAtomicInt64, supportsFloatControls, supportsImageReadWithoutFormat, supportsTopologyListRestart, supportsTopologyPatchListRestart, supportsSubgroupVote, subgroupSize
);
}

View File

@ -21,6 +21,7 @@ namespace skyline::gpu {
bool supportsVertexAttributeDivisor{}; //!< If the device supports a divisor for instance-rate vertex attributes (with VK_EXT_vertex_attribute_divisor)
bool supportsVertexAttributeZeroDivisor{}; //!< If the device supports a zero divisor for instance-rate vertex attributes (with VK_EXT_vertex_attribute_divisor)
bool supportsPushDescriptors{}; //!< If the device supports push descriptors (with VK_KHR_push_descriptor)
bool supportsImagelessFramebuffers{}; //!< If the device supports imageless framebuffers (with VK_KHR_imageless_framebuffer)
bool supportsGlobalPriority{}; //!< If the device supports global priorities for queues (with VK_EXT_global_priority)
bool supportsMultipleViewports{}; //!< If the device supports more than one viewport
bool supportsShaderViewportIndexLayer{}; //!< If the device supports retrieving the viewport index in shaders (with VK_EXT_shader_viewport_index_layer)
@ -79,7 +80,8 @@ namespace skyline::gpu {
vk::PhysicalDeviceUniformBufferStandardLayoutFeatures,
vk::PhysicalDeviceShaderDrawParametersFeatures,
vk::PhysicalDeviceProvokingVertexFeaturesEXT,
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT>;
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
vk::PhysicalDeviceImagelessFramebufferFeatures>;
TraitManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector<vk::ExtensionProperties> &deviceExtensions, std::vector<std::array<char, VK_MAX_EXTENSION_NAME_SIZE>> &enabledExtensions, const DeviceProperties2 &deviceProperties2);