From 7ef4959060fdaf9deec067f1c3675d073efc2816 Mon Sep 17 00:00:00 2001 From: PixelyIon Date: Sun, 24 Apr 2022 14:31:00 +0530 Subject: [PATCH] Implement Graphics Pipeline Cache Implements a cache for storing `VkPipeline` objects which are fairly expensive to create and doing so on a per-frame basis was rather wasteful and consumed a significant part of frametime. It should be noted that this is **not** compliant with the Vulkan specification and **will** break unless the driver supports a relaxed version of the Vulkan specification's Render Pass Compatibility clause. --- app/CMakeLists.txt | 1 + app/src/main/cpp/skyline/gpu.cpp | 7 +- app/src/main/cpp/skyline/gpu.h | 3 + .../gpu/cache/graphics_pipeline_cache.cpp | 357 ++++++++++++++++++ .../gpu/cache/graphics_pipeline_cache.h | 155 ++++++++ .../gpu/interconnect/graphics_context.h | 150 +++----- 6 files changed, 579 insertions(+), 94 deletions(-) create mode 100644 app/src/main/cpp/skyline/gpu/cache/graphics_pipeline_cache.cpp create mode 100644 app/src/main/cpp/skyline/gpu/cache/graphics_pipeline_cache.h diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index cf38e2d7..38d13892 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -169,6 +169,7 @@ add_library(skyline SHARED ${source_DIR}/skyline/gpu/buffer.cpp ${source_DIR}/skyline/gpu/presentation_engine.cpp ${source_DIR}/skyline/gpu/shader_manager.cpp + ${source_DIR}/skyline/gpu/cache/graphics_pipeline_cache.cpp ${source_DIR}/skyline/gpu/interconnect/command_executor.cpp ${source_DIR}/skyline/gpu/interconnect/command_nodes.cpp ${source_DIR}/skyline/gpu/interconnect/conversion/quads.cpp diff --git a/app/src/main/cpp/skyline/gpu.cpp b/app/src/main/cpp/skyline/gpu.cpp index f395cd4d..ca6d5ec6 100644 --- a/app/src/main/cpp/skyline/gpu.cpp +++ b/app/src/main/cpp/skyline/gpu.cpp @@ -101,6 +101,10 @@ namespace skyline::gpu { IGNORE_VALIDATION("UNASSIGNED-CoreValidation-SwapchainPreTransform"); // We handle transformation via Android APIs directly 
IGNORE_VALIDATION("UNASSIGNED-GeneralParameterPerfWarn-SuboptimalSwapchain"); // Same as SwapchainPreTransform IGNORE_VALIDATION("UNASSIGNED-CoreValidation-DrawState-InvalidImageLayout"); // We utilize images as VK_IMAGE_LAYOUT_GENERAL rather than optimal layouts for operations + + /* Pipeline Cache isn't compliant with the Vulkan specification, it depends on driver support for a relaxed version of Vulkan specification's Render Pass Compatibility clause and this will result in validation errors regardless which we need to ignore */ + IGNORE_VALIDATION("VUID-vkCmdDrawIndexed-renderPass-02684"); + IGNORE_VALIDATION("VUID-vkCmdDrawIndexed-subpass-02685"); } #undef IGNORE_TYPE @@ -273,5 +277,6 @@ namespace skyline::gpu { texture(*this), buffer(*this), descriptor(*this), - shader(state, *this) {} + shader(state, *this), + graphicsPipelineCache(*this) {} } diff --git a/app/src/main/cpp/skyline/gpu.h b/app/src/main/cpp/skyline/gpu.h index 420a81b8..33c2268c 100644 --- a/app/src/main/cpp/skyline/gpu.h +++ b/app/src/main/cpp/skyline/gpu.h @@ -11,6 +11,7 @@ #include "gpu/buffer_manager.h" #include "gpu/descriptor_allocator.h" #include "gpu/shader_manager.h" +#include "gpu/cache/graphics_pipeline_cache.h" namespace skyline::gpu { static constexpr u32 VkApiVersion{VK_API_VERSION_1_1}; //!< The version of core Vulkan that we require @@ -45,6 +46,8 @@ namespace skyline::gpu { DescriptorAllocator descriptor; ShaderManager shader; + cache::GraphicsPipelineCache graphicsPipelineCache; + GPU(const DeviceState &state); }; } diff --git a/app/src/main/cpp/skyline/gpu/cache/graphics_pipeline_cache.cpp b/app/src/main/cpp/skyline/gpu/cache/graphics_pipeline_cache.cpp new file mode 100644 index 00000000..41e2d31b --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/cache/graphics_pipeline_cache.cpp @@ -0,0 +1,357 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include +#include +#include 
"graphics_pipeline_cache.h" + +namespace skyline::gpu::cache { + GraphicsPipelineCache::GraphicsPipelineCache(GPU &gpu) : gpu(gpu), vkPipelineCache(gpu.vkDevice, vk::PipelineCacheCreateInfo{}) {} + + #define VEC_CPY(pointer, size) state.pointer, state.pointer + state.size + + GraphicsPipelineCache::PipelineCacheKey::PipelineCacheKey(const GraphicsPipelineCache::PipelineState &state) + : shaderStages(state.shaderStages.begin(), state.shaderStages.end()), + vertexState(state.vertexState), + vertexBindings(VEC_CPY(VertexInputState().pVertexBindingDescriptions, VertexInputState().vertexBindingDescriptionCount)), + vertexAttributes(VEC_CPY(VertexInputState().pVertexAttributeDescriptions, VertexInputState().vertexAttributeDescriptionCount)), + vertexDivisors(VEC_CPY(VertexDivisorState().pVertexBindingDivisors, VertexDivisorState().vertexBindingDivisorCount)), + inputAssemblyState(state.inputAssemblyState), + tessellationState(state.tessellationState), + viewportState(state.viewportState), + viewports(VEC_CPY(viewportState.pViewports, viewportState.viewportCount)), + scissors(VEC_CPY(viewportState.pScissors, viewportState.scissorCount)), + rasterizationState(state.rasterizationState), + multisampleState(state.multisampleState), + depthStencilState(state.depthStencilState), + colorBlendState(state.colorBlendState), + colorBlendAttachments(VEC_CPY(colorBlendState.pAttachments, colorBlendState.attachmentCount)) { + auto &vertexInputState{vertexState.get()}; + vertexInputState.pVertexBindingDescriptions = vertexBindings.data(); + vertexInputState.pVertexAttributeDescriptions = vertexAttributes.data(); + vertexState.get().pVertexBindingDivisors = vertexDivisors.data(); + + viewportState.pViewports = viewports.data(); + viewportState.pScissors = scissors.data(); + + colorBlendState.pAttachments = colorBlendAttachments.data(); + + for (auto &colorAttachment : state.colorAttachments) + colorAttachments.emplace_back(AttachmentMetadata{colorAttachment->format->vkFormat, 
colorAttachment->texture->sampleCount}); + if (state.depthStencilAttachment) + depthStencilAttachment.emplace(AttachmentMetadata{state.depthStencilAttachment->format->vkFormat, state.depthStencilAttachment->texture->sampleCount}); + } + + #undef VEC_CPY + + #define HASH(x) boost::hash_combine(hash, x) + + template + size_t HashCommonPipelineState(const T &key, size_t hash = 0) { + HASH(key.shaderStages.size()); + for (const auto &stage : key.shaderStages) { + HASH(stage.stage); + HASH(static_cast(stage.module)); + } + + auto &vertexInputState{key.VertexInputState()}; + HASH(vertexInputState.vertexBindingDescriptionCount); + HASH(vertexInputState.vertexAttributeDescriptionCount); + + if (key.vertexState.template isLinked()) + HASH(key.VertexDivisorState().vertexBindingDivisorCount); + + HASH(key.inputAssemblyState.topology); + HASH(key.inputAssemblyState.primitiveRestartEnable); + + HASH(key.tessellationState.patchControlPoints); + + HASH(key.viewportState.viewportCount); + HASH(key.viewportState.scissorCount); + + auto &rasterizationState{key.RasterizationState()}; + HASH(rasterizationState.depthClampEnable); + HASH(rasterizationState.rasterizerDiscardEnable); + HASH(rasterizationState.polygonMode); + HASH(std::hash{}(rasterizationState.cullMode)); + HASH(rasterizationState.frontFace); + HASH(rasterizationState.depthBiasEnable); + HASH(rasterizationState.depthBiasConstantFactor); + HASH(rasterizationState.depthBiasClamp); + HASH(rasterizationState.depthBiasSlopeFactor); + HASH(rasterizationState.lineWidth); + + if (key.rasterizationState.template isLinked()) + HASH(key.ProvokingVertexState().provokingVertexMode); + + HASH(key.multisampleState.rasterizationSamples); + HASH(key.multisampleState.sampleShadingEnable); + HASH(key.multisampleState.minSampleShading); + HASH(key.multisampleState.alphaToCoverageEnable); + HASH(key.multisampleState.alphaToOneEnable); + + HASH(key.depthStencilState.depthTestEnable); + HASH(key.depthStencilState.depthWriteEnable); + 
HASH(key.depthStencilState.depthCompareOp); + HASH(key.depthStencilState.depthBoundsTestEnable); + HASH(key.depthStencilState.stencilTestEnable); + HASH(key.depthStencilState.front.compareOp); + HASH(key.depthStencilState.front.failOp); + HASH(key.depthStencilState.front.passOp); + HASH(key.depthStencilState.front.depthFailOp); + HASH(key.depthStencilState.front.compareMask); + HASH(key.depthStencilState.front.writeMask); + HASH(key.depthStencilState.front.reference); + HASH(key.depthStencilState.back.compareOp); + HASH(key.depthStencilState.back.failOp); + HASH(key.depthStencilState.back.passOp); + HASH(key.depthStencilState.back.depthFailOp); + HASH(key.depthStencilState.back.compareMask); + HASH(key.depthStencilState.back.writeMask); + HASH(key.depthStencilState.back.reference); + HASH(key.depthStencilState.minDepthBounds); + HASH(key.depthStencilState.maxDepthBounds); + + HASH(key.colorBlendState.logicOpEnable); + HASH(key.colorBlendState.logicOp); + HASH(key.colorBlendState.attachmentCount); + + return hash; + } + + size_t GraphicsPipelineCache::PipelineStateHash::operator()(const GraphicsPipelineCache::PipelineState &key) const { + size_t hash{HashCommonPipelineState(key)}; + + HASH(key.colorAttachments.size()); + for (const auto &attachment : key.colorAttachments) { + HASH(attachment->format->vkFormat); + HASH(attachment->texture->sampleCount); + } + + HASH(key.depthStencilAttachment != nullptr); + if (key.depthStencilAttachment != nullptr) { + HASH(key.depthStencilAttachment->format->vkFormat); + HASH(key.depthStencilAttachment->texture->sampleCount); + } + + return hash; + } + + size_t GraphicsPipelineCache::PipelineStateHash::operator()(const GraphicsPipelineCache::PipelineCacheKey &key) const { + size_t hash{HashCommonPipelineState(key)}; + + HASH(key.colorAttachments.size()); + for (const auto &attachment : key.colorAttachments) { + HASH(attachment.format); + HASH(attachment.sampleCount); + } + + HASH(key.depthStencilAttachment.has_value()); + if 
(key.depthStencilAttachment) { + HASH(key.depthStencilAttachment->format); + HASH(key.depthStencilAttachment->sampleCount); + } + + return hash; + } + + #undef HASH + + bool GraphicsPipelineCache::PipelineCacheEqual::operator()(const GraphicsPipelineCache::PipelineCacheKey &lhs, const GraphicsPipelineCache::PipelineState &rhs) const { + #define RETF(condition) if (condition) { return false; } + #define KEYEQ(member) (lhs.member == rhs.member) + #define KEYNEQ(member) (lhs.member != rhs.member) + static constexpr auto NotEqual{[](auto pointer, auto size, auto pointer2, auto size2, auto equalFunction) -> bool { + return + size != size2 || + !std::equal(pointer, pointer + static_cast(size), pointer2, equalFunction); + }}; + #define CARREQ(pointer, size, func) NotEqual(lhs.pointer, lhs.size, rhs.pointer, rhs.size, [](decltype(*lhs.pointer) &lhs, decltype(*rhs.pointer) &rhs) { func }) // Note: typeof(*lhs/rhs.pointer) is required for clangd to resolve the parameter type correctly for autocomplete + #define ARREQ(pointer, size) CARREQ(pointer, size, { return lhs == rhs; }) + + RETF(CARREQ(shaderStages.begin(), shaderStages.size(), { + return KEYEQ(flags) && KEYEQ(stage) && KEYEQ(module) && std::strcmp(lhs.pName, rhs.pName) == 0; + // Note: We intentionally ignore specialization constants here + })) + + RETF(KEYNEQ(VertexInputState().flags) || + ARREQ(VertexInputState().pVertexBindingDescriptions, VertexInputState().vertexBindingDescriptionCount) || + ARREQ(VertexInputState().pVertexAttributeDescriptions, VertexInputState().vertexAttributeDescriptionCount) + ) + + RETF(KEYNEQ(vertexState.isLinked()) || + (lhs.vertexState.isLinked() && + ARREQ(VertexDivisorState().pVertexBindingDivisors, VertexDivisorState().vertexBindingDivisorCount) + ) + ) + + RETF(KEYNEQ(tessellationState.flags) || KEYNEQ(tessellationState.patchControlPoints)) + + RETF(KEYNEQ(inputAssemblyState.flags) || KEYNEQ(inputAssemblyState.topology) || KEYNEQ(inputAssemblyState.primitiveRestartEnable)) + + 
RETF(KEYNEQ(viewportState.flags) || + ARREQ(viewportState.pViewports, viewportState.viewportCount) || + ARREQ(viewportState.pScissors, viewportState.scissorCount) + ) + + RETF(KEYNEQ(RasterizationState().flags) || + KEYNEQ(RasterizationState().depthClampEnable) || + KEYNEQ(RasterizationState().rasterizerDiscardEnable) || + KEYNEQ(RasterizationState().polygonMode) || + KEYNEQ(RasterizationState().cullMode) || + KEYNEQ(RasterizationState().frontFace) || + KEYNEQ(RasterizationState().depthBiasEnable) || + KEYNEQ(RasterizationState().depthBiasConstantFactor) || + KEYNEQ(RasterizationState().depthBiasClamp) || + KEYNEQ(RasterizationState().depthBiasSlopeFactor) || + KEYNEQ(RasterizationState().lineWidth) + ) + + RETF(KEYNEQ(rasterizationState.isLinked()) || + (lhs.rasterizationState.isLinked() && + KEYNEQ(ProvokingVertexState().provokingVertexMode) + ) + ) + + RETF(KEYNEQ(multisampleState.flags) || + KEYNEQ(multisampleState.rasterizationSamples) || + KEYNEQ(multisampleState.sampleShadingEnable) || + KEYNEQ(multisampleState.minSampleShading) || + KEYNEQ(multisampleState.alphaToCoverageEnable) || + KEYNEQ(multisampleState.alphaToOneEnable) + ) + + RETF(KEYNEQ(depthStencilState.flags) || + KEYNEQ(depthStencilState.depthTestEnable) || + KEYNEQ(depthStencilState.depthWriteEnable) || + KEYNEQ(depthStencilState.depthCompareOp) || + KEYNEQ(depthStencilState.depthBoundsTestEnable) || + KEYNEQ(depthStencilState.stencilTestEnable) || + KEYNEQ(depthStencilState.front) || + KEYNEQ(depthStencilState.back) || + KEYNEQ(depthStencilState.minDepthBounds) || + KEYNEQ(depthStencilState.maxDepthBounds) + ) + + RETF(KEYNEQ(colorBlendState.flags) || + KEYNEQ(colorBlendState.logicOpEnable) || + KEYNEQ(colorBlendState.logicOp) || + ARREQ(colorBlendState.pAttachments, colorBlendState.attachmentCount) || + KEYNEQ(colorBlendState.blendConstants) + ) + + RETF(CARREQ(colorAttachments.begin(), colorAttachments.size(), { + return lhs.format == rhs->format->vkFormat && lhs.sampleCount == 
rhs->texture->sampleCount; + })) + + RETF(lhs.depthStencilAttachment.has_value() != (rhs.depthStencilAttachment != nullptr) || + (lhs.depthStencilAttachment.has_value() && /* Attachments differ when either the format or the sample count differs */ + (lhs.depthStencilAttachment->format != rhs.depthStencilAttachment->format->vkFormat || + lhs.depthStencilAttachment->sampleCount != rhs.depthStencilAttachment->texture->sampleCount) + ) + ) + + #undef ARREQ + #undef CARREQ + #undef KEYNEQ + #undef KEYEQ + #undef RETF + + return true; + } + + bool GraphicsPipelineCache::PipelineCacheEqual::operator()(const PipelineCacheKey &lhs, const PipelineCacheKey &rhs) const { + return lhs == rhs; + } + + GraphicsPipelineCache::PipelineCacheEntry::PipelineCacheEntry(vk::raii::DescriptorSetLayout &&descriptorSetLayout, vk::raii::PipelineLayout &&pipelineLayout, vk::raii::Pipeline &&pipeline) : descriptorSetLayout(std::move(descriptorSetLayout)), pipelineLayout(std::move(pipelineLayout)), pipeline(std::move(pipeline)) {} + + GraphicsPipelineCache::CompiledPipeline::CompiledPipeline(const PipelineCacheEntry &entry) : descriptorSetLayout(*entry.descriptorSetLayout), pipelineLayout(*entry.pipelineLayout), pipeline(*entry.pipeline) {} + + GraphicsPipelineCache::CompiledPipeline GraphicsPipelineCache::GetCompiledPipeline(const PipelineState &state, span layoutBindings) { + std::unique_lock lock(mutex); + + auto it{pipelineCache.find(state)}; + if (it != pipelineCache.end()) + return CompiledPipeline{it->second}; + + lock.unlock(); + + vk::raii::DescriptorSetLayout descriptorSetLayout{gpu.vkDevice, vk::DescriptorSetLayoutCreateInfo{ + .flags = gpu.traits.supportsPushDescriptors ?
vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR : vk::DescriptorSetLayoutCreateFlags{}, + .pBindings = layoutBindings.data(), + .bindingCount = static_cast(layoutBindings.size()), + }}; + + vk::raii::PipelineLayout pipelineLayout{gpu.vkDevice, vk::PipelineLayoutCreateInfo{ + .pSetLayouts = &*descriptorSetLayout, + .setLayoutCount = 1, + }}; + + boost::container::small_vector attachmentDescriptions; + boost::container::small_vector attachmentReferences; + + auto pushAttachment{[&](const TextureView &view) { + attachmentDescriptions.push_back(vk::AttachmentDescription{ + .format = view.format->vkFormat, + .samples = view.texture->sampleCount, + .loadOp = vk::AttachmentLoadOp::eLoad, + .storeOp = vk::AttachmentStoreOp::eStore, + .stencilLoadOp = vk::AttachmentLoadOp::eLoad, + .stencilStoreOp = vk::AttachmentStoreOp::eStore, + .initialLayout = view.texture->layout, + .finalLayout = view.texture->layout, + }); + attachmentReferences.push_back(vk::AttachmentReference{ + .attachment = static_cast(attachmentDescriptions.size() - 1), + .layout = view.texture->layout, + }); + }}; + + vk::SubpassDescription subpassDescription{ + .pipelineBindPoint = vk::PipelineBindPoint::eGraphics, + }; + + for (auto &colorAttachment : state.colorAttachments) + pushAttachment(*colorAttachment); + + if (state.depthStencilAttachment) { + pushAttachment(*state.depthStencilAttachment); + + subpassDescription.pColorAttachments = attachmentReferences.data(); + subpassDescription.colorAttachmentCount = static_cast(attachmentReferences.size() - 1); + subpassDescription.pDepthStencilAttachment = &attachmentReferences.back(); + } else { + subpassDescription.pColorAttachments = attachmentReferences.data(); + subpassDescription.colorAttachmentCount = static_cast(attachmentReferences.size()); + } + + vk::raii::RenderPass renderPass{gpu.vkDevice, vk::RenderPassCreateInfo{ + .attachmentCount = static_cast(attachmentDescriptions.size()), + .pAttachments = attachmentDescriptions.data(), + 
.subpassCount = 1, + .pSubpasses = &subpassDescription, + }}; + + auto pipeline{gpu.vkDevice.createGraphicsPipeline(vkPipelineCache, vk::GraphicsPipelineCreateInfo{ + .pStages = state.shaderStages.data(), + .stageCount = static_cast(state.shaderStages.size()), + .pVertexInputState = &state.vertexState.get(), + .pInputAssemblyState = &state.inputAssemblyState, .pTessellationState = &state.tessellationState, /* Tessellation state is part of the cache key, so it is forwarded to pipeline creation as well */ + .pViewportState = &state.viewportState, + .pRasterizationState = &state.rasterizationState.get(), + .pMultisampleState = &state.multisampleState, + .pDepthStencilState = &state.depthStencilState, + .pColorBlendState = &state.colorBlendState, + .pDynamicState = nullptr, + .layout = *pipelineLayout, + .renderPass = *renderPass, + .subpass = 0, + })}; + + lock.lock(); + + auto pipelineEntryIt{pipelineCache.try_emplace(PipelineCacheKey{state}, std::move(descriptorSetLayout), std::move(pipelineLayout), std::move(pipeline))}; + return CompiledPipeline{pipelineEntryIt.first->second}; + } +} diff --git a/app/src/main/cpp/skyline/gpu/cache/graphics_pipeline_cache.h b/app/src/main/cpp/skyline/gpu/cache/graphics_pipeline_cache.h new file mode 100644 index 00000000..a08708f0 --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/cache/graphics_pipeline_cache.h @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#pragma once + +#include + +namespace skyline::gpu::cache { + /** + * @brief A cache for all Vulkan graphics pipeline objects used by the GPU to avoid costly re-creation + * @note The cache is **not** compliant with Vulkan specification's Render Pass Compatibility clause when used with multi-subpass Render Passes but certain drivers may support a more relaxed version of this clause in practice which may allow it to be used with multi-subpass Render Passes + */ + class GraphicsPipelineCache { + public: + /** + * @brief All unique state required to compile a graphics pipeline as references + */ + struct PipelineState { + span
shaderStages; + vk::StructureChain &vertexState; + vk::PipelineInputAssemblyStateCreateInfo &inputAssemblyState; + vk::PipelineTessellationStateCreateInfo &tessellationState; + vk::PipelineViewportStateCreateInfo &viewportState; + vk::StructureChain &rasterizationState; + vk::PipelineMultisampleStateCreateInfo &multisampleState; + vk::PipelineDepthStencilStateCreateInfo &depthStencilState; + vk::PipelineColorBlendStateCreateInfo &colorBlendState; + + span colorAttachments; //!< All color attachments in the subpass of this pipeline + TextureView *depthStencilAttachment; //!< A nullable pointer to the depth/stencil attachment in the subpass of this pipeline + + constexpr const vk::PipelineVertexInputStateCreateInfo &VertexInputState() const { + return vertexState.get(); + } + + constexpr const vk::PipelineVertexInputDivisorStateCreateInfoEXT &VertexDivisorState() const { + return vertexState.get(); + } + + constexpr const vk::PipelineRasterizationStateCreateInfo &RasterizationState() const { + return rasterizationState.get(); + } + + constexpr const vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT &ProvokingVertexState() const { + return rasterizationState.get(); + } + }; + + private: + /** + * @brief All unique metadata of a single attachment that is tracked for a compatible pipeline according to the Render Pass Compatibility clause in the Vulkan specification + * @url https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#renderpass-compatibility + * @url https://www.khronos.org/registry/vulkan/specs/1.3-extensions/man/html/VkAttachmentDescription.html + */ + struct AttachmentMetadata { + vk::Format format; + vk::SampleCountFlagBits sampleCount; + + bool operator==(const AttachmentMetadata &rhs) const = default; + }; + + /** + * @brief All data in PipelineState in value form to allow cheap heterogeneous lookups with reference types while still storing a value-based key in the map + */ + struct PipelineCacheKey { + std::vector shaderStages; +
vk::StructureChain vertexState; + std::vector vertexBindings; + std::vector vertexAttributes; + std::vector vertexDivisors; + vk::PipelineInputAssemblyStateCreateInfo inputAssemblyState; + vk::PipelineTessellationStateCreateInfo tessellationState; + vk::PipelineViewportStateCreateInfo viewportState; + std::vector viewports; + std::vector scissors; + vk::StructureChain rasterizationState; + vk::PipelineMultisampleStateCreateInfo multisampleState; + vk::PipelineDepthStencilStateCreateInfo depthStencilState; + vk::PipelineColorBlendStateCreateInfo colorBlendState; + std::vector colorBlendAttachments; + + std::vector colorAttachments; + std::optional depthStencilAttachment; + + PipelineCacheKey(const PipelineState& state); + + bool operator==(const PipelineCacheKey& other) const = default; + + constexpr const vk::PipelineVertexInputStateCreateInfo &VertexInputState() const { + return vertexState.get(); + } + + constexpr const vk::PipelineVertexInputDivisorStateCreateInfoEXT &VertexDivisorState() const { + return vertexState.get(); + } + + constexpr const vk::PipelineRasterizationStateCreateInfo &RasterizationState() const { + return rasterizationState.get(); + } + + constexpr const vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT &ProvokingVertexState() const { + return rasterizationState.get(); + } + }; + + private: + GPU &gpu; + std::mutex mutex; //!< Synchronizes accesses to the pipeline cache + vk::raii::PipelineCache vkPipelineCache; //!< A Vulkan Pipeline Cache which stores all unique graphics pipelines + + struct PipelineStateHash { + using is_transparent = std::true_type; + + size_t operator()(const PipelineState &key) const; + + size_t operator()(const PipelineCacheKey &key) const; + }; + + struct PipelineCacheEqual { + using is_transparent = std::true_type; + + bool operator()(const PipelineCacheKey &lhs, const PipelineState &rhs) const; + + bool operator()(const PipelineCacheKey &lhs, const PipelineCacheKey &rhs) const; + }; + + struct 
PipelineCacheEntry { + vk::raii::DescriptorSetLayout descriptorSetLayout; + vk::raii::PipelineLayout pipelineLayout; + vk::raii::Pipeline pipeline; + + PipelineCacheEntry(vk::raii::DescriptorSetLayout&& descriptorSetLayout, vk::raii::PipelineLayout &&layout, vk::raii::Pipeline &&pipeline); + }; + + std::unordered_map pipelineCache; + + public: + GraphicsPipelineCache(GPU &gpu); + + struct CompiledPipeline { + vk::DescriptorSetLayout descriptorSetLayout; + vk::PipelineLayout pipelineLayout; + vk::Pipeline pipeline; + + CompiledPipeline(const PipelineCacheEntry &entry); + }; + + /** + * @note All attachments in the PipelineState **must** be locked prior to calling this function + * @note Shader specialization constants are **not** supported (they are ignored when comparing cache keys) and will result in UB + * @note Input/Resolve attachments are **not** supported and using them with the supplied pipeline will result in UB + */ + CompiledPipeline GetCompiledPipeline(const PipelineState& state, span layoutBindings); + }; +} diff --git a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h index b3566ada..c1e60148 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h @@ -49,7 +49,7 @@ namespace skyline::gpu::interconnect { static_assert(sizeof(IOVA) == sizeof(u64)); public: - GraphicsContext(GPU &gpu, soc::gm20b::ChannelContext &channelCtx, gpu::interconnect::CommandExecutor &executor) : gpu(gpu), channelCtx(channelCtx), executor(executor), pipelineCache(gpu.vkDevice, vk::PipelineCacheCreateInfo{}) { + GraphicsContext(GPU &gpu, soc::gm20b::ChannelContext &channelCtx, gpu::interconnect::CommandExecutor &executor) : gpu(gpu), channelCtx(channelCtx), executor(executor) { scissors.fill(DefaultScissor); u32 bindingIndex{}; @@ -849,7 +849,7 @@ namespace skyline::gpu::interconnect { struct ShaderProgramState { boost::container::static_vector shaderModules; //!<
Shader modules for every pipeline stage boost::container::static_vector shaderStages; //!< Shader modules for every pipeline stage - vk::raii::DescriptorSetLayout descriptorSetLayout; //!< The descriptor set layout for the pipeline (Only valid when `activeShaderStagesInfoCount` is non-zero) + std::vector descriptorSetBindings; //!< The descriptor set layout bindings for the pipeline (Only valid when `activeShaderStagesInfoCount` is non-zero) struct DescriptorSetWrites { std::vector writes; //!< The descriptor set writes for the pipeline @@ -1169,11 +1169,7 @@ namespace skyline::gpu::interconnect { return { std::move(shaderModules), std::move(shaderStages), - vk::raii::DescriptorSetLayout(gpu.vkDevice, vk::DescriptorSetLayoutCreateInfo{ - .flags = gpu.traits.supportsPushDescriptors ? vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR : vk::DescriptorSetLayoutCreateFlags{}, - .pBindings = layoutBindings.data(), - .bindingCount = static_cast(layoutBindings.size()), - }), + {layoutBindings.begin(), layoutBindings.end()}, std::move(descriptorSetWrites), }; } @@ -2705,25 +2701,12 @@ namespace skyline::gpu::interconnect { }; /* Draws */ - private: - vk::raii::PipelineCache pipelineCache; - public: template void Draw(u32 count, u32 first, i32 vertexOffset = 0) { ValidatePrimitiveRestartState(); - // Shader + Binding Setup - auto programState{CompileShaderProgramState()}; - auto descriptorSet{gpu.descriptor.AllocateSet(*programState.descriptorSetLayout)}; - for (auto &descriptorSetWrite : **programState.descriptorSetWrites) - descriptorSetWrite.dstSet = descriptorSet; - - vk::raii::PipelineLayout pipelineLayout(gpu.vkDevice, vk::PipelineLayoutCreateInfo{ - .pSetLayouts = &*programState.descriptorSetLayout, - .setLayoutCount = 1, - }); - + // Index Buffer Setup struct BoundIndexBuffer { vk::Buffer handle{}; vk::DeviceSize offset{}; @@ -2815,7 +2798,7 @@ namespace skyline::gpu::interconnect { } } - boost::container::static_vector blendAttachmentStates(blendState.pAttachments,
blendState.pAttachments + activeColorRenderTargets.size()); + blendState.attachmentCount = static_cast(activeColorRenderTargets.size()); // Depth/Stencil Render Target Setup auto depthRenderTargetView{GetDepthRenderTarget()}; @@ -2825,77 +2808,59 @@ namespace skyline::gpu::interconnect { executor.AttachTexture(depthRenderTargetView); } + // Pipeline Creation + vk::StructureChain vertexState{ + vk::PipelineVertexInputStateCreateInfo{ + .pVertexBindingDescriptions = vertexBindingDescriptions.data(), + .vertexBindingDescriptionCount = static_cast(vertexBindingDescriptions.size()), + .pVertexAttributeDescriptions = vertexAttributesDescriptions.data(), + .vertexAttributeDescriptionCount = static_cast(vertexAttributesDescriptions.size()), + }, vk::PipelineVertexInputDivisorStateCreateInfoEXT{ + .pVertexBindingDivisors = vertexBindingDivisorsDescriptions.data(), + .vertexBindingDivisorCount = static_cast(vertexBindingDivisorsDescriptions.size()), + } + }; + + if (!gpu.traits.supportsVertexAttributeDivisor || vertexBindingDivisorsDescriptions.empty()) + vertexState.unlink(); + + bool multiViewport{gpu.traits.supportsMultipleViewports}; + vk::PipelineViewportStateCreateInfo viewportState{ + .pViewports = viewports.data(), + .viewportCount = static_cast(multiViewport ? maxwell3d::ViewportCount : 1), + .pScissors = scissors.data(), + .scissorCount = static_cast(multiViewport ? 
maxwell3d::ViewportCount : 1), + }; + + auto programState{CompileShaderProgramState()}; + auto compiledPipeline{gpu.graphicsPipelineCache.GetCompiledPipeline(cache::GraphicsPipelineCache::PipelineState{ + .shaderStages = programState.shaderStages, + .vertexState = vertexState, + .inputAssemblyState = inputAssemblyState, + .tessellationState = tessellationState, + .viewportState = viewportState, + .rasterizationState = rasterizerState, + .multisampleState = multisampleState, + .depthStencilState = depthState, + .colorBlendState = blendState, + .colorAttachments = activeColorRenderTargets, + .depthStencilAttachment = depthRenderTargetView, + }, programState.descriptorSetBindings)}; + // Draw Persistent Storage - struct DrawStorage { - vk::raii::DescriptorSetLayout descriptorSetLayout; + struct DrawStorage : FenceCycleDependency { std::unique_ptr descriptorSetWrites; - vk::raii::PipelineLayout pipelineLayout; + vk::DescriptorSetLayout descriptorSetLayout; + vk::PipelineLayout pipelineLayout; + vk::Pipeline pipeline; - DrawStorage(vk::raii::DescriptorSetLayout &&descriptorSetLayout, std::unique_ptr &&descriptorSetWrites, vk::raii::PipelineLayout &&pipelineLayout) : descriptorSetLayout(std::move(descriptorSetLayout)), descriptorSetWrites(std::move(descriptorSetWrites)), pipelineLayout(std::move(pipelineLayout)) {} + DrawStorage(std::unique_ptr &&descriptorSetWrites, vk::DescriptorSetLayout descriptorSetLayout, vk::PipelineLayout pipelineLayout, vk::Pipeline pipeline) : descriptorSetWrites(std::move(descriptorSetWrites)), descriptorSetLayout(descriptorSetLayout), pipelineLayout(pipelineLayout), pipeline(pipeline) {} }; - auto drawStorage{std::make_shared(std::move(programState.descriptorSetLayout), std::move(programState.descriptorSetWrites), std::move(pipelineLayout))}; - - // Command Buffer Persistent Storage - struct FenceStorage : FenceCycleDependency { - std::optional pipeline; - DescriptorAllocator::ActiveDescriptorSet descriptorSet; - std::shared_ptr 
drawStorage{}; - - FenceStorage(DescriptorAllocator::ActiveDescriptorSet &&descriptorSet) : descriptorSet(std::move(descriptorSet)) {} - }; - - auto fenceStorage{std::make_shared(std::move(descriptorSet))}; + auto drawStorage{std::make_shared(std::move(programState.descriptorSetWrites), compiledPipeline.descriptorSetLayout, compiledPipeline.pipelineLayout, compiledPipeline.pipeline)}; // Submit Draw - executor.AddSubpass([=, &vkDevice = gpu.vkDevice, shaderModules = programState.shaderModules, shaderStages = programState.shaderStages, inputAssemblyState = inputAssemblyState, multiViewport = gpu.traits.supportsMultipleViewports, viewports = viewports, scissors = scissors, rasterizerState = rasterizerState, multisampleState = multisampleState, depthState = depthState, blendState = blendState, drawStorage = std::move(drawStorage), fenceStorage = std::move(fenceStorage), supportsVertexAttributeDivisor = gpu.traits.supportsVertexAttributeDivisor, supportsPushDescriptors = gpu.traits.supportsPushDescriptors, pipelineCache = *pipelineCache](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &cycle, GPU &, vk::RenderPass renderPass, u32 subpassIndex) mutable { - vk::StructureChain vertexState{ - vk::PipelineVertexInputStateCreateInfo{ - .pVertexBindingDescriptions = vertexBindingDescriptions.data(), - .vertexBindingDescriptionCount = static_cast(vertexBindingDescriptions.size()), - .pVertexAttributeDescriptions = vertexAttributesDescriptions.data(), - .vertexAttributeDescriptionCount = static_cast(vertexAttributesDescriptions.size()), - }, vk::PipelineVertexInputDivisorStateCreateInfoEXT{ - .pVertexBindingDivisors = vertexBindingDivisorsDescriptions.data(), - .vertexBindingDivisorCount = static_cast(vertexBindingDivisorsDescriptions.size()), - } - }; - - if (!supportsVertexAttributeDivisor || vertexBindingDivisorsDescriptions.empty()) - vertexState.unlink(); - - vk::PipelineViewportStateCreateInfo viewportState{ - .pViewports = viewports.data(), - .viewportCount 
= static_cast(multiViewport ? maxwell3d::ViewportCount : 1), - .pScissors = scissors.data(), - .scissorCount = static_cast(multiViewport ? maxwell3d::ViewportCount : 1), - }; - - blendState.pAttachments = blendAttachmentStates.data(); - blendState.attachmentCount = static_cast(blendAttachmentStates.size()); - - vk::GraphicsPipelineCreateInfo pipelineCreateInfo{ - .pStages = shaderStages.data(), - .stageCount = static_cast(shaderStages.size()), - .pVertexInputState = &vertexState.get(), - .pInputAssemblyState = &inputAssemblyState, - .pTessellationState = &tessellationState, - .pViewportState = &viewportState, - .pRasterizationState = &rasterizerState.get(), - .pMultisampleState = &multisampleState, - .pDepthStencilState = &depthState, - .pColorBlendState = &blendState, - .pDynamicState = nullptr, - .layout = *drawStorage->pipelineLayout, - .renderPass = renderPass, - .subpass = subpassIndex, - }; - - auto pipeline{(*vkDevice).createGraphicsPipeline(pipelineCache, pipelineCreateInfo, nullptr, *vkDevice.getDispatcher())}; - if (pipeline.result != vk::Result::eSuccess) - vk::throwResultException(pipeline.result, __builtin_FUNCTION()); - - commandBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline.value); + executor.AddSubpass([=, drawStorage = std::move(drawStorage), &vkDevice = gpu.vkDevice, supportsPushDescriptors = gpu.traits.supportsPushDescriptors](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &cycle, GPU &, vk::RenderPass renderPass, u32 subpassIndex) mutable { auto &vertexBufferHandles{boundVertexBuffers->handles}; for (u32 bindingIndex{}; bindingIndex != vertexBufferHandles.size(); bindingIndex++) { @@ -2911,15 +2876,17 @@ namespace skyline::gpu::interconnect { } if (supportsPushDescriptors) { - commandBuffer.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *drawStorage->pipelineLayout, 0, **drawStorage->descriptorSetWrites); + commandBuffer.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, drawStorage->pipelineLayout, 0, 
**drawStorage->descriptorSetWrites); } else { - auto descriptorSet{gpu.descriptor.AllocateSet(*drawStorage->descriptorSetLayout)}; + auto descriptorSet{gpu.descriptor.AllocateSet(drawStorage->descriptorSetLayout)}; for (auto &descriptorSetWrite : **drawStorage->descriptorSetWrites) descriptorSetWrite.dstSet = descriptorSet; vkDevice.updateDescriptorSets(**drawStorage->descriptorSetWrites, nullptr); - commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *drawStorage->pipelineLayout, 0, descriptorSet, nullptr); + commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, drawStorage->pipelineLayout, 0, descriptorSet, nullptr); } + commandBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, drawStorage->pipeline); + if constexpr (IsIndexed) { commandBuffer.bindIndexBuffer(boundIndexBuffer->handle, boundIndexBuffer->offset, boundIndexBuffer->type); commandBuffer.drawIndexed(count, 1, first, vertexOffset, 0); @@ -2927,10 +2894,7 @@ namespace skyline::gpu::interconnect { commandBuffer.draw(count, 1, first, 0); } - fenceStorage->drawStorage = drawStorage; - fenceStorage->pipeline = vk::raii::Pipeline(vkDevice, pipeline.value); - - cycle->AttachObject(fenceStorage); + cycle->AttachObject(drawStorage); }, vk::Rect2D{ .extent = activeColorRenderTargets.empty() ? depthRenderTarget.guest.dimensions : activeColorRenderTargets.front()->texture->dimensions, }, {}, activeColorRenderTargets, depthRenderTargetView);