Implement Graphics Pipeline Cache

Implements a cache for storing `VkPipeline` objects, which are fairly expensive to create; recreating them on a per-frame basis was wasteful and consumed a significant part of the frametime. It should be noted that this is **not** compliant with the Vulkan specification and **will** break unless the driver supports a relaxed version of the Vulkan specification's Render Pass Compatibility clause.
This commit is contained in:
PixelyIon 2022-04-24 14:31:00 +05:30
parent 50a8b69f7b
commit 7ef4959060
6 changed files with 579 additions and 94 deletions

View File

@ -169,6 +169,7 @@ add_library(skyline SHARED
${source_DIR}/skyline/gpu/buffer.cpp
${source_DIR}/skyline/gpu/presentation_engine.cpp
${source_DIR}/skyline/gpu/shader_manager.cpp
${source_DIR}/skyline/gpu/cache/graphics_pipeline_cache.cpp
${source_DIR}/skyline/gpu/interconnect/command_executor.cpp
${source_DIR}/skyline/gpu/interconnect/command_nodes.cpp
${source_DIR}/skyline/gpu/interconnect/conversion/quads.cpp

View File

@ -101,6 +101,10 @@ namespace skyline::gpu {
IGNORE_VALIDATION("UNASSIGNED-CoreValidation-SwapchainPreTransform"); // We handle transformation via Android APIs directly
IGNORE_VALIDATION("UNASSIGNED-GeneralParameterPerfWarn-SuboptimalSwapchain"); // Same as SwapchainPreTransform
IGNORE_VALIDATION("UNASSIGNED-CoreValidation-DrawState-InvalidImageLayout"); // We utilize images as VK_IMAGE_LAYOUT_GENERAL rather than optimal layouts for operations
/* Pipeline Cache isn't compliant with the Vulkan specification, it depends on driver support for a relaxed version of Vulkan specification's Render Pass Compatibility clause and this will result in validation errors regardless which we need to ignore */
IGNORE_VALIDATION("VUID-vkCmdDrawIndexed-renderPass-02684");
IGNORE_VALIDATION("VUID-vkCmdDrawIndexed-subpass-02685");
}
#undef IGNORE_TYPE
@ -273,5 +277,6 @@ namespace skyline::gpu {
texture(*this),
buffer(*this),
descriptor(*this),
shader(state, *this) {}
shader(state, *this),
graphicsPipelineCache(*this) {}
}

View File

@ -11,6 +11,7 @@
#include "gpu/buffer_manager.h"
#include "gpu/descriptor_allocator.h"
#include "gpu/shader_manager.h"
#include "gpu/cache/graphics_pipeline_cache.h"
namespace skyline::gpu {
static constexpr u32 VkApiVersion{VK_API_VERSION_1_1}; //!< The version of core Vulkan that we require
@ -45,6 +46,8 @@ namespace skyline::gpu {
DescriptorAllocator descriptor;
ShaderManager shader;
cache::GraphicsPipelineCache graphicsPipelineCache;
GPU(const DeviceState &state);
};
}

View File

@ -0,0 +1,357 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <boost/functional/hash.hpp>
#include <gpu.h>
#include "graphics_pipeline_cache.h"
namespace skyline::gpu::cache {
/**
 * @brief Binds the cache to the supplied GPU and creates the backing Vulkan pipeline cache object from a default-constructed create info
 */
GraphicsPipelineCache::GraphicsPipelineCache(GPU &gpu)
    : gpu(gpu),
      vkPipelineCache(gpu.vkDevice, vk::PipelineCacheCreateInfo{}) {}
/**
 * @brief Builds a self-contained key by copying every array that the supplied state only refers to by pointer
 * @note After the copies are made, the pointer members of the copied create-info structures are redirected at the vectors owned by this key
 */
GraphicsPipelineCache::PipelineCacheKey::PipelineCacheKey(const GraphicsPipelineCache::PipelineState &state)
    : shaderStages(state.shaderStages.begin(), state.shaderStages.end()),
      vertexState(state.vertexState),
      vertexBindings(state.VertexInputState().pVertexBindingDescriptions,
                     state.VertexInputState().pVertexBindingDescriptions + state.VertexInputState().vertexBindingDescriptionCount),
      vertexAttributes(state.VertexInputState().pVertexAttributeDescriptions,
                       state.VertexInputState().pVertexAttributeDescriptions + state.VertexInputState().vertexAttributeDescriptionCount),
      vertexDivisors(state.VertexDivisorState().pVertexBindingDivisors,
                     state.VertexDivisorState().pVertexBindingDivisors + state.VertexDivisorState().vertexBindingDivisorCount),
      inputAssemblyState(state.inputAssemblyState),
      tessellationState(state.tessellationState),
      viewportState(state.viewportState),
      viewports(state.viewportState.pViewports,
                state.viewportState.pViewports + state.viewportState.viewportCount),
      scissors(state.viewportState.pScissors,
               state.viewportState.pScissors + state.viewportState.scissorCount),
      rasterizationState(state.rasterizationState),
      multisampleState(state.multisampleState),
      depthStencilState(state.depthStencilState),
      colorBlendState(state.colorBlendState),
      colorBlendAttachments(state.colorBlendState.pAttachments,
                            state.colorBlendState.pAttachments + state.colorBlendState.attachmentCount) {
    // Redirect every array pointer from the caller's storage to the copies owned by this key
    colorBlendState.pAttachments = colorBlendAttachments.data();

    viewportState.pScissors = scissors.data();
    viewportState.pViewports = viewports.data();

    vertexState.get<vk::PipelineVertexInputDivisorStateCreateInfoEXT>().pVertexBindingDivisors = vertexDivisors.data();

    auto &ownedVertexInput{vertexState.get<vk::PipelineVertexInputStateCreateInfo>()};
    ownedVertexInput.pVertexAttributeDescriptions = vertexAttributes.data();
    ownedVertexInput.pVertexBindingDescriptions = vertexBindings.data();

    // Attachments are reduced to their format and sample count, the views themselves are not retained
    for (auto &view : state.colorAttachments)
        colorAttachments.emplace_back(AttachmentMetadata{view->format->vkFormat, view->texture->sampleCount});

    if (state.depthStencilAttachment)
        depthStencilAttachment.emplace(AttachmentMetadata{state.depthStencilAttachment->format->vkFormat, state.depthStencilAttachment->texture->sampleCount});
}
#define HASH(x) boost::hash_combine(hash, x)
template<typename T>
size_t HashCommonPipelineState(const T &key, size_t hash = 0) {
HASH(key.shaderStages.size());
for (const auto &stage : key.shaderStages) {
HASH(stage.stage);
HASH(static_cast<VkShaderModule>(stage.module));
}
auto &vertexInputState{key.VertexInputState()};
HASH(vertexInputState.vertexBindingDescriptionCount);
HASH(vertexInputState.vertexAttributeDescriptionCount);
if (key.vertexState.template isLinked<vk::PipelineVertexInputDivisorStateCreateInfoEXT>())
HASH(key.VertexDivisorState().vertexBindingDivisorCount);
HASH(key.inputAssemblyState.topology);
HASH(key.inputAssemblyState.primitiveRestartEnable);
HASH(key.tessellationState.patchControlPoints);
HASH(key.viewportState.viewportCount);
HASH(key.viewportState.scissorCount);
auto &rasterizationState{key.RasterizationState()};
HASH(rasterizationState.depthClampEnable);
HASH(rasterizationState.rasterizerDiscardEnable);
HASH(rasterizationState.polygonMode);
HASH(std::hash<vk::CullModeFlags>{}(rasterizationState.cullMode));
HASH(rasterizationState.frontFace);
HASH(rasterizationState.depthBiasEnable);
HASH(rasterizationState.depthBiasConstantFactor);
HASH(rasterizationState.depthBiasClamp);
HASH(rasterizationState.depthBiasSlopeFactor);
HASH(rasterizationState.lineWidth);
if (key.rasterizationState.template isLinked<vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT>())
HASH(key.ProvokingVertexState().provokingVertexMode);
HASH(key.multisampleState.rasterizationSamples);
HASH(key.multisampleState.sampleShadingEnable);
HASH(key.multisampleState.minSampleShading);
HASH(key.multisampleState.alphaToCoverageEnable);
HASH(key.multisampleState.alphaToOneEnable);
HASH(key.depthStencilState.depthTestEnable);
HASH(key.depthStencilState.depthWriteEnable);
HASH(key.depthStencilState.depthCompareOp);
HASH(key.depthStencilState.depthBoundsTestEnable);
HASH(key.depthStencilState.stencilTestEnable);
HASH(key.depthStencilState.front.compareOp);
HASH(key.depthStencilState.front.failOp);
HASH(key.depthStencilState.front.passOp);
HASH(key.depthStencilState.front.depthFailOp);
HASH(key.depthStencilState.front.compareMask);
HASH(key.depthStencilState.front.writeMask);
HASH(key.depthStencilState.front.reference);
HASH(key.depthStencilState.back.compareOp);
HASH(key.depthStencilState.back.failOp);
HASH(key.depthStencilState.back.passOp);
HASH(key.depthStencilState.back.depthFailOp);
HASH(key.depthStencilState.back.compareMask);
HASH(key.depthStencilState.back.writeMask);
HASH(key.depthStencilState.back.reference);
HASH(key.depthStencilState.minDepthBounds);
HASH(key.depthStencilState.maxDepthBounds);
HASH(key.colorBlendState.logicOpEnable);
HASH(key.colorBlendState.logicOp);
HASH(key.colorBlendState.attachmentCount);
return hash;
}
/**
 * @brief Hashes a reference-based PipelineState, combining the common state with the format and sample count of every attachment
 * @note Values are combined in the same order as the PipelineCacheKey overload so that a state can be used to look up a stored key
 */
size_t GraphicsPipelineCache::PipelineStateHash::operator()(const GraphicsPipelineCache::PipelineState &key) const {
    size_t seed{HashCommonPipelineState(key)};

    boost::hash_combine(seed, key.colorAttachments.size());
    for (const auto &view : key.colorAttachments) {
        boost::hash_combine(seed, view->format->vkFormat);
        boost::hash_combine(seed, view->texture->sampleCount);
    }

    // The presence of the depth/stencil attachment is part of the hash, its metadata follows only when it is present
    const bool hasDepthStencil{key.depthStencilAttachment != nullptr};
    boost::hash_combine(seed, hasDepthStencil);
    if (hasDepthStencil) {
        boost::hash_combine(seed, key.depthStencilAttachment->format->vkFormat);
        boost::hash_combine(seed, key.depthStencilAttachment->texture->sampleCount);
    }

    return seed;
}
/**
 * @brief Hashes a value-based PipelineCacheKey, combining the common state with the stored format and sample count of every attachment
 * @note Values are combined in the same order as the PipelineState overload so that a state can be used to look up a stored key
 */
size_t GraphicsPipelineCache::PipelineStateHash::operator()(const GraphicsPipelineCache::PipelineCacheKey &key) const {
    size_t seed{HashCommonPipelineState(key)};

    boost::hash_combine(seed, key.colorAttachments.size());
    for (const auto &metadata : key.colorAttachments) {
        boost::hash_combine(seed, metadata.format);
        boost::hash_combine(seed, metadata.sampleCount);
    }

    // The presence of the depth/stencil attachment is part of the hash, its metadata follows only when it is present
    const bool hasDepthStencil{key.depthStencilAttachment.has_value()};
    boost::hash_combine(seed, hasDepthStencil);
    if (hasDepthStencil) {
        boost::hash_combine(seed, key.depthStencilAttachment->format);
        boost::hash_combine(seed, key.depthStencilAttachment->sampleCount);
    }

    return seed;
}
#undef HASH
/**
 * @brief Compares a stored value-based key against a reference-based state member by member, this is what allows looking up the cache with a PipelineState without first converting it into a key
 * @param lhs The key stored inside the cache
 * @param rhs The state being looked up
 * @return If every compared member of both arguments is equal
 * @note Pointer members are never compared by address, the arrays they point to are compared element-wise instead
 * @note Shader specialization constants are intentionally not compared
 */
bool GraphicsPipelineCache::PipelineCacheEqual::operator()(const GraphicsPipelineCache::PipelineCacheKey &lhs, const GraphicsPipelineCache::PipelineState &rhs) const {
    // RETF bails out of the comparison with `false` as soon as its condition (a detected difference) holds
    #define RETF(condition) if (condition) { return false; }
    #define KEYEQ(member) (lhs.member == rhs.member)
    #define KEYNEQ(member) (lhs.member != rhs.member)

    // Evaluates to true when the two arrays differ in size or in any element according to `equalFunction`
    static constexpr auto NotEqual{[](auto pointer, auto size, auto pointer2, auto size2, auto equalFunction) -> bool {
        return
            size != size2 ||
            !std::equal(pointer, pointer + static_cast<ssize_t>(size), pointer2, equalFunction);
    }};

    // Despite their names, CARREQ/ARREQ expand to NotEqual and therefore evaluate to true when the arrays are **not** equal, which is the form RETF expects
    #define CARREQ(pointer, size, func) NotEqual(lhs.pointer, lhs.size, rhs.pointer, rhs.size, [](decltype(*lhs.pointer) &lhs, decltype(*rhs.pointer) &rhs) { func }) // Note: typeof(*lhs/rhs.pointer) is required for clangd to resolve the parameter type correctly for autocomplete
    #define ARREQ(pointer, size) CARREQ(pointer, size, { return lhs == rhs; })

    RETF(CARREQ(shaderStages.begin(), shaderStages.size(), {
        return KEYEQ(flags) && KEYEQ(stage) && KEYEQ(module) && std::strcmp(lhs.pName, rhs.pName) == 0;
        // Note: We intentionally ignore specialization constants here
    }))

    RETF(KEYNEQ(VertexInputState().flags) ||
        ARREQ(VertexInputState().pVertexBindingDescriptions, VertexInputState().vertexBindingDescriptionCount) ||
        ARREQ(VertexInputState().pVertexAttributeDescriptions, VertexInputState().vertexAttributeDescriptionCount)
    )

    // Extension structures are only compared when linked, but whether they are linked must match on both sides
    RETF(KEYNEQ(vertexState.isLinked<vk::PipelineVertexInputDivisorStateCreateInfoEXT>()) ||
        (lhs.vertexState.isLinked<vk::PipelineVertexInputDivisorStateCreateInfoEXT>() &&
            ARREQ(VertexDivisorState().pVertexBindingDivisors, VertexDivisorState().vertexBindingDivisorCount)
        )
    )

    RETF(KEYNEQ(tessellationState.flags) || KEYNEQ(tessellationState.patchControlPoints))

    RETF(KEYNEQ(inputAssemblyState.flags) || KEYNEQ(inputAssemblyState.topology) || KEYNEQ(inputAssemblyState.primitiveRestartEnable))

    RETF(KEYNEQ(viewportState.flags) ||
        ARREQ(viewportState.pViewports, viewportState.viewportCount) ||
        ARREQ(viewportState.pScissors, viewportState.scissorCount)
    )

    RETF(KEYNEQ(RasterizationState().flags) ||
        KEYNEQ(RasterizationState().depthClampEnable) ||
        KEYNEQ(RasterizationState().rasterizerDiscardEnable) ||
        KEYNEQ(RasterizationState().polygonMode) ||
        KEYNEQ(RasterizationState().cullMode) ||
        KEYNEQ(RasterizationState().frontFace) ||
        KEYNEQ(RasterizationState().depthBiasEnable) ||
        KEYNEQ(RasterizationState().depthBiasConstantFactor) ||
        KEYNEQ(RasterizationState().depthBiasClamp) ||
        KEYNEQ(RasterizationState().depthBiasSlopeFactor) ||
        KEYNEQ(RasterizationState().lineWidth)
    )

    RETF(KEYNEQ(rasterizationState.isLinked<vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT>()) ||
        (lhs.rasterizationState.isLinked<vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT>() &&
            KEYNEQ(ProvokingVertexState().provokingVertexMode)
        )
    )

    RETF(KEYNEQ(multisampleState.flags) ||
        KEYNEQ(multisampleState.rasterizationSamples) ||
        KEYNEQ(multisampleState.sampleShadingEnable) ||
        KEYNEQ(multisampleState.minSampleShading) ||
        KEYNEQ(multisampleState.alphaToCoverageEnable) ||
        KEYNEQ(multisampleState.alphaToOneEnable)
    )

    RETF(KEYNEQ(depthStencilState.flags) ||
        KEYNEQ(depthStencilState.depthTestEnable) ||
        KEYNEQ(depthStencilState.depthWriteEnable) ||
        KEYNEQ(depthStencilState.depthCompareOp) ||
        KEYNEQ(depthStencilState.depthBoundsTestEnable) ||
        KEYNEQ(depthStencilState.stencilTestEnable) ||
        KEYNEQ(depthStencilState.front) ||
        KEYNEQ(depthStencilState.back) ||
        KEYNEQ(depthStencilState.minDepthBounds) ||
        KEYNEQ(depthStencilState.maxDepthBounds)
    )

    RETF(KEYNEQ(colorBlendState.flags) ||
        KEYNEQ(colorBlendState.logicOpEnable) ||
        KEYNEQ(colorBlendState.logicOp) ||
        ARREQ(colorBlendState.pAttachments, colorBlendState.attachmentCount) ||
        KEYNEQ(colorBlendState.blendConstants)
    )

    RETF(CARREQ(colorAttachments.begin(), colorAttachments.size(), {
        return lhs.format == rhs->format->vkFormat && lhs.sampleCount == rhs->texture->sampleCount;
    }))

    // The depth/stencil attachment differs when its presence differs or when **either** its format or its sample count differs
    // Note: Requiring both to differ would wrongly treat attachments that differ in only one of them as equal
    RETF(lhs.depthStencilAttachment.has_value() != (rhs.depthStencilAttachment != nullptr) ||
        (lhs.depthStencilAttachment.has_value() &&
            (lhs.depthStencilAttachment->format != rhs.depthStencilAttachment->format->vkFormat ||
                lhs.depthStencilAttachment->sampleCount != rhs.depthStencilAttachment->texture->sampleCount
            )
        )
    )

    #undef ARREQ
    #undef CARREQ
    #undef KEYNEQ
    #undef KEYEQ
    #undef RETF

    return true;
}
/**
 * @brief Compares two stored keys by delegating to the defaulted equality operator of PipelineCacheKey
 * @note NOTE(review): The defaulted operator compares the Vulkan create-info members directly, presumably including their pointer members by address rather than by content — confirm this is the intended behavior for two separately constructed keys
 */
bool GraphicsPipelineCache::PipelineCacheEqual::operator()(const PipelineCacheKey &lhs, const PipelineCacheKey &rhs) const {
    const bool keysMatch{lhs == rhs};
    return keysMatch;
}
/**
 * @brief Takes ownership of all Vulkan objects that make up a single cached pipeline by moving them into the entry
 */
GraphicsPipelineCache::PipelineCacheEntry::PipelineCacheEntry(
    vk::raii::DescriptorSetLayout &&descriptorSetLayout,
    vk::raii::PipelineLayout &&pipelineLayout,
    vk::raii::Pipeline &&pipeline
)
    : descriptorSetLayout(std::move(descriptorSetLayout)),
      pipelineLayout(std::move(pipelineLayout)),
      pipeline(std::move(pipeline)) {}
/**
 * @brief Creates a non-owning view of a cache entry by dereferencing its RAII wrappers into plain Vulkan handles
 */
GraphicsPipelineCache::CompiledPipeline::CompiledPipeline(const PipelineCacheEntry &entry)
    : descriptorSetLayout(*entry.descriptorSetLayout),
      pipelineLayout(*entry.pipelineLayout),
      pipeline(*entry.pipeline) {}
/**
 * @brief Retrieves the pipeline matching the supplied state from the cache, compiling and inserting it if it isn't present
 * @param state All state the pipeline is created from, it is also what the cache is looked up with
 * @param layoutBindings The bindings used to create the descriptor set layout of a newly compiled pipeline
 * @return Handles to the descriptor set layout, pipeline layout and pipeline of the cache entry
 * @note The mutex is released while compiling, if the key was inserted in the meantime then `try_emplace` keeps the existing entry and the freshly created objects are destroyed on return
 * @note The render pass the pipeline is created against is local to this function
 * @note NOTE(review): `state.tessellationState` is part of the cache key but isn't supplied to the pipeline create info here — confirm this is intentional
 */
GraphicsPipelineCache::CompiledPipeline GraphicsPipelineCache::GetCompiledPipeline(const PipelineState &state, span<vk::DescriptorSetLayoutBinding> layoutBindings) {
    std::unique_lock cacheLock(mutex);
    if (auto cached{pipelineCache.find(state)}; cached != pipelineCache.end())
        return CompiledPipeline{cached->second};
    cacheLock.unlock(); // Compilation is done without holding the lock

    // Descriptor Set + Pipeline Layout
    vk::raii::DescriptorSetLayout descriptorSetLayout{gpu.vkDevice, vk::DescriptorSetLayoutCreateInfo{
        .flags = gpu.traits.supportsPushDescriptors ? vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR : vk::DescriptorSetLayoutCreateFlags{},
        .pBindings = layoutBindings.data(),
        .bindingCount = static_cast<u32>(layoutBindings.size()),
    }};

    vk::raii::PipelineLayout pipelineLayout{gpu.vkDevice, vk::PipelineLayoutCreateInfo{
        .pSetLayouts = &*descriptorSetLayout,
        .setLayoutCount = 1,
    }};

    // Attachments: every attachment gets a description alongside a reference to it at the same index
    boost::container::small_vector<vk::AttachmentDescription, 8> attachmentDescriptions;
    boost::container::small_vector<vk::AttachmentReference, 8> attachmentReferences;

    auto appendAttachment{[&](const TextureView &view) {
        const auto attachmentIndex{static_cast<u32>(attachmentDescriptions.size())};

        attachmentDescriptions.push_back(vk::AttachmentDescription{
            .format = view.format->vkFormat,
            .samples = view.texture->sampleCount,
            .loadOp = vk::AttachmentLoadOp::eLoad,
            .storeOp = vk::AttachmentStoreOp::eStore,
            .stencilLoadOp = vk::AttachmentLoadOp::eLoad,
            .stencilStoreOp = vk::AttachmentStoreOp::eStore,
            .initialLayout = view.texture->layout,
            .finalLayout = view.texture->layout,
        });

        attachmentReferences.push_back(vk::AttachmentReference{
            .attachment = attachmentIndex,
            .layout = view.texture->layout,
        });
    }};

    for (auto &colorView : state.colorAttachments)
        appendAttachment(*colorView);

    // The depth/stencil attachment (if any) is always the last reference, all references prior to it are color attachments
    const bool hasDepthStencil{state.depthStencilAttachment != nullptr};
    if (hasDepthStencil)
        appendAttachment(*state.depthStencilAttachment);

    // Subpass + Render Pass
    vk::SubpassDescription subpassDescription{
        .pipelineBindPoint = vk::PipelineBindPoint::eGraphics,
    };
    subpassDescription.pColorAttachments = attachmentReferences.data();
    if (hasDepthStencil) {
        subpassDescription.colorAttachmentCount = static_cast<u32>(attachmentReferences.size() - 1);
        subpassDescription.pDepthStencilAttachment = &attachmentReferences.back();
    } else {
        subpassDescription.colorAttachmentCount = static_cast<u32>(attachmentReferences.size());
    }

    vk::RenderPassCreateInfo renderPassInfo{
        .attachmentCount = static_cast<u32>(attachmentDescriptions.size()),
        .pAttachments = attachmentDescriptions.data(),
        .subpassCount = 1,
        .pSubpasses = &subpassDescription,
    };
    vk::raii::RenderPass renderPass{gpu.vkDevice, renderPassInfo};

    // Pipeline
    vk::GraphicsPipelineCreateInfo pipelineInfo{
        .pStages = state.shaderStages.data(),
        .stageCount = static_cast<u32>(state.shaderStages.size()),
        .pVertexInputState = &state.vertexState.get<vk::PipelineVertexInputStateCreateInfo>(),
        .pInputAssemblyState = &state.inputAssemblyState,
        .pViewportState = &state.viewportState,
        .pRasterizationState = &state.rasterizationState.get<vk::PipelineRasterizationStateCreateInfo>(),
        .pMultisampleState = &state.multisampleState,
        .pDepthStencilState = &state.depthStencilState,
        .pColorBlendState = &state.colorBlendState,
        .pDynamicState = nullptr,
        .layout = *pipelineLayout,
        .renderPass = *renderPass,
        .subpass = 0,
    };
    auto pipeline{gpu.vkDevice.createGraphicsPipeline(vkPipelineCache, pipelineInfo)};

    // Insertion
    cacheLock.lock();
    auto emplaceResult{pipelineCache.try_emplace(PipelineCacheKey{state}, std::move(descriptorSetLayout), std::move(pipelineLayout), std::move(pipeline))};
    return CompiledPipeline{emplaceResult.first->second};
}
}

View File

@ -0,0 +1,155 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <gpu/texture/texture.h>
namespace skyline::gpu::cache {
/**
* @brief A cache for all Vulkan graphics pipeline objects used by the GPU to avoid costly re-creation
* @note The cache is **not** compliant with Vulkan specification's Render Pass Compatibility clause when used with multi-subpass Render Passes but certain drivers may support a more relaxed version of this clause in practice which may allow it to be used with multi-subpass Render Passes
* @note Lookups are performed with the reference-based PipelineState while the map stores the value-based PipelineCacheKey, the transparent hash and equality functors below bridge the two
*/
class GraphicsPipelineCache {
public:
/**
* @brief All unique state required to compile a graphics pipeline as references
* @note Nothing is owned by this structure, everything it refers to must remain valid for as long as it is in use
*/
struct PipelineState {
span<vk::PipelineShaderStageCreateInfo> shaderStages;
vk::StructureChain<vk::PipelineVertexInputStateCreateInfo, vk::PipelineVertexInputDivisorStateCreateInfoEXT> &vertexState;
vk::PipelineInputAssemblyStateCreateInfo &inputAssemblyState;
vk::PipelineTessellationStateCreateInfo &tessellationState;
vk::PipelineViewportStateCreateInfo &viewportState;
vk::StructureChain<vk::PipelineRasterizationStateCreateInfo, vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT> &rasterizationState;
vk::PipelineMultisampleStateCreateInfo &multisampleState;
vk::PipelineDepthStencilStateCreateInfo &depthStencilState;
vk::PipelineColorBlendStateCreateInfo &colorBlendState;
span<TextureView *> colorAttachments; //!< All color attachments in the subpass of this pipeline
TextureView *depthStencilAttachment; //!< A nullable pointer to the depth/stencil attachment in the subpass of this pipeline
/**
* @return The vertex input structure from the vertex state chain
*/
constexpr const vk::PipelineVertexInputStateCreateInfo &VertexInputState() const {
return vertexState.get<vk::PipelineVertexInputStateCreateInfo>();
}
/**
* @return The vertex binding divisor structure from the vertex state chain
*/
constexpr const vk::PipelineVertexInputDivisorStateCreateInfoEXT &VertexDivisorState() const {
return vertexState.get<vk::PipelineVertexInputDivisorStateCreateInfoEXT>();
}
/**
* @return The rasterization structure from the rasterization state chain
*/
constexpr const vk::PipelineRasterizationStateCreateInfo &RasterizationState() const {
return rasterizationState.get<vk::PipelineRasterizationStateCreateInfo>();
}
/**
* @return The provoking vertex structure from the rasterization state chain
*/
constexpr const vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT &ProvokingVertexState() const {
return rasterizationState.get<vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT>();
}
};
private:
/**
* @brief All unique metadata of a single attachment for a compatible pipeline according to Render Pass Compatibility clause in the Vulkan specification
* @url https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#renderpass-compatibility
* @url https://www.khronos.org/registry/vulkan/specs/1.3-extensions/man/html/VkAttachmentDescription.html
*/
struct AttachmentMetadata {
vk::Format format;
vk::SampleCountFlagBits sampleCount;
bool operator==(const AttachmentMetadata &rhs) const = default;
};
/**
* @brief All data in PipelineState in value form to allow cheap heterogeneous lookups with reference types while still storing a value-based key in the map
* @note The vectors hold copies of the arrays that the create-info structures point to, the constructor redirects those pointers at the vectors
* @note NOTE(review): An implicitly generated copy would presumably leave those pointers referring to the vectors of the key that was copied from — confirm keys are only ever moved
*/
struct PipelineCacheKey {
std::vector<vk::PipelineShaderStageCreateInfo> shaderStages;
vk::StructureChain<vk::PipelineVertexInputStateCreateInfo, vk::PipelineVertexInputDivisorStateCreateInfoEXT> vertexState;
std::vector<vk::VertexInputBindingDescription> vertexBindings;
std::vector<vk::VertexInputAttributeDescription> vertexAttributes;
std::vector<vk::VertexInputBindingDivisorDescriptionEXT> vertexDivisors;
vk::PipelineInputAssemblyStateCreateInfo inputAssemblyState;
vk::PipelineTessellationStateCreateInfo tessellationState;
vk::PipelineViewportStateCreateInfo viewportState;
std::vector<vk::Viewport> viewports;
std::vector<vk::Rect2D> scissors;
vk::StructureChain<vk::PipelineRasterizationStateCreateInfo, vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT> rasterizationState;
vk::PipelineMultisampleStateCreateInfo multisampleState;
vk::PipelineDepthStencilStateCreateInfo depthStencilState;
vk::PipelineColorBlendStateCreateInfo colorBlendState;
std::vector<vk::PipelineColorBlendAttachmentState> colorBlendAttachments;
std::vector<AttachmentMetadata> colorAttachments;
std::optional<AttachmentMetadata> depthStencilAttachment;
PipelineCacheKey(const PipelineState& state);
bool operator==(const PipelineCacheKey& other) const = default;
// The accessors below mirror the ones in PipelineState so that both types can be used with the same generic code
constexpr const vk::PipelineVertexInputStateCreateInfo &VertexInputState() const {
return vertexState.get<vk::PipelineVertexInputStateCreateInfo>();
}
constexpr const vk::PipelineVertexInputDivisorStateCreateInfoEXT &VertexDivisorState() const {
return vertexState.get<vk::PipelineVertexInputDivisorStateCreateInfoEXT>();
}
constexpr const vk::PipelineRasterizationStateCreateInfo &RasterizationState() const {
return rasterizationState.get<vk::PipelineRasterizationStateCreateInfo>();
}
constexpr const vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT &ProvokingVertexState() const {
return rasterizationState.get<vk::PipelineRasterizationProvokingVertexStateCreateInfoEXT>();
}
};
private:
GPU &gpu;
std::mutex mutex; //!< Synchronizes accesses to the pipeline cache
vk::raii::PipelineCache vkPipelineCache; //!< A Vulkan Pipeline Cache which stores all unique graphics pipelines
/**
* @brief A transparent hash functor which accepts both the reference-based state and the value-based key
*/
struct PipelineStateHash {
using is_transparent = std::true_type;
size_t operator()(const PipelineState &key) const;
size_t operator()(const PipelineCacheKey &key) const;
};
/**
* @brief A transparent equality functor which compares a stored key against either a reference-based state or another key
*/
struct PipelineCacheEqual {
using is_transparent = std::true_type;
bool operator()(const PipelineCacheKey &lhs, const PipelineState &rhs) const;
bool operator()(const PipelineCacheKey &lhs, const PipelineCacheKey &rhs) const;
};
/**
* @brief The owning RAII wrappers of all Vulkan objects belonging to a single cached pipeline
*/
struct PipelineCacheEntry {
vk::raii::DescriptorSetLayout descriptorSetLayout;
vk::raii::PipelineLayout pipelineLayout;
vk::raii::Pipeline pipeline;
PipelineCacheEntry(vk::raii::DescriptorSetLayout&& descriptorSetLayout, vk::raii::PipelineLayout &&layout, vk::raii::Pipeline &&pipeline);
};
std::unordered_map<PipelineCacheKey, PipelineCacheEntry, PipelineStateHash, PipelineCacheEqual> pipelineCache; //!< All compiled pipelines keyed by the state they were compiled from
public:
GraphicsPipelineCache(GPU &gpu);
/**
* @brief Non-owning handles to the Vulkan objects of a cache entry
*/
struct CompiledPipeline {
vk::DescriptorSetLayout descriptorSetLayout;
vk::PipelineLayout pipelineLayout;
vk::Pipeline pipeline;
CompiledPipeline(const PipelineCacheEntry &entry);
};
/**
* @brief Retrieves the pipeline for the supplied state from the cache, compiling it first if there is no matching entry
* @param layoutBindings The bindings used for creating the descriptor set layout when the pipeline has to be compiled
* @note All attachments in the PipelineState **must** be locked prior to calling this function
* @note Shader specialization constants are **not** supported and will result in UB
* @note Input/Resolve attachments are **not** supported and using them with the supplied pipeline will result in UB
*/
CompiledPipeline GetCompiledPipeline(const PipelineState& state, span<vk::DescriptorSetLayoutBinding> layoutBindings);
};
}

View File

@ -49,7 +49,7 @@ namespace skyline::gpu::interconnect {
static_assert(sizeof(IOVA) == sizeof(u64));
public:
GraphicsContext(GPU &gpu, soc::gm20b::ChannelContext &channelCtx, gpu::interconnect::CommandExecutor &executor) : gpu(gpu), channelCtx(channelCtx), executor(executor), pipelineCache(gpu.vkDevice, vk::PipelineCacheCreateInfo{}) {
GraphicsContext(GPU &gpu, soc::gm20b::ChannelContext &channelCtx, gpu::interconnect::CommandExecutor &executor) : gpu(gpu), channelCtx(channelCtx), executor(executor) {
scissors.fill(DefaultScissor);
u32 bindingIndex{};
@ -849,7 +849,7 @@ namespace skyline::gpu::interconnect {
struct ShaderProgramState {
boost::container::static_vector<vk::ShaderModule, maxwell3d::PipelineStageCount> shaderModules; //!< Shader modules for every pipeline stage
boost::container::static_vector<vk::PipelineShaderStageCreateInfo, maxwell3d::PipelineStageCount> shaderStages; //!< Shader modules for every pipeline stage
vk::raii::DescriptorSetLayout descriptorSetLayout; //!< The descriptor set layout for the pipeline (Only valid when `activeShaderStagesInfoCount` is non-zero)
std::vector<vk::DescriptorSetLayoutBinding> descriptorSetBindings; //!< The descriptor set layout for the pipeline (Only valid when `activeShaderStagesInfoCount` is non-zero)
struct DescriptorSetWrites {
std::vector<vk::WriteDescriptorSet> writes; //!< The descriptor set writes for the pipeline
@ -1169,11 +1169,7 @@ namespace skyline::gpu::interconnect {
return {
std::move(shaderModules),
std::move(shaderStages),
vk::raii::DescriptorSetLayout(gpu.vkDevice, vk::DescriptorSetLayoutCreateInfo{
.flags = gpu.traits.supportsPushDescriptors ? vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR : vk::DescriptorSetLayoutCreateFlags{},
.pBindings = layoutBindings.data(),
.bindingCount = static_cast<u32>(layoutBindings.size()),
}),
{layoutBindings.begin(), layoutBindings.end()},
std::move(descriptorSetWrites),
};
}
@ -2705,25 +2701,12 @@ namespace skyline::gpu::interconnect {
};
/* Draws */
private:
vk::raii::PipelineCache pipelineCache;
public:
template<bool IsIndexed>
void Draw(u32 count, u32 first, i32 vertexOffset = 0) {
ValidatePrimitiveRestartState();
// Shader + Binding Setup
auto programState{CompileShaderProgramState()};
auto descriptorSet{gpu.descriptor.AllocateSet(*programState.descriptorSetLayout)};
for (auto &descriptorSetWrite : **programState.descriptorSetWrites)
descriptorSetWrite.dstSet = descriptorSet;
vk::raii::PipelineLayout pipelineLayout(gpu.vkDevice, vk::PipelineLayoutCreateInfo{
.pSetLayouts = &*programState.descriptorSetLayout,
.setLayoutCount = 1,
});
// Index Buffer Setup
struct BoundIndexBuffer {
vk::Buffer handle{};
vk::DeviceSize offset{};
@ -2815,7 +2798,7 @@ namespace skyline::gpu::interconnect {
}
}
boost::container::static_vector<vk::PipelineColorBlendAttachmentState, maxwell3d::RenderTargetCount> blendAttachmentStates(blendState.pAttachments, blendState.pAttachments + activeColorRenderTargets.size());
blendState.attachmentCount = static_cast<u32>(activeColorRenderTargets.size());
// Depth/Stencil Render Target Setup
auto depthRenderTargetView{GetDepthRenderTarget()};
@ -2825,77 +2808,59 @@ namespace skyline::gpu::interconnect {
executor.AttachTexture(depthRenderTargetView);
}
// Pipeline Creation
vk::StructureChain<vk::PipelineVertexInputStateCreateInfo, vk::PipelineVertexInputDivisorStateCreateInfoEXT> vertexState{
vk::PipelineVertexInputStateCreateInfo{
.pVertexBindingDescriptions = vertexBindingDescriptions.data(),
.vertexBindingDescriptionCount = static_cast<u32>(vertexBindingDescriptions.size()),
.pVertexAttributeDescriptions = vertexAttributesDescriptions.data(),
.vertexAttributeDescriptionCount = static_cast<u32>(vertexAttributesDescriptions.size()),
}, vk::PipelineVertexInputDivisorStateCreateInfoEXT{
.pVertexBindingDivisors = vertexBindingDivisorsDescriptions.data(),
.vertexBindingDivisorCount = static_cast<u32>(vertexBindingDivisorsDescriptions.size()),
}
};
if (!gpu.traits.supportsVertexAttributeDivisor || vertexBindingDivisorsDescriptions.empty())
vertexState.unlink<vk::PipelineVertexInputDivisorStateCreateInfoEXT>();
bool multiViewport{gpu.traits.supportsMultipleViewports};
vk::PipelineViewportStateCreateInfo viewportState{
.pViewports = viewports.data(),
.viewportCount = static_cast<u32>(multiViewport ? maxwell3d::ViewportCount : 1),
.pScissors = scissors.data(),
.scissorCount = static_cast<u32>(multiViewport ? maxwell3d::ViewportCount : 1),
};
auto programState{CompileShaderProgramState()};
auto compiledPipeline{gpu.graphicsPipelineCache.GetCompiledPipeline(cache::GraphicsPipelineCache::PipelineState{
.shaderStages = programState.shaderStages,
.vertexState = vertexState,
.inputAssemblyState = inputAssemblyState,
.tessellationState = tessellationState,
.viewportState = viewportState,
.rasterizationState = rasterizerState,
.multisampleState = multisampleState,
.depthStencilState = depthState,
.colorBlendState = blendState,
.colorAttachments = activeColorRenderTargets,
.depthStencilAttachment = depthRenderTargetView,
}, programState.descriptorSetBindings)};
// Draw Persistent Storage
struct DrawStorage {
vk::raii::DescriptorSetLayout descriptorSetLayout;
struct DrawStorage : FenceCycleDependency {
std::unique_ptr<ShaderProgramState::DescriptorSetWrites> descriptorSetWrites;
vk::raii::PipelineLayout pipelineLayout;
vk::DescriptorSetLayout descriptorSetLayout;
vk::PipelineLayout pipelineLayout;
vk::Pipeline pipeline;
DrawStorage(vk::raii::DescriptorSetLayout &&descriptorSetLayout, std::unique_ptr<ShaderProgramState::DescriptorSetWrites> &&descriptorSetWrites, vk::raii::PipelineLayout &&pipelineLayout) : descriptorSetLayout(std::move(descriptorSetLayout)), descriptorSetWrites(std::move(descriptorSetWrites)), pipelineLayout(std::move(pipelineLayout)) {}
DrawStorage(std::unique_ptr<ShaderProgramState::DescriptorSetWrites> &&descriptorSetWrites, vk::DescriptorSetLayout descriptorSetLayout, vk::PipelineLayout pipelineLayout, vk::Pipeline pipeline) : descriptorSetWrites(std::move(descriptorSetWrites)), descriptorSetLayout(descriptorSetLayout), pipelineLayout(pipelineLayout), pipeline(pipeline) {}
};
auto drawStorage{std::make_shared<DrawStorage>(std::move(programState.descriptorSetLayout), std::move(programState.descriptorSetWrites), std::move(pipelineLayout))};
// Command Buffer Persistent Storage
struct FenceStorage : FenceCycleDependency {
std::optional<vk::raii::Pipeline> pipeline;
DescriptorAllocator::ActiveDescriptorSet descriptorSet;
std::shared_ptr<DrawStorage> drawStorage{};
FenceStorage(DescriptorAllocator::ActiveDescriptorSet &&descriptorSet) : descriptorSet(std::move(descriptorSet)) {}
};
auto fenceStorage{std::make_shared<FenceStorage>(std::move(descriptorSet))};
auto drawStorage{std::make_shared<DrawStorage>(std::move(programState.descriptorSetWrites), compiledPipeline.descriptorSetLayout, compiledPipeline.pipelineLayout, compiledPipeline.pipeline)};
// Submit Draw
// Registers a subpass callback with the executor that binds vertex buffers, descriptors and a graphics pipeline,
// then issues an indexed or non-indexed draw and attaches the draw's storage to the fence cycle.
// NOTE(review): two `executor.AddSubpass([...` openers and several duplicated statements are interleaved below, and
// the `@ -…,… +…,… @` lines are diff hunk headers marking elided source. This looks like an unmarked diff — the first
// opener's body builds a VkPipeline inside the callback, the second binds `drawStorage->pipeline` instead. Confirm
// against the repository before editing.
// Opener that copies all pipeline state into the callback so the pipeline can be created once `renderPass` and `subpassIndex` are known
executor.AddSubpass([=, &vkDevice = gpu.vkDevice, shaderModules = programState.shaderModules, shaderStages = programState.shaderStages, inputAssemblyState = inputAssemblyState, multiViewport = gpu.traits.supportsMultipleViewports, viewports = viewports, scissors = scissors, rasterizerState = rasterizerState, multisampleState = multisampleState, depthState = depthState, blendState = blendState, drawStorage = std::move(drawStorage), fenceStorage = std::move(fenceStorage), supportsVertexAttributeDivisor = gpu.traits.supportsVertexAttributeDivisor, supportsPushDescriptors = gpu.traits.supportsPushDescriptors, pipelineCache = *pipelineCache](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &, vk::RenderPass renderPass, u32 subpassIndex) mutable {
// Vertex input state chained with the vertex-binding-divisor extension structure
vk::StructureChain<vk::PipelineVertexInputStateCreateInfo, vk::PipelineVertexInputDivisorStateCreateInfoEXT> vertexState{
vk::PipelineVertexInputStateCreateInfo{
.pVertexBindingDescriptions = vertexBindingDescriptions.data(),
.vertexBindingDescriptionCount = static_cast<u32>(vertexBindingDescriptions.size()),
.pVertexAttributeDescriptions = vertexAttributesDescriptions.data(),
.vertexAttributeDescriptionCount = static_cast<u32>(vertexAttributesDescriptions.size()),
}, vk::PipelineVertexInputDivisorStateCreateInfoEXT{
.pVertexBindingDivisors = vertexBindingDivisorsDescriptions.data(),
.vertexBindingDivisorCount = static_cast<u32>(vertexBindingDivisorsDescriptions.size()),
}
};
// Drop the divisor structure from the chain when the trait is unsupported or there are no divisor descriptions
if (!supportsVertexAttributeDivisor || vertexBindingDivisorsDescriptions.empty())
vertexState.unlink<vk::PipelineVertexInputDivisorStateCreateInfoEXT>();
// Viewport and scissor counts are maxwell3d::ViewportCount with multi-viewport support, otherwise 1
vk::PipelineViewportStateCreateInfo viewportState{
.pViewports = viewports.data(),
.viewportCount = static_cast<u32>(multiViewport ? maxwell3d::ViewportCount : 1),
.pScissors = scissors.data(),
.scissorCount = static_cast<u32>(multiViewport ? maxwell3d::ViewportCount : 1),
};
// Point the captured blend state at `blendAttachmentStates`
// NOTE(review): presumably needed because `blendState` is a by-value capture whose attachment pointer would otherwise refer to storage outside the callback — confirm
blendState.pAttachments = blendAttachmentStates.data();
blendState.attachmentCount = static_cast<u32>(blendAttachmentStates.size());
// Assemble the pipeline description; no dynamic state is used and the render pass/subpass come from the callback arguments
vk::GraphicsPipelineCreateInfo pipelineCreateInfo{
.pStages = shaderStages.data(),
.stageCount = static_cast<u32>(shaderStages.size()),
.pVertexInputState = &vertexState.get<vk::PipelineVertexInputStateCreateInfo>(),
.pInputAssemblyState = &inputAssemblyState,
.pTessellationState = &tessellationState,
.pViewportState = &viewportState,
.pRasterizationState = &rasterizerState.get<vk::PipelineRasterizationStateCreateInfo>(),
.pMultisampleState = &multisampleState,
.pDepthStencilState = &depthState,
.pColorBlendState = &blendState,
.pDynamicState = nullptr,
.layout = *drawStorage->pipelineLayout,
.renderPass = renderPass,
.subpass = subpassIndex,
};
// Create the pipeline through the non-RAII device handle using the captured pipeline cache; any result other than eSuccess is thrown
auto pipeline{(*vkDevice).createGraphicsPipeline(pipelineCache, pipelineCreateInfo, nullptr, *vkDevice.getDispatcher())};
if (pipeline.result != vk::Result::eSuccess)
vk::throwResultException(pipeline.result, __builtin_FUNCTION());
commandBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline.value);
// Second opener: captures only `drawStorage`, the device and the push-descriptor trait explicitly (everything else via `[=`)
executor.AddSubpass([=, drawStorage = std::move(drawStorage), &vkDevice = gpu.vkDevice, supportsPushDescriptors = gpu.traits.supportsPushDescriptors](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &, vk::RenderPass renderPass, u32 subpassIndex) mutable {
// Walk every bound vertex buffer handle by binding index (the loop body is elided by the hunk header below)
auto &vertexBufferHandles{boundVertexBuffers->handles};
for (u32 bindingIndex{}; bindingIndex != vertexBufferHandles.size(); bindingIndex++) {
@ -2911,15 +2876,17 @@ namespace skyline::gpu::interconnect {
}
// Descriptors are either pushed directly into the command buffer or written into a freshly allocated set and bound
// NOTE(review): each call below appears twice, once dereferencing a `vk::raii` member (`*drawStorage->...`) and once using the plain handle
if (supportsPushDescriptors) {
commandBuffer.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, *drawStorage->pipelineLayout, 0, **drawStorage->descriptorSetWrites);
commandBuffer.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, drawStorage->pipelineLayout, 0, **drawStorage->descriptorSetWrites);
} else {
auto descriptorSet{gpu.descriptor.AllocateSet(*drawStorage->descriptorSetLayout)};
auto descriptorSet{gpu.descriptor.AllocateSet(drawStorage->descriptorSetLayout)};
// Retarget every pending write at the allocated set before submitting the writes to the device
for (auto &descriptorSetWrite : **drawStorage->descriptorSetWrites)
descriptorSetWrite.dstSet = descriptorSet;
vkDevice.updateDescriptorSets(**drawStorage->descriptorSetWrites, nullptr);
commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *drawStorage->pipelineLayout, 0, descriptorSet, nullptr);
commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, drawStorage->pipelineLayout, 0, descriptorSet, nullptr);
}
// Bind the pipeline handle stored in `drawStorage` rather than one created inside this callback
commandBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, drawStorage->pipeline);
// Indexed draws bind the index buffer first; the `else` branch header is elided by the hunk header below
if constexpr (IsIndexed) {
commandBuffer.bindIndexBuffer(boundIndexBuffer->handle, boundIndexBuffer->offset, boundIndexBuffer->type);
commandBuffer.drawIndexed(count, 1, first, vertexOffset, 0);
@ -2927,10 +2894,7 @@ namespace skyline::gpu::interconnect {
commandBuffer.draw(count, 1, first, 0);
}
// Hand the storage objects to the fence cycle
// NOTE(review): presumably this keeps them alive until the GPU has finished with the command buffer — AttachObject is not visible here, confirm.
// The first three statements go with FenceStorage, the last attaches DrawStorage directly.
fenceStorage->drawStorage = drawStorage;
fenceStorage->pipeline = vk::raii::Pipeline(vkDevice, pipeline.value);
cycle->AttachObject(fenceStorage);
cycle->AttachObject(drawStorage);
// Render area extent: the depth render target's guest dimensions when there are no active color targets, otherwise the first color target's texture dimensions
}, vk::Rect2D{
.extent = activeColorRenderTargets.empty() ? depthRenderTarget.guest.dimensions : activeColorRenderTargets.front()->texture->dimensions,
}, {}, activeColorRenderTargets, depthRenderTargetView);