mirror of
https://github.com/skyline-emu/skyline.git
synced 2025-01-11 01:09:10 +01:00
Implement thread pool based async pipeline compilation with futures
By distributing the load of shader compilation across multiple threads and waiting for completion only when absolutely necessary, we can reduce compilation stutters significantly.
This commit is contained in:
parent
186549748d
commit
072b8193a1
@ -296,32 +296,9 @@ namespace skyline::gpu::cache {
|
||||
return lhs == rhs;
|
||||
}
|
||||
|
||||
GraphicsPipelineCache::PipelineCacheEntry::PipelineCacheEntry(vk::raii::DescriptorSetLayout &&descriptorSetLayout, vk::raii::PipelineLayout &&pipelineLayout, vk::raii::Pipeline &&pipeline) : descriptorSetLayout(std::move(descriptorSetLayout)), pipelineLayout(std::move(pipelineLayout)), pipeline(std::move(pipeline)) {}
|
||||
|
||||
GraphicsPipelineCache::CompiledPipeline::CompiledPipeline(const PipelineCacheEntry &entry) : descriptorSetLayout(*entry.descriptorSetLayout), pipelineLayout(*entry.pipelineLayout), pipeline(*entry.pipeline) {}
|
||||
|
||||
GraphicsPipelineCache::CompiledPipeline GraphicsPipelineCache::GetCompiledPipeline(const PipelineState &state, span<const vk::DescriptorSetLayoutBinding> layoutBindings, span<const vk::PushConstantRange> pushConstantRanges, bool noPushDescriptors) {
|
||||
std::unique_lock lock(mutex);
|
||||
|
||||
auto it{pipelineCache.find(state)};
|
||||
if (it != pipelineCache.end())
|
||||
return CompiledPipeline{it->second};
|
||||
|
||||
lock.unlock();
|
||||
|
||||
vk::raii::DescriptorSetLayout descriptorSetLayout{gpu.vkDevice, vk::DescriptorSetLayoutCreateInfo{
|
||||
.flags = vk::DescriptorSetLayoutCreateFlags{(!noPushDescriptors && gpu.traits.supportsPushDescriptors) ? vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR : vk::DescriptorSetLayoutCreateFlags{}},
|
||||
.pBindings = layoutBindings.data(),
|
||||
.bindingCount = static_cast<u32>(layoutBindings.size()),
|
||||
}};
|
||||
|
||||
vk::raii::PipelineLayout pipelineLayout{gpu.vkDevice, vk::PipelineLayoutCreateInfo{
|
||||
.pSetLayouts = &*descriptorSetLayout,
|
||||
.setLayoutCount = 1,
|
||||
.pPushConstantRanges = pushConstantRanges.data(),
|
||||
.pushConstantRangeCount = static_cast<u32>(pushConstantRanges.size()),
|
||||
}};
|
||||
GraphicsPipelineCache::PipelineCacheEntry::PipelineCacheEntry(vk::raii::DescriptorSetLayout &&descriptorSetLayout, vk::raii::PipelineLayout &&pipelineLayout) : descriptorSetLayout{std::move(descriptorSetLayout)}, pipelineLayout{std::move(pipelineLayout)} {}
|
||||
|
||||
vk::raii::Pipeline GraphicsPipelineCache::BuildPipeline(const PipelineCacheKey &key, vk::PipelineLayout pipelineLayout) {
|
||||
boost::container::small_vector<vk::AttachmentDescription, 8> attachmentDescriptions;
|
||||
boost::container::small_vector<vk::AttachmentReference, 8> attachmentReferences;
|
||||
|
||||
@ -329,7 +306,7 @@ namespace skyline::gpu::cache {
|
||||
if (format != vk::Format::eUndefined) {
|
||||
attachmentDescriptions.push_back(vk::AttachmentDescription{
|
||||
.format = format,
|
||||
.samples = state.sampleCount,
|
||||
.samples = key.sampleCount,
|
||||
.loadOp = vk::AttachmentLoadOp::eLoad,
|
||||
.storeOp = vk::AttachmentStoreOp::eStore,
|
||||
.stencilLoadOp = vk::AttachmentLoadOp::eLoad,
|
||||
@ -354,11 +331,11 @@ namespace skyline::gpu::cache {
|
||||
.pipelineBindPoint = vk::PipelineBindPoint::eGraphics,
|
||||
};
|
||||
|
||||
for (auto &colorAttachment : state.colorFormats)
|
||||
for (auto &colorAttachment : key.colorFormats)
|
||||
pushAttachment(colorAttachment);
|
||||
|
||||
if (state.depthStencilFormat != vk::Format::eUndefined) {
|
||||
pushAttachment(state.depthStencilFormat);
|
||||
if (key.depthStencilFormat != vk::Format::eUndefined) {
|
||||
pushAttachment(key.depthStencilFormat);
|
||||
|
||||
subpassDescription.pColorAttachments = attachmentReferences.data();
|
||||
subpassDescription.colorAttachmentCount = static_cast<u32>(attachmentReferences.size() - 1);
|
||||
@ -375,25 +352,48 @@ namespace skyline::gpu::cache {
|
||||
.pSubpasses = &subpassDescription,
|
||||
}};
|
||||
|
||||
auto pipeline{gpu.vkDevice.createGraphicsPipeline(vkPipelineCache, vk::GraphicsPipelineCreateInfo{
|
||||
.pStages = state.shaderStages.data(),
|
||||
.stageCount = static_cast<u32>(state.shaderStages.size()),
|
||||
.pVertexInputState = &state.vertexState.get<vk::PipelineVertexInputStateCreateInfo>(),
|
||||
.pInputAssemblyState = &state.inputAssemblyState,
|
||||
.pViewportState = &state.viewportState,
|
||||
.pRasterizationState = &state.rasterizationState.get<vk::PipelineRasterizationStateCreateInfo>(),
|
||||
.pMultisampleState = &state.multisampleState,
|
||||
.pDepthStencilState = &state.depthStencilState,
|
||||
.pColorBlendState = &state.colorBlendState,
|
||||
.pDynamicState = &state.dynamicState,
|
||||
.layout = *pipelineLayout,
|
||||
return gpu.vkDevice.createGraphicsPipeline(vkPipelineCache, vk::GraphicsPipelineCreateInfo{
|
||||
.pStages = key.shaderStages.data(),
|
||||
.stageCount = static_cast<u32>(key.shaderStages.size()),
|
||||
.pVertexInputState = &key.vertexState.get<vk::PipelineVertexInputStateCreateInfo>(),
|
||||
.pInputAssemblyState = &key.inputAssemblyState,
|
||||
.pViewportState = &key.viewportState,
|
||||
.pRasterizationState = &key.rasterizationState.get<vk::PipelineRasterizationStateCreateInfo>(),
|
||||
.pMultisampleState = &key.multisampleState,
|
||||
.pDepthStencilState = &key.depthStencilState,
|
||||
.pColorBlendState = &key.colorBlendState,
|
||||
.pDynamicState = &key.dynamicState,
|
||||
.layout = pipelineLayout,
|
||||
.renderPass = *renderPass,
|
||||
.subpass = 0,
|
||||
})};
|
||||
});
|
||||
}
|
||||
|
||||
lock.lock();
|
||||
GraphicsPipelineCache::CompiledPipeline::CompiledPipeline(const PipelineCacheEntry &entry) : descriptorSetLayout{*entry.descriptorSetLayout}, pipelineLayout{*entry.pipelineLayout}, pipeline{*entry.pipeline} {}
|
||||
|
||||
auto pipelineEntryIt{pipelineCache.try_emplace(PipelineCacheKey{state}, std::move(descriptorSetLayout), std::move(pipelineLayout), std::move(pipeline))};
|
||||
GraphicsPipelineCache::CompiledPipeline GraphicsPipelineCache::GetCompiledPipeline(const PipelineState &state, span<const vk::DescriptorSetLayoutBinding> layoutBindings, span<const vk::PushConstantRange> pushConstantRanges, bool noPushDescriptors) {
|
||||
std::unique_lock lock(mutex);
|
||||
|
||||
auto it{pipelineCache.find(state)};
|
||||
if (it != pipelineCache.end())
|
||||
return CompiledPipeline{it->second};
|
||||
|
||||
vk::raii::DescriptorSetLayout descriptorSetLayout{gpu.vkDevice, vk::DescriptorSetLayoutCreateInfo{
|
||||
.flags = vk::DescriptorSetLayoutCreateFlags{(!noPushDescriptors && gpu.traits.supportsPushDescriptors) ? vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR : vk::DescriptorSetLayoutCreateFlags{}},
|
||||
.pBindings = layoutBindings.data(),
|
||||
.bindingCount = static_cast<u32>(layoutBindings.size()),
|
||||
}};
|
||||
|
||||
vk::raii::PipelineLayout pipelineLayout{gpu.vkDevice, vk::PipelineLayoutCreateInfo{
|
||||
.pSetLayouts = &*descriptorSetLayout,
|
||||
.setLayoutCount = 1,
|
||||
.pPushConstantRanges = pushConstantRanges.data(),
|
||||
.pushConstantRangeCount = static_cast<u32>(pushConstantRanges.size()),
|
||||
}};
|
||||
|
||||
auto pipelineEntryIt{pipelineCache.try_emplace(PipelineCacheKey{state}, std::move(descriptorSetLayout), std::move(pipelineLayout))};
|
||||
auto pipelineFuture{pool.submit(&GraphicsPipelineCache::BuildPipeline, this, std::ref(pipelineEntryIt.first->first), std::ref(*pipelineEntryIt.first->second.pipelineLayout))};
|
||||
pipelineEntryIt.first->second.pipeline = pipelineFuture.share();
|
||||
return CompiledPipeline{pipelineEntryIt.first->second};
|
||||
}
|
||||
}
|
||||
|
@ -3,6 +3,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <future>
|
||||
#include <BS_thread_pool.hpp>
|
||||
#include <vulkan/vulkan_raii.hpp>
|
||||
|
||||
namespace skyline::gpu {
|
||||
@ -136,20 +138,23 @@ namespace skyline::gpu::cache {
|
||||
struct PipelineCacheEntry {
|
||||
vk::raii::DescriptorSetLayout descriptorSetLayout;
|
||||
vk::raii::PipelineLayout pipelineLayout;
|
||||
vk::raii::Pipeline pipeline;
|
||||
std::optional<std::shared_future<vk::raii::Pipeline>> pipeline;
|
||||
|
||||
PipelineCacheEntry(vk::raii::DescriptorSetLayout&& descriptorSetLayout, vk::raii::PipelineLayout &&layout, vk::raii::Pipeline &&pipeline);
|
||||
PipelineCacheEntry(vk::raii::DescriptorSetLayout&& descriptorSetLayout, vk::raii::PipelineLayout &&layout);
|
||||
};
|
||||
|
||||
BS::thread_pool pool;
|
||||
std::unordered_map<PipelineCacheKey, PipelineCacheEntry, PipelineStateHash, PipelineCacheEqual> pipelineCache;
|
||||
|
||||
vk::raii::Pipeline BuildPipeline(const PipelineCacheKey &key, vk::PipelineLayout pipelineLayout);
|
||||
|
||||
public:
|
||||
GraphicsPipelineCache(GPU &gpu);
|
||||
|
||||
struct CompiledPipeline {
|
||||
vk::DescriptorSetLayout descriptorSetLayout;
|
||||
vk::PipelineLayout pipelineLayout;
|
||||
vk::Pipeline pipeline;
|
||||
std::shared_future<vk::raii::Pipeline> pipeline;
|
||||
|
||||
CompiledPipeline(const PipelineCacheEntry &entry);
|
||||
};
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <future>
|
||||
#include <gpu/interconnect/command_executor.h>
|
||||
#include "common.h"
|
||||
|
||||
@ -261,6 +262,16 @@ namespace skyline::gpu::interconnect {
|
||||
};
|
||||
using SetPipelineCmd = CmdHolder<SetPipelineCmdImpl>;
|
||||
|
||||
struct SetPipelineFutureCmdImpl {
|
||||
void Record(GPU &gpu, vk::raii::CommandBuffer &commandBuffer) {
|
||||
commandBuffer.bindPipeline(bindPoint, *pipeline.get());
|
||||
}
|
||||
|
||||
std::shared_future<vk::raii::Pipeline> pipeline;
|
||||
vk::PipelineBindPoint bindPoint;
|
||||
};
|
||||
using SetPipelineFutureCmd = CmdHolder<SetPipelineFutureCmdImpl>;
|
||||
|
||||
/**
|
||||
* @brief Single-use helper for recording a batch of state updates into a command buffer
|
||||
*/
|
||||
@ -471,6 +482,14 @@ namespace skyline::gpu::interconnect {
|
||||
});
|
||||
}
|
||||
|
||||
void SetPipeline(const std::shared_future<vk::raii::Pipeline> &pipeline, vk::PipelineBindPoint bindPoint) {
|
||||
AppendCmd<SetPipelineFutureCmd>(
|
||||
{
|
||||
.pipeline = pipeline,
|
||||
.bindPoint = bindPoint,
|
||||
});
|
||||
}
|
||||
|
||||
void SetDescriptorSetWithPush(DescriptorUpdateInfo *updateInfo) {
|
||||
AppendCmd<SetDescriptorSetWithPushCmd>(
|
||||
{
|
||||
|
@ -205,7 +205,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
|
||||
size_t PackedPipelineState::GetColorRenderTargetCount() const {
|
||||
for (size_t i{engine::ColorTargetCount}; i > 0 ; i--)
|
||||
if (IsColorRenderTargetEnabled(i - 1))
|
||||
if (IsColorRenderTargetEnabled(ctSelect[i - 1]))
|
||||
return i;
|
||||
|
||||
return 0;
|
||||
|
@ -530,7 +530,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
|
||||
for (u32 i{}; i < packedState.GetColorRenderTargetCount(); i++) {
|
||||
attachmentBlendStates.push_back(packedState.GetAttachmentBlendState(i));
|
||||
texture::Format format{packedState.GetColorRenderTargetFormat(i)};
|
||||
texture::Format format{packedState.GetColorRenderTargetFormat(packedState.ctSelect[i])};
|
||||
colorAttachmentFormats.push_back(format ? format->vkFormat : vk::Format::eUndefined);
|
||||
}
|
||||
|
||||
@ -595,10 +595,10 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
}
|
||||
|
||||
Pipeline::Pipeline(InterconnectContext &ctx, const PipelineStateAccessor &accessor, const PackedPipelineState &packedState)
|
||||
: shaderStages{MakePipelineShaders(ctx, accessor, packedState)},
|
||||
: sourcePackedState{packedState},
|
||||
shaderStages{MakePipelineShaders(ctx, accessor, sourcePackedState)},
|
||||
descriptorInfo{MakePipelineDescriptorInfo(shaderStages, ctx.gpu.traits.quirks.needsIndividualTextureBindingWrites)},
|
||||
compiledPipeline{MakeCompiledPipeline(ctx, packedState, shaderStages, descriptorInfo.descriptorSetLayoutBindings)},
|
||||
sourcePackedState{packedState} {
|
||||
compiledPipeline{MakeCompiledPipeline(ctx, sourcePackedState, shaderStages, descriptorInfo.descriptorSetLayoutBindings)} {
|
||||
storageBufferViews.resize(descriptorInfo.totalStorageBufferCount);
|
||||
}
|
||||
|
||||
|
@ -82,6 +82,8 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
u32 totalImageDescCount;
|
||||
};
|
||||
|
||||
PackedPipelineState sourcePackedState;
|
||||
|
||||
private:
|
||||
std::vector<CachedMappedBufferView> storageBufferViews;
|
||||
u32 lastExecutionNumber{}; //!< The last execution number this pipeline was used at
|
||||
@ -99,8 +101,6 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
||||
cache::GraphicsPipelineCache::CompiledPipeline compiledPipeline;
|
||||
size_t sampledImageCount{};
|
||||
|
||||
PackedPipelineState sourcePackedState;
|
||||
|
||||
Pipeline(InterconnectContext &ctx, const PipelineStateAccessor &accessor, const PackedPipelineState &packedState);
|
||||
|
||||
Pipeline *LookupNext(const PackedPipelineState &packedState);
|
||||
|
Loading…
x
Reference in New Issue
Block a user