mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-12-23 20:31:48 +01:00
Implement thread pool based async pipeline compilation with futures
By distributing the load of shader compilation across multiple threads and only waiting for completion when absolutely necessary, we can reduce compilation stutters significantly.
This commit is contained in:
parent
186549748d
commit
072b8193a1
@ -296,32 +296,9 @@ namespace skyline::gpu::cache {
|
|||||||
return lhs == rhs;
|
return lhs == rhs;
|
||||||
}
|
}
|
||||||
|
|
||||||
GraphicsPipelineCache::PipelineCacheEntry::PipelineCacheEntry(vk::raii::DescriptorSetLayout &&descriptorSetLayout, vk::raii::PipelineLayout &&pipelineLayout, vk::raii::Pipeline &&pipeline) : descriptorSetLayout(std::move(descriptorSetLayout)), pipelineLayout(std::move(pipelineLayout)), pipeline(std::move(pipeline)) {}
|
GraphicsPipelineCache::PipelineCacheEntry::PipelineCacheEntry(vk::raii::DescriptorSetLayout &&descriptorSetLayout, vk::raii::PipelineLayout &&pipelineLayout) : descriptorSetLayout{std::move(descriptorSetLayout)}, pipelineLayout{std::move(pipelineLayout)} {}
|
||||||
|
|
||||||
GraphicsPipelineCache::CompiledPipeline::CompiledPipeline(const PipelineCacheEntry &entry) : descriptorSetLayout(*entry.descriptorSetLayout), pipelineLayout(*entry.pipelineLayout), pipeline(*entry.pipeline) {}
|
|
||||||
|
|
||||||
GraphicsPipelineCache::CompiledPipeline GraphicsPipelineCache::GetCompiledPipeline(const PipelineState &state, span<const vk::DescriptorSetLayoutBinding> layoutBindings, span<const vk::PushConstantRange> pushConstantRanges, bool noPushDescriptors) {
|
|
||||||
std::unique_lock lock(mutex);
|
|
||||||
|
|
||||||
auto it{pipelineCache.find(state)};
|
|
||||||
if (it != pipelineCache.end())
|
|
||||||
return CompiledPipeline{it->second};
|
|
||||||
|
|
||||||
lock.unlock();
|
|
||||||
|
|
||||||
vk::raii::DescriptorSetLayout descriptorSetLayout{gpu.vkDevice, vk::DescriptorSetLayoutCreateInfo{
|
|
||||||
.flags = vk::DescriptorSetLayoutCreateFlags{(!noPushDescriptors && gpu.traits.supportsPushDescriptors) ? vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR : vk::DescriptorSetLayoutCreateFlags{}},
|
|
||||||
.pBindings = layoutBindings.data(),
|
|
||||||
.bindingCount = static_cast<u32>(layoutBindings.size()),
|
|
||||||
}};
|
|
||||||
|
|
||||||
vk::raii::PipelineLayout pipelineLayout{gpu.vkDevice, vk::PipelineLayoutCreateInfo{
|
|
||||||
.pSetLayouts = &*descriptorSetLayout,
|
|
||||||
.setLayoutCount = 1,
|
|
||||||
.pPushConstantRanges = pushConstantRanges.data(),
|
|
||||||
.pushConstantRangeCount = static_cast<u32>(pushConstantRanges.size()),
|
|
||||||
}};
|
|
||||||
|
|
||||||
|
vk::raii::Pipeline GraphicsPipelineCache::BuildPipeline(const PipelineCacheKey &key, vk::PipelineLayout pipelineLayout) {
|
||||||
boost::container::small_vector<vk::AttachmentDescription, 8> attachmentDescriptions;
|
boost::container::small_vector<vk::AttachmentDescription, 8> attachmentDescriptions;
|
||||||
boost::container::small_vector<vk::AttachmentReference, 8> attachmentReferences;
|
boost::container::small_vector<vk::AttachmentReference, 8> attachmentReferences;
|
||||||
|
|
||||||
@ -329,7 +306,7 @@ namespace skyline::gpu::cache {
|
|||||||
if (format != vk::Format::eUndefined) {
|
if (format != vk::Format::eUndefined) {
|
||||||
attachmentDescriptions.push_back(vk::AttachmentDescription{
|
attachmentDescriptions.push_back(vk::AttachmentDescription{
|
||||||
.format = format,
|
.format = format,
|
||||||
.samples = state.sampleCount,
|
.samples = key.sampleCount,
|
||||||
.loadOp = vk::AttachmentLoadOp::eLoad,
|
.loadOp = vk::AttachmentLoadOp::eLoad,
|
||||||
.storeOp = vk::AttachmentStoreOp::eStore,
|
.storeOp = vk::AttachmentStoreOp::eStore,
|
||||||
.stencilLoadOp = vk::AttachmentLoadOp::eLoad,
|
.stencilLoadOp = vk::AttachmentLoadOp::eLoad,
|
||||||
@ -354,11 +331,11 @@ namespace skyline::gpu::cache {
|
|||||||
.pipelineBindPoint = vk::PipelineBindPoint::eGraphics,
|
.pipelineBindPoint = vk::PipelineBindPoint::eGraphics,
|
||||||
};
|
};
|
||||||
|
|
||||||
for (auto &colorAttachment : state.colorFormats)
|
for (auto &colorAttachment : key.colorFormats)
|
||||||
pushAttachment(colorAttachment);
|
pushAttachment(colorAttachment);
|
||||||
|
|
||||||
if (state.depthStencilFormat != vk::Format::eUndefined) {
|
if (key.depthStencilFormat != vk::Format::eUndefined) {
|
||||||
pushAttachment(state.depthStencilFormat);
|
pushAttachment(key.depthStencilFormat);
|
||||||
|
|
||||||
subpassDescription.pColorAttachments = attachmentReferences.data();
|
subpassDescription.pColorAttachments = attachmentReferences.data();
|
||||||
subpassDescription.colorAttachmentCount = static_cast<u32>(attachmentReferences.size() - 1);
|
subpassDescription.colorAttachmentCount = static_cast<u32>(attachmentReferences.size() - 1);
|
||||||
@ -375,25 +352,48 @@ namespace skyline::gpu::cache {
|
|||||||
.pSubpasses = &subpassDescription,
|
.pSubpasses = &subpassDescription,
|
||||||
}};
|
}};
|
||||||
|
|
||||||
auto pipeline{gpu.vkDevice.createGraphicsPipeline(vkPipelineCache, vk::GraphicsPipelineCreateInfo{
|
return gpu.vkDevice.createGraphicsPipeline(vkPipelineCache, vk::GraphicsPipelineCreateInfo{
|
||||||
.pStages = state.shaderStages.data(),
|
.pStages = key.shaderStages.data(),
|
||||||
.stageCount = static_cast<u32>(state.shaderStages.size()),
|
.stageCount = static_cast<u32>(key.shaderStages.size()),
|
||||||
.pVertexInputState = &state.vertexState.get<vk::PipelineVertexInputStateCreateInfo>(),
|
.pVertexInputState = &key.vertexState.get<vk::PipelineVertexInputStateCreateInfo>(),
|
||||||
.pInputAssemblyState = &state.inputAssemblyState,
|
.pInputAssemblyState = &key.inputAssemblyState,
|
||||||
.pViewportState = &state.viewportState,
|
.pViewportState = &key.viewportState,
|
||||||
.pRasterizationState = &state.rasterizationState.get<vk::PipelineRasterizationStateCreateInfo>(),
|
.pRasterizationState = &key.rasterizationState.get<vk::PipelineRasterizationStateCreateInfo>(),
|
||||||
.pMultisampleState = &state.multisampleState,
|
.pMultisampleState = &key.multisampleState,
|
||||||
.pDepthStencilState = &state.depthStencilState,
|
.pDepthStencilState = &key.depthStencilState,
|
||||||
.pColorBlendState = &state.colorBlendState,
|
.pColorBlendState = &key.colorBlendState,
|
||||||
.pDynamicState = &state.dynamicState,
|
.pDynamicState = &key.dynamicState,
|
||||||
.layout = *pipelineLayout,
|
.layout = pipelineLayout,
|
||||||
.renderPass = *renderPass,
|
.renderPass = *renderPass,
|
||||||
.subpass = 0,
|
.subpass = 0,
|
||||||
})};
|
});
|
||||||
|
}
|
||||||
|
|
||||||
lock.lock();
|
GraphicsPipelineCache::CompiledPipeline::CompiledPipeline(const PipelineCacheEntry &entry) : descriptorSetLayout{*entry.descriptorSetLayout}, pipelineLayout{*entry.pipelineLayout}, pipeline{*entry.pipeline} {}
|
||||||
|
|
||||||
auto pipelineEntryIt{pipelineCache.try_emplace(PipelineCacheKey{state}, std::move(descriptorSetLayout), std::move(pipelineLayout), std::move(pipeline))};
|
GraphicsPipelineCache::CompiledPipeline GraphicsPipelineCache::GetCompiledPipeline(const PipelineState &state, span<const vk::DescriptorSetLayoutBinding> layoutBindings, span<const vk::PushConstantRange> pushConstantRanges, bool noPushDescriptors) {
|
||||||
|
std::unique_lock lock(mutex);
|
||||||
|
|
||||||
|
auto it{pipelineCache.find(state)};
|
||||||
|
if (it != pipelineCache.end())
|
||||||
|
return CompiledPipeline{it->second};
|
||||||
|
|
||||||
|
vk::raii::DescriptorSetLayout descriptorSetLayout{gpu.vkDevice, vk::DescriptorSetLayoutCreateInfo{
|
||||||
|
.flags = vk::DescriptorSetLayoutCreateFlags{(!noPushDescriptors && gpu.traits.supportsPushDescriptors) ? vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR : vk::DescriptorSetLayoutCreateFlags{}},
|
||||||
|
.pBindings = layoutBindings.data(),
|
||||||
|
.bindingCount = static_cast<u32>(layoutBindings.size()),
|
||||||
|
}};
|
||||||
|
|
||||||
|
vk::raii::PipelineLayout pipelineLayout{gpu.vkDevice, vk::PipelineLayoutCreateInfo{
|
||||||
|
.pSetLayouts = &*descriptorSetLayout,
|
||||||
|
.setLayoutCount = 1,
|
||||||
|
.pPushConstantRanges = pushConstantRanges.data(),
|
||||||
|
.pushConstantRangeCount = static_cast<u32>(pushConstantRanges.size()),
|
||||||
|
}};
|
||||||
|
|
||||||
|
auto pipelineEntryIt{pipelineCache.try_emplace(PipelineCacheKey{state}, std::move(descriptorSetLayout), std::move(pipelineLayout))};
|
||||||
|
auto pipelineFuture{pool.submit(&GraphicsPipelineCache::BuildPipeline, this, std::ref(pipelineEntryIt.first->first), std::ref(*pipelineEntryIt.first->second.pipelineLayout))};
|
||||||
|
pipelineEntryIt.first->second.pipeline = pipelineFuture.share();
|
||||||
return CompiledPipeline{pipelineEntryIt.first->second};
|
return CompiledPipeline{pipelineEntryIt.first->second};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,8 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <future>
|
||||||
|
#include <BS_thread_pool.hpp>
|
||||||
#include <vulkan/vulkan_raii.hpp>
|
#include <vulkan/vulkan_raii.hpp>
|
||||||
|
|
||||||
namespace skyline::gpu {
|
namespace skyline::gpu {
|
||||||
@ -136,20 +138,23 @@ namespace skyline::gpu::cache {
|
|||||||
struct PipelineCacheEntry {
|
struct PipelineCacheEntry {
|
||||||
vk::raii::DescriptorSetLayout descriptorSetLayout;
|
vk::raii::DescriptorSetLayout descriptorSetLayout;
|
||||||
vk::raii::PipelineLayout pipelineLayout;
|
vk::raii::PipelineLayout pipelineLayout;
|
||||||
vk::raii::Pipeline pipeline;
|
std::optional<std::shared_future<vk::raii::Pipeline>> pipeline;
|
||||||
|
|
||||||
PipelineCacheEntry(vk::raii::DescriptorSetLayout&& descriptorSetLayout, vk::raii::PipelineLayout &&layout, vk::raii::Pipeline &&pipeline);
|
PipelineCacheEntry(vk::raii::DescriptorSetLayout&& descriptorSetLayout, vk::raii::PipelineLayout &&layout);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
BS::thread_pool pool;
|
||||||
std::unordered_map<PipelineCacheKey, PipelineCacheEntry, PipelineStateHash, PipelineCacheEqual> pipelineCache;
|
std::unordered_map<PipelineCacheKey, PipelineCacheEntry, PipelineStateHash, PipelineCacheEqual> pipelineCache;
|
||||||
|
|
||||||
|
vk::raii::Pipeline BuildPipeline(const PipelineCacheKey &key, vk::PipelineLayout pipelineLayout);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GraphicsPipelineCache(GPU &gpu);
|
GraphicsPipelineCache(GPU &gpu);
|
||||||
|
|
||||||
struct CompiledPipeline {
|
struct CompiledPipeline {
|
||||||
vk::DescriptorSetLayout descriptorSetLayout;
|
vk::DescriptorSetLayout descriptorSetLayout;
|
||||||
vk::PipelineLayout pipelineLayout;
|
vk::PipelineLayout pipelineLayout;
|
||||||
vk::Pipeline pipeline;
|
std::shared_future<vk::raii::Pipeline> pipeline;
|
||||||
|
|
||||||
CompiledPipeline(const PipelineCacheEntry &entry);
|
CompiledPipeline(const PipelineCacheEntry &entry);
|
||||||
};
|
};
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <future>
|
||||||
#include <gpu/interconnect/command_executor.h>
|
#include <gpu/interconnect/command_executor.h>
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
@ -261,6 +262,16 @@ namespace skyline::gpu::interconnect {
|
|||||||
};
|
};
|
||||||
using SetPipelineCmd = CmdHolder<SetPipelineCmdImpl>;
|
using SetPipelineCmd = CmdHolder<SetPipelineCmdImpl>;
|
||||||
|
|
||||||
|
struct SetPipelineFutureCmdImpl {
|
||||||
|
void Record(GPU &gpu, vk::raii::CommandBuffer &commandBuffer) {
|
||||||
|
commandBuffer.bindPipeline(bindPoint, *pipeline.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
std::shared_future<vk::raii::Pipeline> pipeline;
|
||||||
|
vk::PipelineBindPoint bindPoint;
|
||||||
|
};
|
||||||
|
using SetPipelineFutureCmd = CmdHolder<SetPipelineFutureCmdImpl>;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Single-use helper for recording a batch of state updates into a command buffer
|
* @brief Single-use helper for recording a batch of state updates into a command buffer
|
||||||
*/
|
*/
|
||||||
@ -471,6 +482,14 @@ namespace skyline::gpu::interconnect {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SetPipeline(const std::shared_future<vk::raii::Pipeline> &pipeline, vk::PipelineBindPoint bindPoint) {
|
||||||
|
AppendCmd<SetPipelineFutureCmd>(
|
||||||
|
{
|
||||||
|
.pipeline = pipeline,
|
||||||
|
.bindPoint = bindPoint,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
void SetDescriptorSetWithPush(DescriptorUpdateInfo *updateInfo) {
|
void SetDescriptorSetWithPush(DescriptorUpdateInfo *updateInfo) {
|
||||||
AppendCmd<SetDescriptorSetWithPushCmd>(
|
AppendCmd<SetDescriptorSetWithPushCmd>(
|
||||||
{
|
{
|
||||||
|
@ -205,7 +205,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
|
|
||||||
size_t PackedPipelineState::GetColorRenderTargetCount() const {
|
size_t PackedPipelineState::GetColorRenderTargetCount() const {
|
||||||
for (size_t i{engine::ColorTargetCount}; i > 0 ; i--)
|
for (size_t i{engine::ColorTargetCount}; i > 0 ; i--)
|
||||||
if (IsColorRenderTargetEnabled(i - 1))
|
if (IsColorRenderTargetEnabled(ctSelect[i - 1]))
|
||||||
return i;
|
return i;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -530,7 +530,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
|
|
||||||
for (u32 i{}; i < packedState.GetColorRenderTargetCount(); i++) {
|
for (u32 i{}; i < packedState.GetColorRenderTargetCount(); i++) {
|
||||||
attachmentBlendStates.push_back(packedState.GetAttachmentBlendState(i));
|
attachmentBlendStates.push_back(packedState.GetAttachmentBlendState(i));
|
||||||
texture::Format format{packedState.GetColorRenderTargetFormat(i)};
|
texture::Format format{packedState.GetColorRenderTargetFormat(packedState.ctSelect[i])};
|
||||||
colorAttachmentFormats.push_back(format ? format->vkFormat : vk::Format::eUndefined);
|
colorAttachmentFormats.push_back(format ? format->vkFormat : vk::Format::eUndefined);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -595,10 +595,10 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Pipeline::Pipeline(InterconnectContext &ctx, const PipelineStateAccessor &accessor, const PackedPipelineState &packedState)
|
Pipeline::Pipeline(InterconnectContext &ctx, const PipelineStateAccessor &accessor, const PackedPipelineState &packedState)
|
||||||
: shaderStages{MakePipelineShaders(ctx, accessor, packedState)},
|
: sourcePackedState{packedState},
|
||||||
|
shaderStages{MakePipelineShaders(ctx, accessor, sourcePackedState)},
|
||||||
descriptorInfo{MakePipelineDescriptorInfo(shaderStages, ctx.gpu.traits.quirks.needsIndividualTextureBindingWrites)},
|
descriptorInfo{MakePipelineDescriptorInfo(shaderStages, ctx.gpu.traits.quirks.needsIndividualTextureBindingWrites)},
|
||||||
compiledPipeline{MakeCompiledPipeline(ctx, packedState, shaderStages, descriptorInfo.descriptorSetLayoutBindings)},
|
compiledPipeline{MakeCompiledPipeline(ctx, sourcePackedState, shaderStages, descriptorInfo.descriptorSetLayoutBindings)} {
|
||||||
sourcePackedState{packedState} {
|
|
||||||
storageBufferViews.resize(descriptorInfo.totalStorageBufferCount);
|
storageBufferViews.resize(descriptorInfo.totalStorageBufferCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -82,6 +82,8 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
u32 totalImageDescCount;
|
u32 totalImageDescCount;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
PackedPipelineState sourcePackedState;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::vector<CachedMappedBufferView> storageBufferViews;
|
std::vector<CachedMappedBufferView> storageBufferViews;
|
||||||
u32 lastExecutionNumber{}; //!< The last execution number this pipeline was used at
|
u32 lastExecutionNumber{}; //!< The last execution number this pipeline was used at
|
||||||
@ -99,8 +101,6 @@ namespace skyline::gpu::interconnect::maxwell3d {
|
|||||||
cache::GraphicsPipelineCache::CompiledPipeline compiledPipeline;
|
cache::GraphicsPipelineCache::CompiledPipeline compiledPipeline;
|
||||||
size_t sampledImageCount{};
|
size_t sampledImageCount{};
|
||||||
|
|
||||||
PackedPipelineState sourcePackedState;
|
|
||||||
|
|
||||||
Pipeline(InterconnectContext &ctx, const PipelineStateAccessor &accessor, const PackedPipelineState &packedState);
|
Pipeline(InterconnectContext &ctx, const PipelineStateAccessor &accessor, const PackedPipelineState &packedState);
|
||||||
|
|
||||||
Pipeline *LookupNext(const PackedPipelineState &packedState);
|
Pipeline *LookupNext(const PackedPipelineState &packedState);
|
||||||
|
Loading…
Reference in New Issue
Block a user