Implement descriptor update batching and push descriptors

Batching avoids attaching every descriptor set to the fence cycle individually, which was taking a fair amount of time because of the allocation each attachment requires.
This commit is contained in:
Billy Laws 2022-10-09 13:58:50 +01:00
parent 62a165b51e
commit 9ce848d4e0
3 changed files with 47 additions and 23 deletions

View File

@ -26,15 +26,23 @@ namespace skyline::gpu::interconnect::maxwell3d {
textures{manager, registerBundle.texturePoolRegisters},
directState{activeState.directState} {
// Flush callback registered with the executor: hands off any pending descriptor set batch and resets cached binding state
executor.AddFlushCallback([this] {
// A partially filled batch has not been attached yet, pass it to the executor as a dependency before dropping our reference
if (attachedDescriptorSets) {
ctx.executor.AttachDependency(attachedDescriptorSets);
attachedDescriptorSets = nullptr;
// activeDescriptorSet is a raw pointer to an element of the batch, so it is cleared together with it
// NOTE(review): this reset only happens while a batch is pending; if the batch was already attached on reaching DescriptorBatchSize, activeDescriptorSet is left unchanged by the flush - confirm that is intended
activeDescriptorSet = nullptr;
}
// Mark all tracked state dirty
activeState.MarkAllDirty();
constantBuffers.MarkAllDirty();
samplers.MarkAllDirty();
textures.MarkAllDirty();
// Clear the flag recording that the quad conversion buffer has been attached
quadConversionBufferAttached = false;
constantBuffers.DisableQuickBind();
});
// Pipeline change callback registered with the executor: marks the active state dirty and forgets the current descriptor set
executor.AddPipelineChangeCallback([this] {
activeState.MarkAllDirty();
// The next descriptor update will therefore receive a null old set
// NOTE(review): presumably so that nothing is copied from a set that was bound under a different pipeline - confirm
activeDescriptorSet = nullptr;
});
}
@ -232,14 +240,23 @@ namespace skyline::gpu::interconnect::maxwell3d {
builder.SetPipeline(pipeline->compiledPipeline.pipeline);
if (descUpdateInfo) {
auto newSet{std::make_shared<DescriptorAllocator::ActiveDescriptorSet>(ctx.gpu.descriptor.AllocateSet(descUpdateInfo->descriptorSetLayout))};
ctx.executor.cycle->AttachObject(newSet);
if (ctx.gpu.traits.supportsPushDescriptors) {
builder.SetDescriptorSetWithPush(descUpdateInfo);
} else {
if (!attachedDescriptorSets)
attachedDescriptorSets = std::make_shared<boost::container::static_vector<DescriptorAllocator::ActiveDescriptorSet, DescriptorBatchSize>>();
// Descriptor set lifetime is bound to the current cycle so we can safely use a raw pointer from now on
auto *oldSet{activeDescriptorSet};
activeDescriptorSet = newSet.get();
auto newSet{&attachedDescriptorSets->emplace_back(ctx.gpu.descriptor.AllocateSet(descUpdateInfo->descriptorSetLayout))};
auto *oldSet{activeDescriptorSet};
activeDescriptorSet = newSet;
builder.SetDescriptorSetWithUpdate(descUpdateInfo, activeDescriptorSet, oldSet);
builder.SetDescriptorSetWithUpdate(descUpdateInfo, activeDescriptorSet, oldSet);
if (attachedDescriptorSets->size() == DescriptorBatchSize) {
ctx.executor.AttachDependency(attachedDescriptorSets);
attachedDescriptorSets.reset();
}
}
}
auto stateUpdater{builder.Build()};

View File

@ -48,6 +48,8 @@ namespace skyline::gpu::interconnect::maxwell3d {
std::shared_ptr<memory::Buffer> quadConversionBuffer{};
bool quadConversionBufferAttached{};
// Capacity of a descriptor set batch; once a batch holds this many sets it is attached to the executor as a dependency and released
static constexpr size_t DescriptorBatchSize{0x100};
// Batch of descriptor sets allocated on the non-push-descriptor path; created on demand and null when no batch is pending
std::shared_ptr<boost::container::static_vector<DescriptorAllocator::ActiveDescriptorSet, DescriptorBatchSize>> attachedDescriptorSets;
// Non-owning pointer to the most recently allocated descriptor set, passed as the old set to the next descriptor update; null when there is none
DescriptorAllocator::ActiveDescriptorSet *activeDescriptorSet{};
size_t UpdateQuadConversionBuffer(u32 count, u32 firstVertex);

View File

@ -205,31 +205,36 @@ namespace skyline::gpu::interconnect::maxwell3d {
}
}
// Set the destination/(source) descriptor set(s) for all writes/(copies)
for (auto &write : updateInfo->writes)
write.dstSet = **dstSet;
if constexpr (PushDescriptor) {
commandBuffer.pushDescriptorSetKHR(updateInfo->bindPoint, updateInfo->pipelineLayout, updateInfo->descriptorSetIndex, updateInfo->writes);
} else {
// Set the destination/(source) descriptor set(s) for all writes/(copies)
for (auto &write : updateInfo->writes)
write.dstSet = **dstSet;
for (auto &copy : updateInfo->copies) {
copy.dstSet = **dstSet;
copy.srcSet = **srcSet;
for (auto &copy : updateInfo->copies) {
copy.dstSet = **dstSet;
copy.srcSet = **srcSet;
}
// Perform the updates, doing copies first to avoid overwriting
if (!updateInfo->copies.empty())
gpu.vkDevice.updateDescriptorSets({}, updateInfo->copies);
if (!updateInfo->writes.empty())
gpu.vkDevice.updateDescriptorSets(updateInfo->writes, {});
// Bind the updated descriptor set and we're done!
commandBuffer.bindDescriptorSets(updateInfo->bindPoint, updateInfo->pipelineLayout, updateInfo->descriptorSetIndex, **dstSet, {});
}
// Perform the updates, doing copies first to avoid overwriting
if (!updateInfo->copies.empty())
gpu.vkDevice.updateDescriptorSets({}, updateInfo->copies);
if (!updateInfo->writes.empty())
gpu.vkDevice.updateDescriptorSets(updateInfo->writes, {});
// Bind the updated descriptor set and we're done!
commandBuffer.bindDescriptorSets(updateInfo->bindPoint, updateInfo->pipelineLayout, updateInfo->descriptorSetIndex, **dstSet, {});
}
DescriptorUpdateInfo *updateInfo;
DescriptorAllocator::ActiveDescriptorSet *srcSet;
DescriptorAllocator::ActiveDescriptorSet *dstSet;
};
using SetDescriptorSetWithUpdateCmd = CmdHolder<SetDescriptorSetWithUpdateCmdImpl>;
using SetDescriptorSetWithUpdateCmd = CmdHolder<SetDescriptorSetCmdImpl<false>>;
using SetDescriptorSetWithPushCmd = CmdHolder<SetDescriptorSetCmdImpl<true>>;
struct SetPipelineCmdImpl {
void Record(GPU &gpu, vk::raii::CommandBuffer &commandBuffer) {