diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/constant_buffers.cpp b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/constant_buffers.cpp index 4c1ae404..f01fbdbb 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/constant_buffers.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/constant_buffers.cpp @@ -80,9 +80,24 @@ namespace skyline::gpu::interconnect::maxwell3d { void ConstantBuffers::Bind(InterconnectContext &ctx, engine::ShaderStage stage, size_t index) { auto &view{*selectorState.UpdateGet(ctx).view}; boundConstantBuffers[static_cast(stage)][index] = {view}; + + if (quickBindEnabled && quickBind) + DisableQuickBind(); // We can only quick bind one buffer per draw + else if (quickBindEnabled) + quickBind = QuickBind{index, stage}; } void ConstantBuffers::Unbind(engine::ShaderStage stage, size_t index) { boundConstantBuffers[static_cast(stage)][index] = {}; } + + void ConstantBuffers::ResetQuickBind() { + quickBindEnabled = true; + quickBind.reset(); + } + + void ConstantBuffers::DisableQuickBind() { + quickBindEnabled = false; + quickBind.reset(); + } } \ No newline at end of file diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/constant_buffers.h b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/constant_buffers.h index 91623139..3569f7e7 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/constant_buffers.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/constant_buffers.h @@ -52,6 +52,16 @@ namespace skyline::gpu::interconnect::maxwell3d { public: ConstantBufferSet boundConstantBuffers; + /** + * @brief Allows for a single constant buffer to be bound between two draws without requiring a full descriptor sync + */ + struct QuickBind { + size_t index; //!< The index of the constant buffer to bind + engine::ShaderStage stage; //!< The shader stage to bind the constant buffer to + }; + std::optional quickBind; + bool quickBindEnabled{}; //!< If quick binding 
can occur; if multiple bindings, constant buffer loads or other engines have been used since the last draw, this is disabled + ConstantBuffers(DirtyManager &manager, const ConstantBufferSelectorState::EngineRegisters &constantBufferSelectorRegisters); void MarkAllDirty(); @@ -61,5 +71,15 @@ namespace skyline::gpu::interconnect::maxwell3d { void Bind(InterconnectContext &ctx, engine::ShaderStage stage, size_t index); void Unbind(engine::ShaderStage stage, size_t index); + + /** + * @brief Resets quick binding state to be ready to store a new bind, this should be called after every draw + */ + void ResetQuickBind(); + + /** + * @brief Disables quick binding, this should be called before any operation that could impact contents of bound constant buffers + */ + void DisableQuickBind(); }; } diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.cpp b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.cpp index 380cff2f..ac06cf4f 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.cpp @@ -88,6 +88,10 @@ namespace skyline::gpu::interconnect::maxwell3d { constantBuffers.Unbind(stage, index); } + void Maxwell3D::DisableQuickConstantBufferBind() { + constantBuffers.DisableQuickBind(); + } + void Maxwell3D::Clear(engine::ClearSurface &clearSurface) { auto scissor{GetClearScissor()}; if (scissor.extent.width == 0 || scissor.extent.height == 0) diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.h b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.h index 0ab3ac3b..5bc713cb 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.h @@ -62,6 +62,11 @@ namespace skyline::gpu::interconnect::maxwell3d { */ void BindConstantBuffer(engine::ShaderStage stage, u32 index, bool enable); + /** + * @note See 
ConstantBuffers::DisableQuickBind + */ + void DisableQuickConstantBufferBind(); + void Clear(engine::ClearSurface &clearSurface); void Draw(engine::DrawTopology topology, bool indexed, u32 count, u32 first, u32 instanceCount, u32 vertexOffset, u32 firstInstance); diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp index 9e16c220..6350a914 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp @@ -221,16 +221,22 @@ namespace skyline::gpu::interconnect::maxwell3d { Pipeline::DescriptorInfo descriptorInfo{}; u32 bindingIndex{}; - for (const auto &stage : shaderStages) { + for (size_t i{}; i < engine::ShaderStageCount; i++) { + const auto &stage{shaderStages[i]}; if (!stage.module) continue; - auto pushBindings{[&](vk::DescriptorType type, const auto &descs, u32 &count, bool individualDescWrites = false) { + auto &stageCbufUsage{descriptorInfo.cbufUsages[i]}; + + auto pushBindings{[&](vk::DescriptorType type, const auto &descs, u32 &count, auto &&descCb, bool individualDescWrites = false) { descriptorInfo.writeDescCount += individualDescWrites ? descs.size() : ((descs.size() > 0) ? 
1 : 0); - for (const auto &desc : descs) { + for (u32 descIdx{}; descIdx < descs.size(); descIdx++) { + const auto &desc{descs[descIdx]}; count += desc.count; + descCb(desc, descIdx); + descriptorInfo.descriptorSetLayoutBindings.push_back(vk::DescriptorSetLayoutBinding{ .binding = bindingIndex++, .descriptorType = type, @@ -240,19 +246,32 @@ namespace skyline::gpu::interconnect::maxwell3d { } }}; - pushBindings(vk::DescriptorType::eUniformBuffer, stage.info.constant_buffer_descriptors, descriptorInfo.uniformBufferDescCount); - pushBindings(vk::DescriptorType::eStorageBuffer, stage.info.storage_buffers_descriptors, descriptorInfo.storageBufferDescCount); + pushBindings(vk::DescriptorType::eUniformBuffer, stage.info.constant_buffer_descriptors, descriptorInfo.uniformBufferDescCount, [&](const Shader::ConstantBufferDescriptor &desc, u32 descIdx) { + for (u32 cbufIdx{desc.index}; cbufIdx < desc.index + desc.count; cbufIdx++) { + auto &usage{stageCbufUsage[cbufIdx]}; + usage.uniformBuffers.push_back({bindingIndex, descIdx}); + usage.totalBufferDescCount += desc.count; + usage.writeDescCount++; + } + }); + pushBindings(vk::DescriptorType::eStorageBuffer, stage.info.storage_buffers_descriptors, descriptorInfo.storageBufferDescCount, [&](const Shader::StorageBufferDescriptor &desc, u32 descIdx) { + auto &usage{stageCbufUsage[desc.cbuf_index]}; + usage.storageBuffers.push_back({bindingIndex, descIdx}); + usage.totalBufferDescCount += desc.count; + usage.writeDescCount++; + }); descriptorInfo.totalBufferDescCount += descriptorInfo.uniformBufferDescCount + descriptorInfo.storageBufferDescCount; - pushBindings(vk::DescriptorType::eUniformTexelBuffer, stage.info.texture_buffer_descriptors, descriptorInfo.uniformTexelBufferDescCount); - pushBindings(vk::DescriptorType::eStorageTexelBuffer, stage.info.image_buffer_descriptors, descriptorInfo.storageTexelBufferDescCount); + pushBindings(vk::DescriptorType::eUniformTexelBuffer, stage.info.texture_buffer_descriptors, 
descriptorInfo.uniformTexelBufferDescCount, [](const auto &, u32) {}); + pushBindings(vk::DescriptorType::eStorageTexelBuffer, stage.info.image_buffer_descriptors, descriptorInfo.storageTexelBufferDescCount, [](const auto &, u32) {}); descriptorInfo.totalTexelBufferDescCount += descriptorInfo.uniformTexelBufferDescCount + descriptorInfo.storageTexelBufferDescCount; - pushBindings(vk::DescriptorType::eCombinedImageSampler, stage.info.texture_descriptors, descriptorInfo.combinedImageSamplerDescCount, needsIndividualTextureBindingWrites); - pushBindings(vk::DescriptorType::eStorageImage, stage.info.image_descriptors, descriptorInfo.storageImageDescCount); + pushBindings(vk::DescriptorType::eCombinedImageSampler, stage.info.texture_descriptors, descriptorInfo.combinedImageSamplerDescCount, [](const auto &, u32) {}, needsIndividualTextureBindingWrites); + pushBindings(vk::DescriptorType::eStorageImage, stage.info.image_descriptors, descriptorInfo.storageImageDescCount, [](const auto &, u32) {}); descriptorInfo.totalImageDescCount += descriptorInfo.combinedImageSamplerDescCount + descriptorInfo.storageImageDescCount; } + descriptorInfo.totalElemCount = descriptorInfo.totalBufferDescCount + descriptorInfo.totalTexelBufferDescCount + descriptorInfo.totalImageDescCount; return descriptorInfo; } @@ -542,6 +561,38 @@ namespace skyline::gpu::interconnect::maxwell3d { transitionCacheNextIdx = (transitionCacheNextIdx + 1) % transitionCache.size(); } + static DynamicBufferBinding GetConstantBufferBinding(InterconnectContext &ctx, const Shader::Info &info, BufferView view, size_t idx) { + ctx.executor.AttachBuffer(view); + + size_t sizeOverride{std::min(info.constant_buffer_used_sizes[idx], view.size)}; + if (auto megaBufferBinding{view.TryMegaBuffer(ctx.executor.cycle, ctx.executor.AcquireMegaBufferAllocator(), ctx.executor.executionNumber, sizeOverride)}) { + return megaBufferBinding; + } else { + view.GetBuffer()->BlockSequencedCpuBackingWrites(); + return view; + } + } + + 
static DynamicBufferBinding GetStorageBufferBinding(InterconnectContext &ctx, const Shader::Info &info, ConstantBuffer &cbuf, CachedMappedBufferView &cachedView, size_t idx) { + struct SsboDescriptor { + u64 address; + u32 size; + }; + + const auto &desc{info.storage_buffers_descriptors[idx]}; + auto ssbo{cbuf.Read(ctx.executor, desc.cbuf_offset)}; + cachedView.Update(ctx, ssbo.address, ssbo.size); + + auto view{cachedView.view}; + ctx.executor.AttachBuffer(view); + view.GetBuffer()->BlockSequencedCpuBackingWrites(); + + if (desc.is_written) + view.GetBuffer()->MarkGpuDirty(); + + return view; + } + // TODO: EXEC ID FOR STORAGE BUFS PURGE REMAP void Pipeline::SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers) { u32 bindingIdx{}; @@ -572,41 +623,69 @@ namespace skyline::gpu::interconnect::maxwell3d { for (size_t i{}; i < shaderStages.size(); i++) { const auto &stage{shaderStages[i]}; + if (!stage.module) + continue; + writeBufferDescs(vk::DescriptorType::eUniformBuffer, stage.info.constant_buffer_descriptors, descriptorInfo.uniformBufferDescCount, - [&](const Shader::ConstantBufferDescriptor &desc, size_t descIdx, size_t arrayIdx) -> DynamicBufferBinding { + [&](const Shader::ConstantBufferDescriptor &desc, size_t descIdx, size_t arrayIdx) { size_t cbufIdx{desc.index + arrayIdx}; - auto view{constantBuffers[i][cbufIdx].view}; - - ctx.executor.AttachBuffer(view); - - size_t sizeOverride{std::min(stage.info.constant_buffer_used_sizes[cbufIdx], view.size)}; - if (auto megaBufferBinding{view.TryMegaBuffer(ctx.executor.cycle, ctx.executor.AcquireMegaBufferAllocator(), ctx.executor.executionNumber, sizeOverride)}) { - return megaBufferBinding; - } else { - view.GetBuffer()->BlockSequencedCpuBackingWrites(); - return view; - } + return GetConstantBufferBinding(ctx, stage.info, constantBuffers[i][cbufIdx].view, cbufIdx); }); - writeBufferDescs(vk::DescriptorType::eStorageBuffer, stage.info.storage_buffers_descriptors, 
descriptorInfo.storageBufferDescCount, [&](const Shader::StorageBufferDescriptor &desc, size_t descIdx, size_t arrayIdx) { - struct SsboDescriptor { - u64 address; - u32 size; - }; - - auto &cbuf{constantBuffers[i][desc.cbuf_index]}; - auto ssbo{cbuf.Read(ctx.executor, desc.cbuf_offset)}; - storageBufferViews[descIdx].Update(ctx, ssbo.address, ssbo.size); - - auto view{storageBufferViews[descIdx].view}; - ctx.executor.AttachBuffer(view); - - if (desc.is_written) - view.GetBuffer()->MarkGpuDirty(); - - return view; + writeBufferDescs(vk::DescriptorType::eStorageBuffer, stage.info.storage_buffers_descriptors, descriptorInfo.storageBufferDescCount, + [&](const Shader::StorageBufferDescriptor &desc, size_t descIdx, size_t arrayIdx) { + return GetStorageBufferBinding(ctx, stage.info, constantBuffers[i][desc.cbuf_index], storageBufferViews[descIdx], descIdx); }); } } + + void Pipeline::SyncDescriptorsQuickBind(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, ConstantBuffers::QuickBind quickBind) { + const auto &cbufUsageInfo{descriptorInfo.cbufUsages[static_cast(quickBind.stage)][quickBind.index]}; + const auto &shaderInfo{shaderStages[static_cast(quickBind.stage)].info}; + auto &stageConstantBuffers{constantBuffers[static_cast(quickBind.stage)]}; + auto copy{ctx.executor.allocator.AllocateUntracked()}; + auto writes{ctx.executor.allocator.AllocateUntracked(cbufUsageInfo.writeDescCount)}; + size_t writeIdx{}; + size_t bufferIdx{}; + + auto bufferDescs{ctx.executor.allocator.AllocateUntracked(cbufUsageInfo.totalBufferDescCount)}; + auto bufferDescViews{ctx.executor.allocator.AllocateUntracked(cbufUsageInfo.totalBufferDescCount)}; + + // TODO: opt this to do partial copy + *copy = vk::CopyDescriptorSet{ + .srcBinding = 0, + .srcArrayElement = 0, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = descriptorInfo.totalElemCount, + }; + + auto writeBufferDescs{[&](vk::DescriptorType type, const auto &usages, const auto &descs, u32 count, auto 
getBufferCb) { + for (const auto &usage : usages) { + const auto &shaderDesc{descs[usage.shaderDescIdx]}; + + writes[writeIdx++] = { + .dstBinding = usage.binding, + .descriptorCount = shaderDesc.count, + .descriptorType = type, + .pBufferInfo = &bufferDescs[bufferIdx], + }; + + for (size_t i{}; i < shaderDesc.count; i++) + bufferDescViews[bufferIdx++] = getBufferCb(shaderDesc, usage.shaderDescIdx, i); + } + }}; + + writeBufferDescs(vk::DescriptorType::eUniformBuffer, cbufUsageInfo.uniformBuffers, shaderInfo.constant_buffer_descriptors, descriptorInfo.uniformBufferDescCount, + [&](const Shader::ConstantBufferDescriptor &desc, size_t descIdx, size_t arrayIdx) -> DynamicBufferBinding { + size_t cbufIdx{desc.index + arrayIdx}; + return GetConstantBufferBinding(ctx, shaderInfo, stageConstantBuffers[cbufIdx].view, cbufIdx); + }); + + writeBufferDescs(vk::DescriptorType::eStorageBuffer, cbufUsageInfo.storageBuffers, shaderInfo.storage_buffers_descriptors, descriptorInfo.storageBufferDescCount, + [&](const Shader::StorageBufferDescriptor &desc, size_t descIdx, size_t arrayIdx) { + return GetStorageBufferBinding(ctx, shaderInfo, stageConstantBuffers[desc.cbuf_index], storageBufferViews[bufferIdx], descIdx); + }); + } } diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.h b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.h index 3ff02554..3782b7af 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.h @@ -40,6 +40,24 @@ namespace skyline::gpu::interconnect::maxwell3d { u32 combinedImageSamplerDescCount{}; u32 storageImageDescCount{}; u32 totalImageDescCount{}; + u32 totalElemCount{}; + + /** + * @brief Keeps track of all bindings that are dependent on a given constant buffer index to allow for quick binding + */ + struct ConstantBufferDescriptorUsages { + struct Usage { + u32 binding; //!< Vulkan binding index + 
u32 shaderDescIdx; //!< Index of the descriptor in the appropriate shader info member + }; + + boost::container::small_vector uniformBuffers; + boost::container::small_vector storageBuffers; + u32 totalBufferDescCount{}; + u32 writeDescCount{}; + }; + + std::array, engine::ShaderStageCount> cbufUsages{}; }; private: @@ -61,6 +79,8 @@ namespace skyline::gpu::interconnect::maxwell3d { void AddTransition(Pipeline *next); void SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers); + + void SyncDescriptorsQuickBind(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, ConstantBuffers::QuickBind quickBind); }; class PipelineManager { diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp index 6f8f9587..76319d10 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp @@ -107,6 +107,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { #undef LOAD_CONSTANT_BUFFER_CALLBACKS default: // When a method other than constant buffer update is called submit our submit the previously built-up update as a batch + interconnect.DisableQuickConstantBufferBind(); interconnect.LoadConstantBuffer(batchLoadConstantBuffer.buffer, batchLoadConstantBuffer.Invalidate()); batchLoadConstantBuffer.Reset(); break; // Continue on here to handle the actual method @@ -177,6 +178,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { }) ENGINE_STRUCT_CASE(i2m, launchDma, { + FlushEngineState(); i2m.LaunchDma(*registers.i2m); }) @@ -258,7 +260,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d { #define PIPELINE_CALLBACKS(z, idx, data) \ ENGINE_ARRAY_STRUCT_CASE(bindGroups, idx, constantBuffer, { \ - interconnect.BindConstantBuffer(static_cast(idx), constantBuffer.shaderSlot, constantBuffer.valid); \ + interconnect.BindConstantBuffer(static_cast(idx), constantBuffer.shaderSlot, constantBuffer.valid); \ }) 
BOOST_PP_REPEAT(5, PIPELINE_CALLBACKS, 0) @@ -297,6 +299,8 @@ namespace skyline::soc::gm20b::engine::maxwell3d { interconnect.LoadConstantBuffer(batchLoadConstantBuffer.buffer, batchLoadConstantBuffer.Invalidate()); batchLoadConstantBuffer.Reset(); } + + interconnect.DisableQuickConstantBufferBind(); } __attribute__((always_inline)) void Maxwell3D::CallMethod(u32 method, u32 argument) {