From 1088ed514c95a399ab23515a6c1ae876512d6295 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sun, 30 Oct 2022 16:15:00 +0000 Subject: [PATCH] Introduce texture usage system to ensure RPs are split when necessary Vulkan doesn't allow sampling a texture and using it as an RT in the same RP. By tracking the texture usage status and splitting RPs when this occurs, we can avoid such potential sync errors. --- .../gpu/interconnect/command_executor.cpp | 53 ++++++++++------ .../gpu/interconnect/command_executor.h | 5 +- .../cpp/skyline/gpu/interconnect/fermi_2d.cpp | 3 +- .../interconnect/maxwell_3d/maxwell_3d.cpp | 9 +-- .../gpu/interconnect/maxwell_3d/maxwell_3d.h | 1 + .../maxwell_3d/pipeline_manager.cpp | 61 ++++++++++++------- .../maxwell_3d/pipeline_manager.h | 18 +++++- .../main/cpp/skyline/gpu/texture/texture.cpp | 9 +++ .../main/cpp/skyline/gpu/texture/texture.h | 20 ++++++ 9 files changed, 128 insertions(+), 51 deletions(-) diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp index 9ec9bc54..726782a6 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp @@ -172,7 +172,7 @@ namespace skyline::gpu::interconnect { allocator = &slot->allocator; } - bool CommandExecutor::CreateRenderPassWithSubpass(vk::Rect2D renderArea, span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation) { + bool CommandExecutor::CreateRenderPassWithSubpass(vk::Rect2D renderArea, span sampledImages, span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation) { auto addSubpass{[&] { renderPass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment, gpu); @@ -195,35 +195,48 @@ namespace skyline::gpu::interconnect { lastSubpassDepthStencilAttachment = depthStencilAttachment; }}; + span 
depthStencilAttachmentSpan{depthStencilAttachment ? span(depthStencilAttachment) : span()}; + auto outputAttachmentViews{ranges::views::concat(colorAttachments, depthStencilAttachmentSpan)}; bool attachmentsMatch{ranges::equal(lastSubpassInputAttachments, inputAttachments) && ranges::equal(lastSubpassColorAttachments, colorAttachments) && lastSubpassDepthStencilAttachment == depthStencilAttachment}; - if (renderPass == nullptr || renderPass->renderArea != renderArea || - ((noSubpassCreation || subpassCount >= gpu.traits.quirks.maxSubpassCount) && !attachmentsMatch)) { + bool splitRenderPass{renderPass == nullptr || renderPass->renderArea != renderArea || + ((noSubpassCreation || subpassCount >= gpu.traits.quirks.maxSubpassCount) && !attachmentsMatch) || + !ranges::all_of(outputAttachmentViews, [this] (auto view) { return !view || view->texture->ValidateRenderPassUsage(renderPassIndex, texture::RenderPassUsage::RenderTarget); }) || + !ranges::all_of(sampledImages, [this] (auto view) { return view->texture->ValidateRenderPassUsage(renderPassIndex, texture::RenderPassUsage::Sampled); })}; + + bool gotoNext{}; + if (splitRenderPass) { // We need to create a render pass if one doesn't already exist or the current one isn't compatible - if (renderPass != nullptr) + if (renderPass != nullptr) { slot->nodes.emplace_back(std::in_place_type_t()); + renderPassIndex++; + } renderPass = &std::get(slot->nodes.emplace_back(std::in_place_type_t(), renderArea)); addSubpass(); subpassCount = 1; - return false; - } else { - if (attachmentsMatch) { - // The last subpass had the same attachments, so we can reuse them - return false; - } else { - // The last subpass had different attachments, so we need to create a new one - addSubpass(); - subpassCount++; - return true; - } + } else if (!attachmentsMatch) { + // The last subpass had different attachments, so we need to create a new one + addSubpass(); + subpassCount++; + gotoNext = true; } + + for (auto view : outputAttachmentViews) + 
if (view) + view->texture->UpdateRenderPassUsage(renderPassIndex, texture::RenderPassUsage::RenderTarget); + + for (auto view : sampledImages) + view->texture->UpdateRenderPassUsage(renderPassIndex, texture::RenderPassUsage::Sampled); + + return gotoNext; } void CommandExecutor::FinishRenderPass() { if (renderPass) { slot->nodes.emplace_back(std::in_place_type_t()); + renderPassIndex++; renderPass = nullptr; subpassCount = 0; @@ -309,8 +322,8 @@ namespace skyline::gpu::interconnect { cycle->AttachObject(dependency); } - void CommandExecutor::AddSubpass(std::function &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation) { - bool gotoNext{CreateRenderPassWithSubpass(renderArea, inputAttachments, colorAttachments, depthStencilAttachment ? &*depthStencilAttachment : nullptr, noSubpassCreation)}; + void CommandExecutor::AddSubpass(std::function &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span sampledImages, span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation) { + bool gotoNext{CreateRenderPassWithSubpass(renderArea, sampledImages, inputAttachments, colorAttachments, depthStencilAttachment ? 
&*depthStencilAttachment : nullptr, noSubpassCreation)}; if (gotoNext) slot->nodes.emplace_back(std::in_place_type_t(), std::forward(function)); else @@ -325,7 +338,7 @@ namespace skyline::gpu::interconnect { } void CommandExecutor::AddClearColorSubpass(TextureView *attachment, const vk::ClearColorValue &value) { - bool gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, attachment, nullptr)}; + bool gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, {}, attachment, nullptr)}; if (renderPass->ClearColorAttachment(0, value, gpu)) { if (gotoNext) slot->nodes.emplace_back(std::in_place_type_t()); @@ -350,7 +363,7 @@ namespace skyline::gpu::interconnect { } void CommandExecutor::AddClearDepthStencilSubpass(TextureView *attachment, const vk::ClearDepthStencilValue &value) { - bool gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, {}, attachment)}; + bool gotoNext{CreateRenderPassWithSubpass(vk::Rect2D{.extent = attachment->texture->dimensions}, {}, {}, {}, attachment)}; if (renderPass->ClearDepthStencilAttachment(value, gpu)) { if (gotoNext) slot->nodes.emplace_back(std::in_place_type_t()); @@ -413,6 +426,7 @@ namespace skyline::gpu::interconnect { } texture->cycle = cycle; + texture->UpdateRenderPassUsage(0, texture::RenderPassUsage::None); } } @@ -432,6 +446,7 @@ namespace skyline::gpu::interconnect { attachedTextures.clear(); attachedBuffers.clear(); allocator->Reset(); + renderPassIndex = 0; // Periodically clear preserve attachments just in case there are new waiters which would otherwise end up waiting forever if ((submissionNumber % (*state.settings->executorSlotCount * 2)) == 0) { diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h index a4030145..6371ac55 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h +++ 
b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h @@ -77,6 +77,7 @@ namespace skyline::gpu::interconnect { CommandRecordThread::Slot *slot{}; node::RenderPassNode *renderPass{}; size_t subpassCount{}; //!< The number of subpasses in the current render pass + u32 renderPassIndex{}; bool preserveLocked{}; /** @@ -136,7 +137,7 @@ namespace skyline::gpu::interconnect { * @note This also checks for subpass coalescing and will merge the new subpass with the previous one when possible * @return If the next subpass must be started prior to issuing any commands */ - bool CreateRenderPassWithSubpass(vk::Rect2D renderArea, span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation = false); + bool CreateRenderPassWithSubpass(vk::Rect2D renderArea, span sampledImages, span inputAttachments, span colorAttachments, TextureView *depthStencilAttachment, bool noSubpassCreation = false); /** * @brief Ends a render pass if one is currently active and resets all corresponding state @@ -210,7 +211,7 @@ namespace skyline::gpu::interconnect { * @param exclusiveSubpass If this subpass should be the only subpass in a render pass * @note Any supplied texture should be attached prior and not undergo any persistent layout transitions till execution */ - void AddSubpass(std::function &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span inputAttachments = {}, span colorAttachments = {}, TextureView *depthStencilAttachment = {}, bool noSubpassCreation = false); + void AddSubpass(std::function &, GPU &, vk::RenderPass, u32)> &&function, vk::Rect2D renderArea, span sampledImages, span inputAttachments = {}, span colorAttachments = {}, TextureView *depthStencilAttachment = {}, bool noSubpassCreation = false); /** * @brief Adds a subpass that clears the entirety of the specified attachment with a color value, it may utilize VK_ATTACHMENT_LOAD_OP_CLEAR for a more efficient clear when possible diff --git 
a/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.cpp b/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.cpp index e097ae44..5570d6a2 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.cpp @@ -148,7 +148,8 @@ namespace skyline::gpu::interconnect { srcTextureView.get(), dstTextureView.get(), [=](auto &&executionCallback) { auto dst{dstTextureView.get()}; - executor.AddSubpass(std::move(executionCallback), {{static_cast(dstRectX), static_cast(dstRectY)}, {dstRectWidth, dstRectHeight} }, {}, {dst}); + std::array sampledImages{srcTextureView.get()}; + executor.AddSubpass(std::move(executionCallback), {{static_cast(dstRectX), static_cast(dstRectY)}, {dstRectWidth, dstRectHeight} }, sampledImages, {}, {dst}); } ); diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.cpp b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.cpp index d649f9e9..24446e0f 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.cpp @@ -197,7 +197,7 @@ namespace skyline::gpu::interconnect::maxwell3d { std::array colorAttachments{colorView ? &*colorView : nullptr}; ctx.executor.AddSubpass([clearAttachments, clearRects](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &, GPU &, vk::RenderPass, u32) { commandBuffer.clearAttachments(clearAttachments, span(clearRects).first(clearAttachments.size())); - }, renderArea, {}, colorView ? colorAttachments : span{}, depthStencilView ? &*depthStencilView : nullptr); + }, renderArea, {}, {}, colorView ? colorAttachments : span{}, depthStencilView ? 
&*depthStencilView : nullptr); } void Maxwell3D::Draw(engine::DrawTopology topology, bool transformFeedbackEnable, bool indexed, u32 count, u32 first, u32 instanceCount, u32 vertexOffset, u32 firstInstance) { @@ -218,18 +218,19 @@ namespace skyline::gpu::interconnect::maxwell3d { } Pipeline *pipeline{activeState.GetPipeline()}; + activeDescriptorSetSampledImages.resize(pipeline->GetTotalSampledImageCount()); auto *descUpdateInfo{[&]() -> DescriptorUpdateInfo * { if (((oldPipeline == pipeline) || (oldPipeline && oldPipeline->CheckBindingMatch(pipeline))) && constantBuffers.quickBindEnabled) { // If bindings between the old and new pipelines are the same we can reuse the descriptor sets given that quick bind is enabled (meaning that no buffer updates or calls to non-graphics engines have occurred that could invalidate them) if (constantBuffers.quickBind) // If only a single constant buffer has been rebound between draws we can perform a partial descriptor update - return pipeline->SyncDescriptorsQuickBind(ctx, constantBuffers.boundConstantBuffers, samplers, textures, *constantBuffers.quickBind); + return pipeline->SyncDescriptorsQuickBind(ctx, constantBuffers.boundConstantBuffers, samplers, textures, *constantBuffers.quickBind, activeDescriptorSetSampledImages); else return nullptr; } else { // If bindings have changed or quick bind is disabled, perform a full descriptor update - return pipeline->SyncDescriptors(ctx, constantBuffers.boundConstantBuffers, samplers, textures); + return pipeline->SyncDescriptors(ctx, constantBuffers.boundConstantBuffers, samplers, textures, activeDescriptorSetSampledImages); } }()}; @@ -295,7 +296,7 @@ namespace skyline::gpu::interconnect::maxwell3d { if (drawParams->transformFeedbackEnable) commandBuffer.endTransformFeedbackEXT(0, {}, {}); - }, scissor, {}, activeState.GetColorAttachments(), activeState.GetDepthAttachment(), !ctx.gpu.traits.quirks.relaxedRenderPassCompatibility); + }, scissor, activeDescriptorSetSampledImages, {}, 
activeState.GetColorAttachments(), activeState.GetDepthAttachment(), !ctx.gpu.traits.quirks.relaxedRenderPassCompatibility); constantBuffers.ResetQuickBind(); } diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.h b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.h index d1984466..52ae22f9 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.h @@ -51,6 +51,7 @@ namespace skyline::gpu::interconnect::maxwell3d { static constexpr size_t DescriptorBatchSize{0x100}; std::shared_ptr> attachedDescriptorSets; DescriptorAllocator::ActiveDescriptorSet *activeDescriptorSet{}; + std::vector activeDescriptorSetSampledImages{}; size_t UpdateQuadConversionBuffer(u32 count, u32 firstVertex); diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp index 361ec945..40a9792a 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp @@ -280,21 +280,25 @@ namespace skyline::gpu::interconnect::maxwell3d { }); pushBindings(vk::DescriptorType::eStorageBuffer, stage.info.storage_buffers_descriptors, stageDescInfo.storageBufferDescCount, [&](const Shader::StorageBufferDescriptor &desc, u32 descIdx) { auto &usage{stageDescInfo.cbufUsages[desc.cbuf_index]}; - usage.storageBuffers.push_back({bindingIndex, descIdx, descriptorInfo.totalStorageBufferCount + descIdx}); + usage.storageBuffers.push_back({bindingIndex, descIdx, descriptorInfo.totalStorageBufferCount}); usage.totalBufferDescCount += desc.count; usage.writeDescCount++; + descriptorInfo.totalStorageBufferCount += desc.count; }); descriptorInfo.totalBufferDescCount += stageDescInfo.uniformBufferDescCount + stageDescInfo.storageBufferDescCount; - descriptorInfo.totalStorageBufferCount += 
stageDescInfo.storageBufferDescCount; - pushBindings(vk::DescriptorType::eUniformTexelBuffer, stage.info.texture_buffer_descriptors, stageDescInfo.uniformTexelBufferDescCount, [](const auto &, u32) {}); - pushBindings(vk::DescriptorType::eStorageTexelBuffer, stage.info.image_buffer_descriptors, stageDescInfo.storageTexelBufferDescCount, [](const auto &, u32) {}); + pushBindings(vk::DescriptorType::eUniformTexelBuffer, stage.info.texture_buffer_descriptors, stageDescInfo.uniformTexelBufferDescCount, [](const auto &, u32) { + Logger::Warn("Texture buffer descriptors are not supported"); + }); + pushBindings(vk::DescriptorType::eStorageTexelBuffer, stage.info.image_buffer_descriptors, stageDescInfo.storageTexelBufferDescCount, [](const auto &, u32) { + Logger::Warn("Image buffer descriptors are not supported"); + }); descriptorInfo.totalTexelBufferDescCount += stageDescInfo.uniformTexelBufferDescCount + stageDescInfo.storageTexelBufferDescCount; pushBindings(vk::DescriptorType::eCombinedImageSampler, stage.info.texture_descriptors, stageDescInfo.combinedImageSamplerDescCount, [&](const Shader::TextureDescriptor &desc, u32 descIdx) { auto addUsage{[&](auto idx) { - auto &usage{stageDescInfo.cbufUsages[desc.cbuf_index]}; - usage.combinedImageSamplers.push_back({bindingIndex, descIdx}); + auto &usage{stageDescInfo.cbufUsages[idx]}; + usage.combinedImageSamplers.push_back({bindingIndex, descIdx, descriptorInfo.totalCombinedImageSamplerCount}); usage.totalImageDescCount += desc.count; usage.writeDescCount++; }}; @@ -302,8 +306,12 @@ namespace skyline::gpu::interconnect::maxwell3d { addUsage(desc.cbuf_index); if (desc.has_secondary) addUsage(desc.secondary_cbuf_index); + + descriptorInfo.totalCombinedImageSamplerCount += desc.count; }, needsIndividualTextureBindingWrites); - pushBindings(vk::DescriptorType::eStorageImage, stage.info.image_descriptors, stageDescInfo.storageImageDescCount, [](const auto &, u32) {}); + pushBindings(vk::DescriptorType::eStorageImage, 
stage.info.image_descriptors, stageDescInfo.storageImageDescCount, [](const auto &, u32) { + Logger::Warn("Image descriptors are not supported"); + }); descriptorInfo.totalImageDescCount += stageDescInfo.combinedImageSamplerDescCount + stageDescInfo.storageImageDescCount; } return descriptorInfo; @@ -630,6 +638,10 @@ namespace skyline::gpu::interconnect::maxwell3d { return true; } + u32 Pipeline::GetTotalSampledImageCount() const { + return descriptorInfo.totalCombinedImageSamplerCount; + } + static DynamicBufferBinding GetConstantBufferBinding(InterconnectContext &ctx, const Shader::Info &info, BufferView view, size_t idx) { if (!view) // Return a dummy buffer if the constant buffer isn't bound return BufferBinding{ctx.gpu.megaBufferAllocator.Allocate(ctx.executor.cycle, 0).buffer, 0, PAGE_SIZE}; @@ -687,20 +699,23 @@ namespace skyline::gpu::interconnect::maxwell3d { return {.raw = primaryVal}; } - static vk::DescriptorImageInfo GetTextureBinding(InterconnectContext &ctx, const Shader::TextureDescriptor &desc, Samplers &samplers, Textures &textures, BindlessHandle handle) { + static std::pair GetTextureBinding(InterconnectContext &ctx, const Shader::TextureDescriptor &desc, Samplers &samplers, Textures &textures, BindlessHandle handle) { auto sampler{samplers.GetSampler(ctx, handle.samplerIndex, handle.textureIndex)}; auto texture{textures.GetTexture(ctx, handle.textureIndex, desc.type)}; ctx.executor.AttachTexture(texture); auto view{texture->GetView()}; - return vk::DescriptorImageInfo{ - .sampler = **sampler, - .imageView = view, - .imageLayout = texture->texture->layout, + return { + vk::DescriptorImageInfo{ + .sampler = **sampler, + .imageView = view, + .imageLayout = texture->texture->layout + }, + texture }; } - DescriptorUpdateInfo *Pipeline::SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures) { + DescriptorUpdateInfo *Pipeline::SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet 
&constantBuffers, Samplers &samplers, Textures &textures, span sampledImages) { SyncCachedStorageBufferViews(ctx.executor.executionNumber); u32 writeIdx{}; @@ -712,7 +727,8 @@ namespace skyline::gpu::interconnect::maxwell3d { u32 imageIdx{}; auto imageDescs{ctx.executor.allocator->AllocateUntracked(descriptorInfo.totalImageDescCount)}; - u32 storageBufferIdx{}; // Need to keep track of this since to index into the cached view array + u32 storageBufferIdx{}; // Need to keep track of this to index into the cached view array + u32 combinedImageSamplerIdx{}; // Need to keep track of this to index into the sampled image array u32 bindingIdx{}; /** @@ -781,16 +797,15 @@ namespace skyline::gpu::interconnect::maxwell3d { writeBufferDescs(vk::DescriptorType::eStorageBuffer, stage.info.storage_buffers_descriptors, stageDescInfo.storageBufferDescCount, [&](const Shader::StorageBufferDescriptor &desc, size_t arrayIdx) { - auto binding{GetStorageBufferBinding(ctx, desc, constantBuffers[i][desc.cbuf_index], storageBufferViews[storageBufferIdx - arrayIdx ? 1 : 0])}; - // Storage buffer arrays all share the same view index, so to only increment the index once per array do it at element zero and subtract that for all subsequent array elems (see above) - storageBufferIdx += arrayIdx ? 
0 : 1; - return binding; + return GetStorageBufferBinding(ctx, desc, constantBuffers[i][desc.cbuf_index], storageBufferViews[storageBufferIdx++]); }); writeImageDescs(vk::DescriptorType::eCombinedImageSampler, stage.info.texture_descriptors, stageDescInfo.combinedImageSamplerDescCount, [&](const Shader::TextureDescriptor &desc, size_t arrayIdx) { BindlessHandle handle{ReadBindlessHandle(ctx, constantBuffers[i], desc, arrayIdx)}; - return GetTextureBinding(ctx, desc, samplers, textures, handle); + auto binding{GetTextureBinding(ctx, desc, samplers, textures, handle)}; + sampledImages[combinedImageSamplerIdx++] = binding.second; + return binding.first; }, ctx.gpu.traits.quirks.needsIndividualTextureBindingWrites); } @@ -809,7 +824,7 @@ namespace skyline::gpu::interconnect::maxwell3d { }); } - DescriptorUpdateInfo *Pipeline::SyncDescriptorsQuickBind(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, ConstantBuffers::QuickBind quickBind) { + DescriptorUpdateInfo *Pipeline::SyncDescriptorsQuickBind(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, ConstantBuffers::QuickBind quickBind, span sampledImages) { SyncCachedStorageBufferViews(ctx.executor.executionNumber); size_t stageIndex{static_cast(quickBind.stage)}; @@ -869,13 +884,15 @@ namespace skyline::gpu::interconnect::maxwell3d { writeDescs.operator()(vk::DescriptorType::eStorageBuffer, cbufUsageInfo.storageBuffers, shaderInfo.storage_buffers_descriptors, [&](auto usage, const Shader::StorageBufferDescriptor &desc, size_t arrayIdx) { - return GetStorageBufferBinding(ctx, desc, stageConstantBuffers[desc.cbuf_index], storageBufferViews[usage.storageBufferIdx]); + return GetStorageBufferBinding(ctx, desc, stageConstantBuffers[desc.cbuf_index], storageBufferViews[usage.entirePipelineIdx + arrayIdx]); }); writeDescs.operator()(vk::DescriptorType::eCombinedImageSampler, cbufUsageInfo.combinedImageSamplers, 
shaderInfo.texture_descriptors, [&](auto usage, const Shader::TextureDescriptor &desc, size_t arrayIdx) { BindlessHandle handle{ReadBindlessHandle(ctx, stageConstantBuffers, desc, arrayIdx)}; - return GetTextureBinding(ctx, desc, samplers, textures, handle); + auto binding{GetTextureBinding(ctx, desc, samplers, textures, handle)}; + sampledImages[usage.entirePipelineIdx + arrayIdx] = binding.second; + return binding.first; }); // Since we don't implement all descriptor types the number of writes might not match what's expected diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.h b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.h index c1320abf..28d43ca9 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.h @@ -60,7 +60,7 @@ namespace skyline::gpu::interconnect::maxwell3d { struct Usage { u32 binding; //!< Vulkan binding index u32 shaderDescIdx; //!< Index of the descriptor in the appropriate shader info member - u32 storageBufferIdx; //!< Index of the storage buffer in the per-pipeline storage buffer cache + u32 entirePipelineIdx; //!< Index of the image/storage buffer in the entire pipeline }; boost::container::small_vector uniformBuffers; @@ -78,6 +78,7 @@ namespace skyline::gpu::interconnect::maxwell3d { std::array stages; u32 totalStorageBufferCount; + u32 totalCombinedImageSamplerCount; u32 totalWriteDescCount; u32 totalBufferDescCount; @@ -100,6 +101,7 @@ namespace skyline::gpu::interconnect::maxwell3d { public: cache::GraphicsPipelineCache::CompiledPipeline compiledPipeline; + size_t sampledImageCount{}; PackedPipelineState sourcePackedState; @@ -111,9 +113,19 @@ namespace skyline::gpu::interconnect::maxwell3d { bool CheckBindingMatch(Pipeline *other); - DescriptorUpdateInfo *SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures); + 
u32 GetTotalSampledImageCount() const; - DescriptorUpdateInfo *SyncDescriptorsQuickBind(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, ConstantBuffers::QuickBind quickBind); + /** + * @brief Creates a descriptor set update from the current GPU state + * @param sampledImages A span of size `GetTotalSampledImageCount()` in which texture view pointers for each sampled image will be written + */ + DescriptorUpdateInfo *SyncDescriptors(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, span sampledImages); + + /** + * @brief Creates a partial descriptor set update from the current GPU state for only the subset of descriptors changed by the quick bind constant buffer + * @param sampledImages A span of size `GetTotalSampledImageCount()` in which texture view pointers for each sampled image will be written + */ + DescriptorUpdateInfo *SyncDescriptorsQuickBind(InterconnectContext &ctx, ConstantBufferSet &constantBuffers, Samplers &samplers, Textures &textures, ConstantBuffers::QuickBind quickBind, span sampledImages); }; class PipelineManager { diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.cpp b/app/src/main/cpp/skyline/gpu/texture/texture.cpp index 7ca2ff01..d2e545dd 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.cpp +++ b/app/src/main/cpp/skyline/gpu/texture/texture.cpp @@ -970,4 +970,13 @@ namespace skyline::gpu { newCycle->AttachObjects(std::move(source), shared_from_this()); cycle = newCycle; } + + bool Texture::ValidateRenderPassUsage(u32 renderPassIndex, texture::RenderPassUsage renderPassUsage) { + return lastRenderPassUsage == renderPassUsage || lastRenderPassIndex != renderPassIndex || lastRenderPassUsage == texture::RenderPassUsage::None; + } + + void Texture::UpdateRenderPassUsage(u32 renderPassIndex, texture::RenderPassUsage renderPassUsage) { + lastRenderPassUsage = renderPassUsage; + lastRenderPassIndex = renderPassIndex; + } } diff 
--git a/app/src/main/cpp/skyline/gpu/texture/texture.h b/app/src/main/cpp/skyline/gpu/texture/texture.h index 331987e9..808eb4d8 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.h +++ b/app/src/main/cpp/skyline/gpu/texture/texture.h @@ -13,6 +13,12 @@ namespace skyline::gpu { namespace texture { + enum class RenderPassUsage : u8 { + None, + Sampled, + RenderTarget + }; + struct Dimensions { u32 width; u32 height; @@ -400,6 +406,9 @@ namespace skyline::gpu { std::vector views; + u32 lastRenderPassIndex{}; //!< The index of the last render pass that used this texture + texture::RenderPassUsage lastRenderPassUsage{texture::RenderPassUsage::None}; //!< The type of usage in the last render pass + friend TextureManager; friend TextureView; @@ -583,5 +592,16 @@ namespace skyline::gpu { bool FrequentlyLocked() { return accumulatedCpuLockCounter >= FrequentlyLockedThreshold; } + + /** + * @brief Checks if the previous usage in the renderpass is compatible with the current one + * @return If the new usage is compatible with the previous usage + */ + bool ValidateRenderPassUsage(u32 renderPassIndex, texture::RenderPassUsage renderPassUsage); + + /** + * @brief Updates renderpass usage tracking information + */ + void UpdateRenderPassUsage(u32 renderPassIndex, texture::RenderPassUsage renderPassUsage); }; }