diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp
index 94e8d38b..f20c7a08 100644
--- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp
+++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp
@@ -56,7 +56,8 @@ namespace skyline::gpu::interconnect {
           fence{gpu.vkDevice, vk::FenceCreateInfo{ .flags = vk::FenceCreateFlagBits::eSignaled }},
           semaphore{gpu.vkDevice, vk::SemaphoreCreateInfo{}},
           cycle{std::make_shared(gpu.vkDevice, *fence, *semaphore, true)},
-          nodes{allocator} {
+          nodes{allocator},
+          pendingPostRenderPassNodes{allocator} {
         Begin();
     }
 
@@ -68,6 +69,7 @@ namespace skyline::gpu::interconnect {
           cycle{std::move(other.cycle)},
           allocator{std::move(other.allocator)},
           nodes{std::move(other.nodes)},
+          pendingPostRenderPassNodes{std::move(other.pendingPostRenderPassNodes)},
           ready{other.ready} {}
 
     std::shared_ptr CommandRecordThread::Slot::Reset(GPU &gpu) {
@@ -372,9 +374,11 @@ namespace skyline::gpu::interconnect {
             // We need to create a render pass if one doesn't already exist or the current one isn't compatible
             if (renderPass != nullptr) {
                 slot->nodes.emplace_back(std::in_place_type_t());
+                slot->nodes.splice(slot->nodes.end(), slot->pendingPostRenderPassNodes);
                 renderPassIndex++;
             }
             renderPass = &std::get(slot->nodes.emplace_back(std::in_place_type_t(), renderArea));
+            renderPassIt = std::prev(slot->nodes.end());
             addSubpass();
             subpassCount = 1;
         } else if (!attachmentsMatch) {
@@ -399,6 +403,7 @@ namespace skyline::gpu::interconnect {
     void CommandExecutor::FinishRenderPass() {
         if (renderPass) {
             slot->nodes.emplace_back(std::in_place_type_t());
+            slot->nodes.splice(slot->nodes.end(), slot->pendingPostRenderPassNodes);
             renderPassIndex++;
 
             renderPass = nullptr;
@@ -502,6 +507,22 @@ namespace skyline::gpu::interconnect {
         slot->nodes.emplace_back(std::in_place_type_t(), std::forward(function));
     }
 
+    void CommandExecutor::AddCommand(std::function &, GPU &)> &&function) {
+        slot->nodes.emplace_back(std::in_place_type_t(), std::forward(function));
+    }
+
+    void CommandExecutor::InsertPreExecuteCommand(std::function &, GPU &)> &&function) {
+        slot->nodes.emplace(slot->nodes.begin(), std::in_place_type_t(), std::forward(function));
+    }
+
+    void CommandExecutor::InsertPreRpCommand(std::function &, GPU &)> &&function) {
+        slot->nodes.emplace(renderPass ? renderPassIt : slot->nodes.end(), std::in_place_type_t(), std::forward(function));
+    }
+
+    void CommandExecutor::InsertPostRpCommand(std::function &, GPU &)> &&function) {
+        slot->pendingPostRenderPassNodes.emplace_back(std::in_place_type_t(), std::forward(function));
+    }
+
     void CommandExecutor::AddFullBarrier() {
         AddOutsideRpCommand([](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &, GPU &) {
             RecordFullBarrier(commandBuffer);
@@ -570,6 +591,10 @@ namespace skyline::gpu::interconnect {
             callback();
     }
 
+    std::optional CommandExecutor::GetRenderPassIndex() {
+        return renderPassIndex;
+    }
+
     u32 CommandExecutor::AddCheckpointImpl(std::string_view annotation) {
         if (renderPass)
             FinishRenderPass();
@@ -589,6 +614,8 @@ namespace skyline::gpu::interconnect {
         if (renderPass)
             FinishRenderPass();
 
+        slot->nodes.splice(slot->nodes.end(), slot->pendingPostRenderPassNodes);
+
         {
             slot->WaitReady();
 
diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h
index 024fe133..1d8cde1f 100644
--- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h
+++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h
@@ -41,6 +41,7 @@ namespace skyline::gpu::interconnect {
             std::shared_ptr cycle;
             LinearAllocatorState<> allocator;
             std::list> nodes;
+            std::list> pendingPostRenderPassNodes;
             std::mutex beginLock;
             std::condition_variable beginCondition;
             ContextTag executionTag;
@@ -148,6 +149,7 @@ namespace skyline::gpu::interconnect {
         ExecutionWaiterThread waiterThread;
         std::optional checkpointPollerThread;
         node::RenderPassNode *renderPass{};
+        std::list>::iterator renderPassIt;
         size_t subpassCount{}; //!< The number of subpasses in the current render pass
         u32 renderPassIndex{};
         bool preserveLocked{};
@@ -309,6 +311,26 @@ namespace skyline::gpu::interconnect {
          */
         void AddOutsideRpCommand(std::function &, GPU &)> &&function);
 
+        /**
+         * @brief Adds a command that can be executed inside or outside of an RP
+         */
+        void AddCommand(std::function &, GPU &)> &&function);
+
+        /**
+         * @brief Inserts the input command into the node list at the beginning of the execution
+         */
+        void InsertPreExecuteCommand(std::function &, GPU &)> &&function);
+
+        /**
+         * @brief Inserts the input command into the node list before the current RP begins (or immediately if not in an RP)
+         */
+        void InsertPreRpCommand(std::function &, GPU &)> &&function);
+
+        /**
+         * @brief Inserts the input command into the node list after the current RP (or execution) finishes
+         */
+        void InsertPostRpCommand(std::function &, GPU &)> &&function);
+
         /**
          * @brief Adds a full pipeline barrier to the command buffer
          */
@@ -329,6 +351,8 @@ namespace skyline::gpu::interconnect {
          */
         void NotifyPipelineChange();
 
+        std::optional GetRenderPassIndex();
+
         /**
          * @brief Records a checkpoint into the GPU command stream at the current
          * @param annotation A string annotation to display in perfetto for this checkpoint