Introduce several new node insertion functions for use with queries

Queries need the ability to insert commands at the beginning and end of render passes (RPs).
This commit is contained in:
Billy Laws 2023-03-21 22:34:27 +00:00
parent 9a51b5f54e
commit 202c97a1eb
2 changed files with 52 additions and 1 deletions

View File

@ -56,7 +56,8 @@ namespace skyline::gpu::interconnect {
fence{gpu.vkDevice, vk::FenceCreateInfo{ .flags = vk::FenceCreateFlagBits::eSignaled }}, fence{gpu.vkDevice, vk::FenceCreateInfo{ .flags = vk::FenceCreateFlagBits::eSignaled }},
semaphore{gpu.vkDevice, vk::SemaphoreCreateInfo{}}, semaphore{gpu.vkDevice, vk::SemaphoreCreateInfo{}},
cycle{std::make_shared<FenceCycle>(gpu.vkDevice, *fence, *semaphore, true)}, cycle{std::make_shared<FenceCycle>(gpu.vkDevice, *fence, *semaphore, true)},
nodes{allocator} { nodes{allocator},
pendingPostRenderPassNodes{allocator} {
Begin(); Begin();
} }
@ -68,6 +69,7 @@ namespace skyline::gpu::interconnect {
cycle{std::move(other.cycle)}, cycle{std::move(other.cycle)},
allocator{std::move(other.allocator)}, allocator{std::move(other.allocator)},
nodes{std::move(other.nodes)}, nodes{std::move(other.nodes)},
pendingPostRenderPassNodes{std::move(other.pendingPostRenderPassNodes)},
ready{other.ready} {} ready{other.ready} {}
std::shared_ptr<FenceCycle> CommandRecordThread::Slot::Reset(GPU &gpu) { std::shared_ptr<FenceCycle> CommandRecordThread::Slot::Reset(GPU &gpu) {
@ -372,9 +374,11 @@ namespace skyline::gpu::interconnect {
// We need to create a render pass if one doesn't already exist or the current one isn't compatible // We need to create a render pass if one doesn't already exist or the current one isn't compatible
if (renderPass != nullptr) { if (renderPass != nullptr) {
slot->nodes.emplace_back(std::in_place_type_t<node::RenderPassEndNode>()); slot->nodes.emplace_back(std::in_place_type_t<node::RenderPassEndNode>());
slot->nodes.splice(slot->nodes.end(), slot->pendingPostRenderPassNodes);
renderPassIndex++; renderPassIndex++;
} }
renderPass = &std::get<node::RenderPassNode>(slot->nodes.emplace_back(std::in_place_type_t<node::RenderPassNode>(), renderArea)); renderPass = &std::get<node::RenderPassNode>(slot->nodes.emplace_back(std::in_place_type_t<node::RenderPassNode>(), renderArea));
renderPassIt = std::prev(slot->nodes.end());
addSubpass(); addSubpass();
subpassCount = 1; subpassCount = 1;
} else if (!attachmentsMatch) { } else if (!attachmentsMatch) {
@ -399,6 +403,7 @@ namespace skyline::gpu::interconnect {
void CommandExecutor::FinishRenderPass() { void CommandExecutor::FinishRenderPass() {
if (renderPass) { if (renderPass) {
slot->nodes.emplace_back(std::in_place_type_t<node::RenderPassEndNode>()); slot->nodes.emplace_back(std::in_place_type_t<node::RenderPassEndNode>());
slot->nodes.splice(slot->nodes.end(), slot->pendingPostRenderPassNodes);
renderPassIndex++; renderPassIndex++;
renderPass = nullptr; renderPass = nullptr;
@ -502,6 +507,22 @@ namespace skyline::gpu::interconnect {
slot->nodes.emplace_back(std::in_place_type_t<node::FunctionNode>(), std::forward<decltype(function)>(function)); slot->nodes.emplace_back(std::in_place_type_t<node::FunctionNode>(), std::forward<decltype(function)>(function));
} }
/**
 * Appends `function` as a FunctionNode at the tail of the current slot's node list.
 * Only the node list is modified here; no render pass state is read or changed.
 */
void CommandExecutor::AddCommand(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &)> &&function) {
    auto &nodeList{slot->nodes};
    // `function` is an rvalue reference parameter, so it is moved into the new node
    nodeList.emplace_back(std::in_place_type<node::FunctionNode>, std::move(function));
}
/**
 * Places `function` as a FunctionNode at the very front of the current slot's node list,
 * ahead of every node that has been recorded so far.
 */
void CommandExecutor::InsertPreExecuteCommand(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &)> &&function) {
    auto &nodeList{slot->nodes};
    // Front insertion into a std::list leaves existing iterators (e.g. renderPassIt) valid
    nodeList.emplace_front(std::in_place_type<node::FunctionNode>, std::move(function));
}
/**
 * Inserts `function` as a FunctionNode directly in front of the node referenced by
 * `renderPassIt` when a render pass is active, otherwise appends it to the end of the node list.
 */
void CommandExecutor::InsertPreRpCommand(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &)> &&function) {
    auto &nodeList{slot->nodes};

    // Default to appending; only use the stored iterator while `renderPass` is non-null
    auto insertPosition{nodeList.end()};
    if (renderPass)
        insertPosition = renderPassIt;

    nodeList.emplace(insertPosition, std::in_place_type<node::FunctionNode>, std::move(function));
}
/**
 * Queues `function` as a FunctionNode on the slot's pending post-render-pass list
 * rather than on the main node list; the pending list is spliced into the main list elsewhere.
 */
void CommandExecutor::InsertPostRpCommand(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &)> &&function) {
    auto &deferredNodes{slot->pendingPostRenderPassNodes};
    deferredNodes.emplace_back(std::in_place_type<node::FunctionNode>, std::move(function));
}
void CommandExecutor::AddFullBarrier() { void CommandExecutor::AddFullBarrier() {
AddOutsideRpCommand([](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &) { AddOutsideRpCommand([](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &) {
RecordFullBarrier(commandBuffer); RecordFullBarrier(commandBuffer);
@ -570,6 +591,10 @@ namespace skyline::gpu::interconnect {
callback(); callback();
} }
/**
 * Returns the current value of `renderPassIndex` wrapped in an optional.
 */
std::optional<u32> CommandExecutor::GetRenderPassIndex() {
    // NOTE(review): as written this always yields an engaged optional and never std::nullopt —
    // confirm whether callers are expected to handle an empty result
    return std::optional<u32>{renderPassIndex};
}
u32 CommandExecutor::AddCheckpointImpl(std::string_view annotation) { u32 CommandExecutor::AddCheckpointImpl(std::string_view annotation) {
if (renderPass) if (renderPass)
FinishRenderPass(); FinishRenderPass();
@ -589,6 +614,8 @@ namespace skyline::gpu::interconnect {
if (renderPass) if (renderPass)
FinishRenderPass(); FinishRenderPass();
slot->nodes.splice(slot->nodes.end(), slot->pendingPostRenderPassNodes);
{ {
slot->WaitReady(); slot->WaitReady();

View File

@ -41,6 +41,7 @@ namespace skyline::gpu::interconnect {
std::shared_ptr<FenceCycle> cycle; std::shared_ptr<FenceCycle> cycle;
LinearAllocatorState<> allocator; LinearAllocatorState<> allocator;
std::list<node::NodeVariant, LinearAllocator<node::NodeVariant>> nodes; std::list<node::NodeVariant, LinearAllocator<node::NodeVariant>> nodes;
std::list<node::NodeVariant, LinearAllocator<node::NodeVariant>> pendingPostRenderPassNodes;
std::mutex beginLock; std::mutex beginLock;
std::condition_variable beginCondition; std::condition_variable beginCondition;
ContextTag executionTag; ContextTag executionTag;
@ -148,6 +149,7 @@ namespace skyline::gpu::interconnect {
ExecutionWaiterThread waiterThread; ExecutionWaiterThread waiterThread;
std::optional<CheckpointPollerThread> checkpointPollerThread; std::optional<CheckpointPollerThread> checkpointPollerThread;
node::RenderPassNode *renderPass{}; node::RenderPassNode *renderPass{};
std::list<node::NodeVariant, LinearAllocator<node::NodeVariant>>::iterator renderPassIt;
size_t subpassCount{}; //!< The number of subpasses in the current render pass size_t subpassCount{}; //!< The number of subpasses in the current render pass
u32 renderPassIndex{}; u32 renderPassIndex{};
bool preserveLocked{}; bool preserveLocked{};
@ -309,6 +311,26 @@ namespace skyline::gpu::interconnect {
*/ */
void AddOutsideRpCommand(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &)> &&function); void AddOutsideRpCommand(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &)> &&function);
/**
 * @brief Adds a command that can be executed inside or outside of an RP
 * @param function The callback to record, it is invoked with the command buffer, the fence cycle and the GPU
 * @note The command is appended to the end of the node list as-is, the implementation neither begins nor ends a render pass
 */
void AddCommand(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &)> &&function);
/**
 * @brief Inserts the input command into the node list at the beginning of the execution
 * @param function The callback to record, it is invoked with the command buffer, the fence cycle and the GPU
 * @note The command is placed at the front of the node list, ahead of all nodes recorded so far
 */
void InsertPreExecuteCommand(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &)> &&function);
/**
 * @brief Inserts the input command into the node list before the current RP begins (or immediately if not in an RP)
 * @param function The callback to record, it is invoked with the command buffer, the fence cycle and the GPU
 * @note When no RP is active the command is simply appended to the end of the node list
 */
void InsertPreRpCommand(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &)> &&function);
/**
 * @brief Inserts the input command into the node list after the current RP (or execution) finishes
 * @param function The callback to record, it is invoked with the command buffer, the fence cycle and the GPU
 * @note The command is held in a separate pending list and is moved to the end of the node list once the RP end node has been recorded
 */
void InsertPostRpCommand(std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &)> &&function);
/** /**
* @brief Adds a full pipeline barrier to the command buffer * @brief Adds a full pipeline barrier to the command buffer
*/ */
@ -329,6 +351,8 @@ namespace skyline::gpu::interconnect {
*/ */
void NotifyPipelineChange(); void NotifyPipelineChange();
/**
 * @brief Returns the executor's current render pass index
 * @note NOTE(review): the implementation always returns a value, confirm when (if ever) callers should expect std::nullopt
 */
std::optional<u32> GetRenderPassIndex();
/** /**
* @brief Records a checkpoint into the GPU command stream at the current * @brief Records a checkpoint into the GPU command stream at the current
* @param annotation A string annotation to display in perfetto for this checkpoint * @param annotation A string annotation to display in perfetto for this checkpoint