From 7dc3dde815e17c4627dbf089b545125c70e0c9b0 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Thu, 29 Sep 2022 21:24:02 +0100 Subject: [PATCH] Introduce support for waiting for submission to FenceCycle Introducing async record resulted in breaking the assumption that any work submitted through command scheduler would be submitted in order with graphics submits. Since async record now unlocks the texture before it's submitted, a separate mechanism is needed to ensure ordering of submits. This is achieved by building support into fence cycle itself, with a condition variable that is waited on for submission before any fence waits occur. --- .../cpp/skyline/gpu/command_scheduler.cpp | 1 + app/src/main/cpp/skyline/gpu/fence_cycle.h | 43 ++++++++++++++++++- .../main/cpp/skyline/gpu/texture/texture.cpp | 7 +++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/app/src/main/cpp/skyline/gpu/command_scheduler.cpp b/app/src/main/cpp/skyline/gpu/command_scheduler.cpp index 36f90043..6e63001d 100644 --- a/app/src/main/cpp/skyline/gpu/command_scheduler.cpp +++ b/app/src/main/cpp/skyline/gpu/command_scheduler.cpp @@ -85,6 +85,7 @@ namespace skyline::gpu { }, cycle->fence); } + cycle->NotifySubmitted(); cycleQueue.Push(cycle); } } diff --git a/app/src/main/cpp/skyline/gpu/fence_cycle.h b/app/src/main/cpp/skyline/gpu/fence_cycle.h index 491ddc0b..e5b7b3ca 100644 --- a/app/src/main/cpp/skyline/gpu/fence_cycle.h +++ b/app/src/main/cpp/skyline/gpu/fence_cycle.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include #include @@ -20,6 +21,9 @@ namespace skyline::gpu { std::atomic_flag signalled{}; //!< If the underlying fence has been signalled since the creation of this FenceCycle, this doesn't necessarily mean the dependencies have been destroyed std::atomic_flag alreadyDestroyed{}; //!< If the cycle's dependencies are already destroyed, this prevents multiple destructions const vk::raii::Device &device; + std::recursive_timed_mutex mutex; + std::condition_variable_any 
submitCondition; + bool submitted{}; //!< If the fence has been submitted to the GPU vk::Fence fence; friend CommandScheduler; @@ -54,11 +58,21 @@ namespace skyline::gpu { DestroyDependencies(); } + /** + * @brief Waits for submission of the command buffer associated with this cycle to the GPU + */ + void WaitSubmit() { + std::unique_lock lock{mutex}; + submitCondition.wait(lock, [this] { return submitted; }); + } + /** * @brief Wait on a fence cycle till it has been signalled * @param shouldDestroy If true, the dependencies of this cycle will be destroyed after the fence is signalled */ void Wait(bool shouldDestroy = false) { + std::unique_lock lock{mutex}; + if (signalled.test(std::memory_order_consume)) { if (shouldDestroy) DestroyDependencies(); @@ -69,6 +83,8 @@ namespace skyline::gpu { cycle->Wait(shouldDestroy); }); + submitCondition.wait(lock, [&] { return submitted; }); + vk::Result waitResult; while ((waitResult = (*device).waitForFences(1, &fence, false, std::numeric_limits::max(), *device.getDispatcher())) != vk::Result::eSuccess) { if (waitResult == vk::Result::eTimeout) @@ -93,6 +109,10 @@ namespace skyline::gpu { * @return If the wait was successful or timed out */ bool Wait(i64 timeoutNs, bool shouldDestroy = false) { + std::unique_lock lock{mutex, std::defer_lock}; + if (!lock.try_lock_for(std::chrono::nanoseconds{timeoutNs})) + return false; + if (signalled.test(std::memory_order_consume)) { if (shouldDestroy) DestroyDependencies(); @@ -108,6 +128,11 @@ namespace skyline::gpu { })) return false; + if (!submitCondition.wait_for(lock, std::chrono::nanoseconds(timeoutNs), [&] { return submitted; })) + return false; + + timeoutNs = std::max(0, initialTimeout - (util::GetTimeNs() - startTime)); + vk::Result waitResult; while ((waitResult = (*device).waitForFences(1, &fence, false, static_cast(timeoutNs), *device.getDispatcher())) != vk::Result::eSuccess) { if (waitResult == vk::Result::eTimeout) @@ -140,6 +165,10 @@ namespace skyline::gpu { * @return 
If the fence is signalled currently or not */ bool Poll(bool quick = true, bool shouldDestroy = false) { + std::unique_lock lock{mutex, std::try_to_lock}; + if (!lock) + return false; + if (signalled.test(std::memory_order_consume)) { if (shouldDestroy) DestroyDependencies(); @@ -152,6 +181,9 @@ namespace skyline::gpu { if (!chainedCycles.AllOf([=](auto &cycle) { return cycle->Poll(quick, shouldDestroy); })) return false; + if (!submitted) + return false; + auto status{(*device).getFenceStatus(fence, *device.getDispatcher())}; if (status == vk::Result::eSuccess) { signalled.test_and_set(std::memory_order_release); @@ -185,8 +217,17 @@ namespace skyline::gpu { * @param cycle The cycle to chain to this one, this is nullable and this function will be a no-op if this is nullptr */ void ChainCycle(const std::shared_ptr &cycle) { - if (cycle && !signalled.test(std::memory_order_consume) && cycle.get() != this && cycle->Poll()) + if (cycle && !signalled.test(std::memory_order_consume) && cycle.get() != this && !cycle->Poll()) chainedCycles.Append(cycle); // If the cycle isn't the current cycle or already signalled, we need to chain it } + + /** + * @brief Notifies all waiters that the command buffer associated with this cycle has been submitted + */ + void NotifySubmitted() { + std::scoped_lock lock{mutex}; + submitted = true; + submitCondition.notify_all(); + } }; } diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.cpp b/app/src/main/cpp/skyline/gpu/texture/texture.cpp index 4e3a5c47..ec758b0f 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.cpp +++ b/app/src/main/cpp/skyline/gpu/texture/texture.cpp @@ -699,6 +699,8 @@ namespace skyline::gpu { auto stagingBuffer{SynchronizeHostImpl()}; if (stagingBuffer) { + if (cycle) + cycle->WaitSubmit(); auto lCycle{gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) { CopyFromStagingBuffer(commandBuffer, stagingBuffer); })}; @@ -810,6 +812,10 @@ namespace skyline::gpu { void 
Texture::CopyFrom(std::shared_ptr source, const vk::ImageSubresourceRange &subresource) { WaitOnBacking(); source->WaitOnBacking(); + if (cycle) + cycle->WaitSubmit(); + if (source->cycle) + source->cycle->WaitSubmit(); if (source->layout == vk::ImageLayout::eUndefined) throw exception("Cannot copy from image with undefined layout"); @@ -889,6 +895,7 @@ namespace skyline::gpu { })}; + lCycle->ChainCycle(source->cycle); /* Read source->cycle before source is passed to AttachObjects via std::move, which may leave source empty */ lCycle->AttachObjects(std::move(source), shared_from_this()); lCycle->ChainCycle(cycle); cycle = lCycle; } }