Introduce support for waiting for submission to FenceCycle

Introducing async record broke the assumption that any work submitted through the command scheduler would be submitted in order with graphics submits. Since async record now unlocks the texture before it's submitted, a separate mechanism is needed to ensure ordering of submits. This is achieved by building support into the fence cycle itself, with a condition variable that is waited on for submission before any fence waits occur.
2025-02-22 12:57:10 +01:00 · 2022-09-29 21:24:02 +01:00 · 2022-09-29 21:24:02 +01:00 · 7dc3dde815
commit 7dc3dde815
parent 54b85583ae
3 changed files with 50 additions and 1 deletions
--- a/app/src/main/cpp/skyline/gpu/command_scheduler.cpp
+++ b/app/src/main/cpp/skyline/gpu/command_scheduler.cpp
@ -85,6 +85,7 @@ namespace skyline::gpu {
            }, cycle->fence);
        }

+        cycle->NotifySubmitted();
        cycleQueue.Push(cycle);
    }
 }
--- a/app/src/main/cpp/skyline/gpu/fence_cycle.h
+++ b/app/src/main/cpp/skyline/gpu/fence_cycle.h
@ -3,6 +3,7 @@

 #pragma once

+#include <condition_variable>
 #include <vulkan/vulkan_raii.hpp>
 #include <common.h>
 #include <common/atomic_forward_list.h>
@ -20,6 +21,9 @@ namespace skyline::gpu {
        std::atomic_flag signalled{}; //!< If the underlying fence has been signalled since the creation of this FenceCycle, this doesn't necessarily mean the dependencies have been destroyed
        std::atomic_flag alreadyDestroyed{}; //!< If the cycle's dependencies are already destroyed, this prevents multiple destructions
        const vk::raii::Device &device;
+        std::recursive_timed_mutex mutex; //!< Guards `submitted` and is held by Wait/Poll/WaitSubmit/NotifySubmitted, it is a timed mutex so that the timeout variant of Wait can bound lock acquisition with try_lock_for
+        std::condition_variable_any submitCondition; //!< Notified by NotifySubmitted() once `submitted` is set, the _any variant is used as `mutex` isn't a plain std::mutex
+        bool submitted{}; //!< If the fence has been submitted to the GPU
        vk::Fence fence;

        friend CommandScheduler;
@ -54,11 +58,21 @@ namespace skyline::gpu {
            DestroyDependencies();
        }

+        /**
+         * @brief Waits for submission of the command buffer associated with this cycle to the GPU, this blocks the calling thread until NotifySubmitted() has set `submitted` and doesn't wait on the fence itself
+         */
+        void WaitSubmit() {
+            std::unique_lock lock{mutex};
+            submitCondition.wait(lock, [this] { return submitted; }); // The predicate overload returns immediately if the cycle was already submitted and re-checks the flag after every wakeup
+        }
+
        /**
         * @brief Wait on a fence cycle till it has been signalled
         * @param shouldDestroy If true, the dependencies of this cycle will be destroyed after the fence is signalled
         */
        void Wait(bool shouldDestroy = false) {
+            std::unique_lock lock{mutex};
+
            if (signalled.test(std::memory_order_consume)) {
                if (shouldDestroy)
                    DestroyDependencies();
@ -69,6 +83,8 @@ namespace skyline::gpu {
                cycle->Wait(shouldDestroy);
            });

+            submitCondition.wait(lock, [&] { return submitted; });
+
            vk::Result waitResult;
            while ((waitResult = (*device).waitForFences(1, &fence, false, std::numeric_limits<u64>::max(), *device.getDispatcher())) != vk::Result::eSuccess) {
                if (waitResult == vk::Result::eTimeout)
@ -93,6 +109,10 @@ namespace skyline::gpu {
         * @return If the wait was successful or timed out
         */
        bool Wait(i64 timeoutNs, bool shouldDestroy = false) {
+            std::unique_lock lock{mutex, std::defer_lock};
+            if (!lock.try_lock_for(std::chrono::nanoseconds{timeoutNs}))
+                return false;
+
            if (signalled.test(std::memory_order_consume)) {
                if (shouldDestroy)
                    DestroyDependencies();
@ -108,6 +128,11 @@ namespace skyline::gpu {
            }))
                return false;

+            if (!submitCondition.wait_for(lock, std::chrono::nanoseconds(timeoutNs), [&] { return submitted; }))
+                return false;
+
+            timeoutNs = std::max<i64>(0, initialTimeout - (util::GetTimeNs() - startTime));
+
            vk::Result waitResult;
            while ((waitResult = (*device).waitForFences(1, &fence, false, static_cast<u64>(timeoutNs), *device.getDispatcher())) != vk::Result::eSuccess) {
                if (waitResult == vk::Result::eTimeout)
@ -140,6 +165,10 @@ namespace skyline::gpu {
         * @return If the fence is signalled currently or not
         */
        bool Poll(bool quick = true, bool shouldDestroy = false) {
+            std::unique_lock lock{mutex, std::try_to_lock};
+            if (!lock)
+                return false;
+
            if (signalled.test(std::memory_order_consume)) {
                if (shouldDestroy)
                    DestroyDependencies();
@ -152,6 +181,9 @@ namespace skyline::gpu {
            if (!chainedCycles.AllOf([=](auto &cycle) { return cycle->Poll(quick, shouldDestroy); }))
                return false;

+            if (!submitted)
+                return false;
+
            auto status{(*device).getFenceStatus(fence, *device.getDispatcher())};
            if (status == vk::Result::eSuccess) {
                signalled.test_and_set(std::memory_order_release);
@ -185,8 +217,17 @@ namespace skyline::gpu {
         * @param cycle The cycle to chain to this one, this is nullable and this function will be a no-op if this is nullptr
         */
        void ChainCycle(const std::shared_ptr<FenceCycle> &cycle) {
-            if (cycle && !signalled.test(std::memory_order_consume) && cycle.get() != this && cycle->Poll())
+            if (cycle && !signalled.test(std::memory_order_consume) && cycle.get() != this && !cycle->Poll()) // Poll() returns true when the cycle is already signalled, so only cycles that aren't known to be signalled get chained (the previous condition was inverted)
                chainedCycles.Append(cycle); // If the cycle isn't the current cycle or already signalled, we need to chain it
        }
+
+        /**
+         * @brief Notifies all waiters that the command buffer associated with this cycle has been submitted, this sets `submitted` and wakes every thread blocked on `submitCondition` in WaitSubmit() or Wait()
+         */
+        void NotifySubmitted() {
+            std::scoped_lock lock{mutex};
+            submitted = true; // Written while holding the mutex so a waiter evaluating its predicate cannot miss the update
+            submitCondition.notify_all();
+        }
    };
 }
--- a/app/src/main/cpp/skyline/gpu/texture/texture.cpp
+++ b/app/src/main/cpp/skyline/gpu/texture/texture.cpp
@ -699,6 +699,8 @@ namespace skyline::gpu {

        auto stagingBuffer{SynchronizeHostImpl()};
        if (stagingBuffer) {
+            if (cycle)
+                cycle->WaitSubmit();
            auto lCycle{gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) {
                CopyFromStagingBuffer(commandBuffer, stagingBuffer);
            })};
@ -810,6 +812,10 @@ namespace skyline::gpu {
    void Texture::CopyFrom(std::shared_ptr<Texture> source, const vk::ImageSubresourceRange &subresource) {
        WaitOnBacking();
        source->WaitOnBacking();
+        if (cycle)
+            cycle->WaitSubmit(); // Any prior work on this texture must have been submitted before the copy is, to preserve submit ordering
+        if (source->cycle)
+            source->cycle->WaitSubmit(); // Same ordering requirement for prior work on the source texture

        if (source->layout == vk::ImageLayout::eUndefined)
            throw exception("Cannot copy from image with undefined layout");
@ -889,6 +895,7 @@ namespace skyline::gpu {
        })};
+        lCycle->ChainCycle(source->cycle); // Must read source->cycle before `source` is passed through std::move() below, which may leave it null
        lCycle->AttachObjects(std::move(source), shared_from_this());
        lCycle->ChainCycle(cycle);
        cycle = lCycle;
    }
 }