From 07d45ee504fd0f9b59b5e9509c1941bd6dc62d2c Mon Sep 17 00:00:00 2001 From: PixelyIon Date: Sun, 26 Jun 2022 12:26:18 +0530 Subject: [PATCH] Introduce `FenceCycle` Chaining If we want to allow submitting multiple pieces of work to the GPU at once while still requiring CPU synchronization, we'll need to track all past fence cycles associated with a resource alongside the current one. To solve this the concept of chaining fences has been introduced, fences from past usages can be chained to the latest fence which'll then recursively forward operations to chained fences. This change also ends up mandating a move away from `FenceCycleDependency` as it would prevent fences from concurrently locking the same resources which is required for chaining to work as two fences being chained fundamentally means they're locking the same resources. The `AtomicForwardList` is therefore used as the new container. --- app/src/main/cpp/skyline/gpu/buffer.cpp | 2 +- app/src/main/cpp/skyline/gpu/buffer.h | 4 +- app/src/main/cpp/skyline/gpu/fence_cycle.h | 133 ++++++++---------- .../gpu/interconnect/command_executor.cpp | 2 +- .../gpu/interconnect/command_executor.h | 2 +- .../gpu/interconnect/graphics_context.h | 6 +- app/src/main/cpp/skyline/gpu/memory_manager.h | 2 +- .../main/cpp/skyline/gpu/texture/texture.h | 8 +- 8 files changed, 71 insertions(+), 88 deletions(-) diff --git a/app/src/main/cpp/skyline/gpu/buffer.cpp b/app/src/main/cpp/skyline/gpu/buffer.cpp index 33006c81..b0157dc8 100644 --- a/app/src/main/cpp/skyline/gpu/buffer.cpp +++ b/app/src/main/cpp/skyline/gpu/buffer.cpp @@ -198,7 +198,7 @@ namespace skyline::gpu { /** * @brief A FenceCycleDependency that synchronizes the contents of a host buffer with the guest buffer */ - struct BufferGuestSync : public FenceCycleDependency { + struct BufferGuestSync { std::shared_ptr buffer; explicit BufferGuestSync(std::shared_ptr buffer) : buffer(std::move(buffer)) {} diff --git a/app/src/main/cpp/skyline/gpu/buffer.h b/app/src/main/cpp/skyline/gpu/buffer.h index 631c3bed..abd8e7ef 100644 --- a/app/src/main/cpp/skyline/gpu/buffer.h +++ b/app/src/main/cpp/skyline/gpu/buffer.h @@ -20,7 +20,7 @@ namespace skyline::gpu { * @brief A buffer which is backed by host constructs while being synchronized with the underlying guest buffer * @note This class conforms to the Lockable and BasicLockable C++ named requirements */ - class Buffer : public std::enable_shared_from_this, public FenceCycleDependency { + class Buffer : public std::enable_shared_from_this { private: GPU &gpu; std::mutex mutex; //!< Synchronizes any mutations to the buffer or its backing @@ -93,7 +93,7 @@ namespace skyline::gpu { * @brief A delegate for a strong reference to a Buffer by a BufferView which can be changed to another Buffer transparently * @note This class conforms to the Lockable and BasicLockable C++ named requirements */ - struct BufferDelegate : public FenceCycleDependency { + struct BufferDelegate { std::shared_ptr buffer; const Buffer::BufferViewStorage *view; std::function &)> usageCallback; diff --git a/app/src/main/cpp/skyline/gpu/fence_cycle.h b/app/src/main/cpp/skyline/gpu/fence_cycle.h index 70933016..a0966561 100644 --- a/app/src/main/cpp/skyline/gpu/fence_cycle.h +++ b/app/src/main/cpp/skyline/gpu/fence_cycle.h @@ -3,23 +3,13 @@ #pragma once -#include #include #include +#include namespace skyline::gpu { struct FenceCycle; - /** - * @brief Any object whose lifetime can be attached to a fence cycle needs to inherit this class - */ - struct FenceCycleDependency { - private: - std::shared_ptr next{}; //!< A shared pointer to the next dependendency to form a linked list - FenceCycle *cycle{}; //!< A pointer to the fence cycle owning this, used to prevent attaching to the same cycle multiple times but should never be used to access the object due to not being an owning reference nor nullified on the destruction of the owning cycle - friend FenceCycle; - }; - /** * @brief A wrapper around a Vulkan Fence which only tracks a single reset -> signal cycle with the ability to attach lifetimes of objects to it * @note This provides the guarantee that the fence must be signalled prior to destruction when objects are to be destroyed @@ -30,23 +20,20 @@ namespace skyline::gpu { std::atomic_flag signalled; const vk::raii::Device &device; vk::Fence fence; - std::shared_ptr list{}; + + AtomicForwardList> dependencies; //!< A list of all dependencies on this fence cycle + AtomicForwardList> chainedCycles; //!< A list of all chained FenceCycles, this is used to express multi-fence dependencies /** * @brief Sequentially iterate through the shared_ptr linked list of dependencies and reset all pointers in a thread-safe atomic manner - * @note We cannot simply nullify the base pointer of the list as a false dependency chain is maintained between the objects when retained exteranlly + * @note We cannot simply nullify the base pointer of the list as a false dependency chain is maintained between the objects when retained externally */ void DestroyDependencies() { - auto current{std::atomic_exchange_explicit(&list, std::shared_ptr{}, std::memory_order_acquire)}; - while (current) { - std::shared_ptr next{}; - next.swap(current->next); - current.swap(next); - } + dependencies.Clear(); } public: - FenceCycle(const vk::raii::Device &device, vk::Fence fence) : signalled(false), device(device), fence(fence) { + FenceCycle(const vk::raii::Device &device, vk::Fence fence) : signalled{false}, device{device}, fence{fence} { device.resetFences(fence); } @@ -69,6 +56,10 @@ namespace skyline::gpu { if (signalled.test(std::memory_order_consume)) return; + chainedCycles.Iterate([](auto &cycle) { + cycle->Wait(); + }); + vk::Result waitResult; while ((waitResult = (*device).waitForFences(1, &fence, false, std::numeric_limits::max(), *device.getDispatcher())) != vk::Result::eSuccess) { if (waitResult == vk::Result::eTimeout) @@ -90,10 +81,33 @@ namespace skyline::gpu { * @brief Wait on a fence cycle with a timeout in nanoseconds * @return If the wait was successful or timed out */ - bool Wait(std::chrono::duration timeout) { + bool Wait(i64 timeoutNs) { if (signalled.test(std::memory_order_consume)) return true; - if ((*device).waitForFences(1, &fence, false, std::numeric_limits::max(), *device.getDispatcher()) == vk::Result::eSuccess) { + + i64 startTime{util::GetTimeNs()}, initialTimeout{timeoutNs}; + if (!chainedCycles.AllOf([&](auto &cycle) { + if (!cycle->Wait(timeoutNs)) + return false; + timeoutNs = std::max(0, initialTimeout - (util::GetTimeNs() - startTime)); + return true; + })) + return false; + + vk::Result waitResult; + while ((waitResult = (*device).waitForFences(1, &fence, false, static_cast(timeoutNs), *device.getDispatcher())) != vk::Result::eSuccess) { + if (waitResult == vk::Result::eTimeout) + break; + + if (waitResult == vk::Result::eErrorInitializationFailed) { + timeoutNs = std::max(0, initialTimeout - (util::GetTimeNs() - startTime)); + continue; + } + + throw exception("An error occurred while waiting for fence 0x{:X}: {}", static_cast(fence), vk::to_string(waitResult)); + } + + if (waitResult == vk::Result::eSuccess) { if (!signalled.test_and_set(std::memory_order_release)) DestroyDependencies(); return true; @@ -102,12 +116,20 @@ namespace skyline::gpu { } } + bool Wait(std::chrono::duration timeout) { + return Wait(timeout.count()); + } + /** * @return If the fence is signalled currently or not */ bool Poll() { if (signalled.test(std::memory_order_consume)) return true; + + if (!chainedCycles.AllOf([](auto &cycle) { return cycle->Poll(); })) + return false; + auto status{(*device).getFenceStatus(fence, *device.getDispatcher())}; if (status == vk::Result::eSuccess) { if (!signalled.test_and_set(std::memory_order_release)) @@ -121,66 +143,27 @@ namespace skyline::gpu { /** * @brief Attach the lifetime of an object to the fence being signalled */ - void AttachObject(const std::shared_ptr &dependency) { - if (dependency->cycle != this && !signalled.test(std::memory_order_consume)) { - // Only try to insert if the object isn't already owned by this fence cycle and it hasn't been signalled yet - dependency->next = std::atomic_load_explicit(&list, std::memory_order_acquire); - - do { - if (dependency->next == nullptr && signalled.test(std::memory_order_consume)) { - // `list` will be nullptr after being signalled, dependencies will not be destroyed and we need to do so ourselves - dependency->next.reset(); // We need to reset the dependency so it doesn't incorrectly extend the lifetime of any resources - return; - } - } while (!std::atomic_compare_exchange_strong_explicit(&list, &dependency->next, dependency, std::memory_order_release, std::memory_order_acquire)); - - dependency->cycle = this; - } + void AttachObject(const std::shared_ptr &dependency) { + if (!signalled.test(std::memory_order_consume)) + dependencies.Append(dependency); } /** * @brief A version of AttachObject optimized for several objects being attached at once */ - void AttachObjects(std::initializer_list> dependencies) { - if (!signalled.test(std::memory_order_consume)) { - { - auto it{dependencies.begin()}, next{std::next(it)}; - if (it != dependencies.end()) { - // Only try to insert if the object isn't already owned by this fence cycle - for (; next != dependencies.end(); next++) { - if ((*it)->cycle != this) { - (*it)->next = *next; - (*it)->cycle = this; - } - it = next; - } - } - } - - auto &dependency{*dependencies.begin()}; - auto &lastDependency{*std::prev(dependencies.end())}; - lastDependency->next = std::atomic_load_explicit(&list, std::memory_order_acquire); - - do { - if (lastDependency->next == nullptr && signalled.test(std::memory_order_consume)) { - // `list` will be nullptr after being signalled, dependencies will not be destroyed and we need to do so ourselves - - auto current{dependency->next}; // We need to reset any chained dependencies before exiting - while (current) { - std::shared_ptr next{}; - next.swap(current->next); - current.swap(next); - } - - return; - } - } while (!std::atomic_compare_exchange_strong_explicit(&list, &dependency->next, dependency, std::memory_order_release, std::memory_order_acquire)); - } + template + void AttachObjects(Dependencies &&... pDependencies) { + if (!signalled.test(std::memory_order_consume)) + dependencies.Append(pDependencies...); } - template - void AttachObjects(Dependencies &&... dependencies) { - AttachObjects(std::initializer_list>{std::forward(dependencies)...}); + /** + * @brief Chains another cycle to this cycle, this cycle will not be signalled till the supplied cycle is signalled + * @param cycle The cycle to chain to this one, this is nullable and this function will be a no-op if this is nullptr + */ + void ChainCycle(const std::shared_ptr &cycle) { + if (cycle && !signalled.test(std::memory_order_consume) && cycle.get() != this && cycle->Poll()) + chainedCycles.Append(cycle); // If the cycle isn't the current cycle or already signalled, we need to chain it } }; } diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp index 9db07e3f..d599ab12 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp @@ -115,7 +115,7 @@ namespace skyline::gpu::interconnect { return didLock; } - void CommandExecutor::AttachDependency(const std::shared_ptr &dependency) { + void CommandExecutor::AttachDependency(const std::shared_ptr &dependency) { cycle->AttachObject(dependency); } diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h index 2346e561..7c244e1e 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h @@ -114,7 +114,7 @@ namespace skyline::gpu::interconnect { /** * @brief Attach the lifetime of the fence cycle dependency to the command buffer */ - void AttachDependency(const std::shared_ptr &dependency); + void AttachDependency(const std::shared_ptr &dependency); /** * @brief Adds a command that needs to be executed inside a subpass configured with certain attachments diff --git a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h index 508d68d1..0889e49e 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h @@ -2043,7 +2043,7 @@ namespace skyline::gpu::interconnect { u32 bindlessTextureConstantBufferIndex{}; std::shared_ptr nullTextureView; //!< View used instead of a null descriptor when an empty TIC is encountered, this avoids the need for the nullDescriptor VK feature - struct PoolTexture : public FenceCycleDependency { + struct PoolTexture { GuestTexture guest; std::weak_ptr view; }; @@ -2338,7 +2338,7 @@ namespace skyline::gpu::interconnect { private: bool tscIndexLinked{}; //!< If the TSC index in bindless texture handles is the same as the TIC index or if it's independent from the TIC index - struct Sampler : public vk::raii::Sampler, public FenceCycleDependency { + struct Sampler : public vk::raii::Sampler { using vk::raii::Sampler::Sampler; }; @@ -2937,7 +2937,7 @@ namespace skyline::gpu::interconnect { }, programState.descriptorSetBindings)}; // Draw Persistent Storage - struct DrawStorage : FenceCycleDependency { + struct DrawStorage { ShaderProgramState::DescriptorSetWrites descriptorSetWrites; std::optional descriptorSet; diff --git a/app/src/main/cpp/skyline/gpu/memory_manager.h b/app/src/main/cpp/skyline/gpu/memory_manager.h index a60bb0a7..2b5da9d0 100644 --- a/app/src/main/cpp/skyline/gpu/memory_manager.h +++ b/app/src/main/cpp/skyline/gpu/memory_manager.h @@ -39,7 +39,7 @@ namespace skyline::gpu::memory { /** * @brief A Buffer that can be independently attached to a fence cycle */ - class StagingBuffer : public Buffer, public FenceCycleDependency { + class StagingBuffer : public Buffer { using Buffer::Buffer; }; diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.h b/app/src/main/cpp/skyline/gpu/texture/texture.h index 907ae184..3d5f3782 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.h +++ b/app/src/main/cpp/skyline/gpu/texture/texture.h @@ -293,7 +293,7 @@ namespace skyline::gpu { * @note The object **must** be locked prior to accessing any members as values will be mutated * @note This class conforms to the Lockable and BasicLockable C++ named requirements */ - class TextureView : public FenceCycleDependency, public std::enable_shared_from_this { + class TextureView : public std::enable_shared_from_this { private: vk::ImageView vkView{}; @@ -350,7 +350,7 @@ namespace skyline::gpu { * @brief A texture which is backed by host constructs while being synchronized with the underlying guest texture * @note This class conforms to the Lockable and BasicLockable C++ named requirements */ - class Texture : public std::enable_shared_from_this, public FenceCycleDependency { + class Texture : public std::enable_shared_from_this { private: GPU &gpu; std::mutex mutex; //!< Synchronizes any mutations to the texture or its backing @@ -415,9 +415,9 @@ namespace skyline::gpu { void CopyToGuest(u8 *hostBuffer); /** - * @brief A FenceCycleDependency that copies the contents of a staging buffer or mapped image backing the texture to the guest texture + * @brief A fence cycle dependency that copies the contents of a staging buffer or mapped image backing the texture to the guest texture */ - struct TextureBufferCopy : public FenceCycleDependency { + struct TextureBufferCopy { std::shared_ptr texture; std::shared_ptr stagingBuffer;