From b2132fd7aa50aee33c5dea8440b809e7b0b72742 Mon Sep 17 00:00:00 2001 From: PixelyIon Date: Sat, 22 May 2021 22:01:22 +0530 Subject: [PATCH] Implement Fence Cycle, Memory Manager and Command Scheduler Implements a wrapper over fences to track a single cycle of activation, implement a Vulkan memory manager that wraps the Vulkan-Memory-Allocator library and a command scheduler for scheduling Vulkan command buffers --- .gitmodules | 3 + .idea/vcs.xml | 1 + app/CMakeLists.txt | 19 ++- app/libraries/vkma | 1 + app/libraries/vkma.cpp | 7 + app/src/main/cpp/skyline/gpu.cpp | 15 +- app/src/main/cpp/skyline/gpu.h | 19 ++- .../cpp/skyline/gpu/command_scheduler.cpp | 53 +++++++ .../main/cpp/skyline/gpu/command_scheduler.h | 97 ++++++++++++ app/src/main/cpp/skyline/gpu/fence_cycle.h | 149 ++++++++++++++++++ .../main/cpp/skyline/gpu/memory_manager.cpp | 78 +++++++++ app/src/main/cpp/skyline/gpu/memory_manager.h | 47 ++++++ 12 files changed, 470 insertions(+), 19 deletions(-) create mode 160000 app/libraries/vkma create mode 100644 app/libraries/vkma.cpp create mode 100644 app/src/main/cpp/skyline/gpu/command_scheduler.cpp create mode 100644 app/src/main/cpp/skyline/gpu/command_scheduler.h create mode 100644 app/src/main/cpp/skyline/gpu/fence_cycle.h create mode 100644 app/src/main/cpp/skyline/gpu/memory_manager.cpp create mode 100644 app/src/main/cpp/skyline/gpu/memory_manager.h diff --git a/.gitmodules b/.gitmodules index 4fb679b8..6fbd72f2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -25,3 +25,6 @@ [submodule "app/libraries/vkhpp"] path = app/libraries/vkhpp url = https://github.com/KhronosGroup/Vulkan-Hpp +[submodule "app/libraries/vkma"] + path = app/libraries/vkma + url = https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator diff --git a/.idea/vcs.xml b/.idea/vcs.xml index f7d4b52b..759c6d4f 100644 --- a/.idea/vcs.xml +++ b/.idea/vcs.xml @@ -15,5 +15,6 @@ + \ No newline at end of file diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index 
e236e265..e96c81cf 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -16,14 +16,18 @@ if (uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE") set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) endif () +# {fmt} add_subdirectory("libraries/fmt") +# TzCode add_subdirectory("libraries/tzcode") target_compile_options(tzcode PRIVATE -Wno-everything) +# Oboe add_subdirectory("libraries/oboe") include_directories("libraries/oboe/include") +# LZ4 set(LZ4_BUILD_CLI OFF CACHE BOOL "Build LZ4 CLI" FORCE) set(LZ4_BUILD_LEGACY_LZ4C OFF CACHE BOOL "Build lz4c progam with legacy argument support" FORCE) add_subdirectory("libraries/lz4/build/cmake") @@ -37,9 +41,18 @@ add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1) # We use the dynam include_directories("libraries/vkhpp") include_directories("libraries/vkhpp/Vulkan-Headers/include") # We use base Vulkan headers from this to ensure version parity with Vulkan-Hpp -include_directories("libraries/pugixml/src") # We use PugiXML in header-only mode +# Vulkan Memory Allocator +include_directories("libraries/vkma/include") +add_library(vkma STATIC libraries/vkma.cpp) +target_compile_options(vkma PRIVATE -Wno-everything) + +# PugiXML (Header-only mode) +include_directories("libraries/pugixml/src") + +# Frozen include_directories("libraries/frozen/include") +# MbedTLS find_package(mbedtls REQUIRED CONFIG) # Perfetto SDK @@ -75,6 +88,8 @@ add_library(skyline SHARED ${source_DIR}/skyline/audio/resampler.cpp ${source_DIR}/skyline/audio/adpcm_decoder.cpp ${source_DIR}/skyline/gpu.cpp + ${source_DIR}/skyline/gpu/memory_manager.cpp + ${source_DIR}/skyline/gpu/command_scheduler.cpp ${source_DIR}/skyline/gpu/presentation_engine.cpp ${source_DIR}/skyline/gpu/texture.cpp ${source_DIR}/skyline/soc/gmmu.cpp @@ -198,5 +213,5 @@ add_library(skyline SHARED ${source_DIR}/skyline/services/mmnv/IRequest.cpp ) # target_precompile_headers(skyline PRIVATE ${source_DIR}/skyline/common.h) # PCH will currently break Intellisense 
-target_link_libraries(skyline android perfetto fmt lz4_static tzcode oboe mbedtls::mbedcrypto) +target_link_libraries(skyline android perfetto fmt lz4_static tzcode oboe vkma mbedtls::mbedcrypto) target_compile_options(skyline PRIVATE -Wall -Wno-unknown-attributes -Wno-c++20-extensions -Wno-c++17-extensions -Wno-c99-designator -Wno-reorder -Wno-missing-braces -Wno-unused-variable -Wno-unused-private-field) diff --git a/app/libraries/vkma b/app/libraries/vkma new file mode 160000 index 00000000..6889faaa --- /dev/null +++ b/app/libraries/vkma @@ -0,0 +1 @@ +Subproject commit 6889faaaa284e7ec514e75ea74b113539021d1ad diff --git a/app/libraries/vkma.cpp b/app/libraries/vkma.cpp new file mode 100644 index 00000000..48b0f854 --- /dev/null +++ b/app/libraries/vkma.cpp @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#define VMA_IMPLEMENTATION +#define VMA_STATIC_VULKAN_FUNCTIONS 0 +#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0 +#include diff --git a/app/src/main/cpp/skyline/gpu.cpp b/app/src/main/cpp/skyline/gpu.cpp index 931bd59b..19d9ce18 100644 --- a/app/src/main/cpp/skyline/gpu.cpp +++ b/app/src/main/cpp/skyline/gpu.cpp @@ -10,10 +10,10 @@ namespace skyline::gpu { .pApplicationName = "Skyline", .applicationVersion = state.jvm->GetVersionCode(), // Get the application version from JNI .pEngineName = "FTX1", // "Fast Tegra X1" - .apiVersion = VK_API_VERSION_1_1, + .apiVersion = VkApiVersion, }; - #ifdef NDEBUG + #ifndef NDEBUG constexpr std::array requiredLayers{}; #else constexpr std::array requiredLayers{ @@ -39,18 +39,11 @@ namespace skyline::gpu { throw exception("Cannot find Vulkan layer: \"{}\"", requiredLayer); } - #ifdef NDEBUG - constexpr std::array requiredInstanceExtensions{ - VK_KHR_SURFACE_EXTENSION_NAME, - VK_KHR_ANDROID_SURFACE_EXTENSION_NAME, - }; - #else - constexpr std::array requiredInstanceExtensions{ + constexpr std::array requiredInstanceExtensions{ 
VK_EXT_DEBUG_REPORT_EXTENSION_NAME, VK_KHR_SURFACE_EXTENSION_NAME, VK_KHR_ANDROID_SURFACE_EXTENSION_NAME, }; - #endif auto instanceExtensions{context.enumerateInstanceExtensionProperties()}; if (state.logger->configLevel >= Logger::LogLevel::Debug) { @@ -161,5 +154,5 @@ namespace skyline::gpu { }); } - GPU::GPU(const DeviceState &state) : vkInstance(CreateInstance(state, vkContext)), vkDebugReportCallback(CreateDebugReportCallback(state, vkInstance)), vkPhysicalDevice(CreatePhysicalDevice(state, vkInstance)), vkDevice(CreateDevice(state, vkPhysicalDevice, vkQueueFamilyIndex)), vkQueue(vkDevice, vkQueueFamilyIndex, 0), presentation(state, *this) {} + GPU::GPU(const DeviceState &state) : vkInstance(CreateInstance(state, vkContext)), vkDebugReportCallback(CreateDebugReportCallback(state, vkInstance)), vkPhysicalDevice(CreatePhysicalDevice(state, vkInstance)), vkDevice(CreateDevice(state, vkPhysicalDevice, vkQueueFamilyIndex)), vkQueue(vkDevice, vkQueueFamilyIndex, 0), memory(*this), scheduler(*this), presentation(state, *this) {} } diff --git a/app/src/main/cpp/skyline/gpu.h b/app/src/main/cpp/skyline/gpu.h index 7f42d6f5..67967d76 100644 --- a/app/src/main/cpp/skyline/gpu.h +++ b/app/src/main/cpp/skyline/gpu.h @@ -3,6 +3,8 @@ #pragma once +#include "gpu/memory_manager.h" +#include "gpu/command_scheduler.h" #include "gpu/presentation_engine.h" namespace skyline::gpu { @@ -22,14 +24,19 @@ namespace skyline::gpu { static vk::raii::Device CreateDevice(const DeviceState &state, const vk::raii::PhysicalDevice &physicalDevice, typeof(vk::DeviceQueueCreateInfo::queueCount)& queueConfiguration); public: - vk::raii::Context vkContext; //!< An overarching context for Vulkan with - vk::raii::Instance vkInstance; //!< An instance of Vulkan with all application context - vk::raii::DebugReportCallbackEXT vkDebugReportCallback; //!< An RAII Vulkan debug report manager which calls into DebugCallback - vk::raii::PhysicalDevice vkPhysicalDevice; //!< The underlying physical Vulkan 
device from which we derieve our logical device - typeof(vk::DeviceQueueCreateInfo::queueCount) vkQueueFamilyIndex{}; //!< The index of the family the queue is from - vk::raii::Device vkDevice; //!< The logical Vulkan device which we want to render using + static constexpr u32 VkApiVersion{VK_API_VERSION_1_1}; //!< The version of core Vulkan that we require + + vk::raii::Context vkContext; + vk::raii::Instance vkInstance; + vk::raii::DebugReportCallbackEXT vkDebugReportCallback; //!< An RAII Vulkan debug report manager which calls into 'GPU::DebugCallback' + vk::raii::PhysicalDevice vkPhysicalDevice; + typeof(vk::DeviceQueueCreateInfo::queueCount) vkQueueFamilyIndex{}; + vk::raii::Device vkDevice; + std::mutex queueMutex; //!< Synchronizes access to the queue as it is externally synchronized vk::raii::Queue vkQueue; //!< A Vulkan Queue supporting graphics and compute operations + memory::MemoryManager memory; + CommandScheduler scheduler; PresentationEngine presentation; GPU(const DeviceState &state); diff --git a/app/src/main/cpp/skyline/gpu/command_scheduler.cpp b/app/src/main/cpp/skyline/gpu/command_scheduler.cpp new file mode 100644 index 00000000..74d509ac --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/command_scheduler.cpp @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include +#include "command_scheduler.h" + +namespace skyline::gpu { + CommandScheduler::CommandBufferSlot::CommandBufferSlot(vk::raii::Device &device, vk::CommandBuffer commandBuffer, vk::raii::CommandPool &pool) : active(true), device(device), commandBuffer(device, commandBuffer, pool), fence(device, vk::FenceCreateInfo{}), cycle(std::make_shared(device, *fence)) {} + + bool CommandScheduler::CommandBufferSlot::AllocateIfFree(CommandScheduler::CommandBufferSlot &slot) { + if (slot.active.test_and_set(std::memory_order_acq_rel)) { + if (slot.cycle->Poll()) { + slot.cycle = 
std::make_shared(slot.device, *slot.fence); + return true; + } else { + slot.active.clear(std::memory_order_release); + } + } + return false; + } + + CommandScheduler::CommandScheduler(GPU &pGpu) : gpu(pGpu), vkCommandPool(pGpu.vkDevice, vk::CommandPoolCreateInfo{ + .flags = vk::CommandPoolCreateFlagBits::eTransient | vk::CommandPoolCreateFlagBits::eResetCommandBuffer, + .queueFamilyIndex = pGpu.vkQueueFamilyIndex, + }) {} + + CommandScheduler::ActiveCommandBuffer CommandScheduler::AllocateCommandBuffer() { + auto slot{std::find_if(commandBuffers.begin(), commandBuffers.end(), CommandBufferSlot::AllocateIfFree)}; + if (slot != commandBuffers.end()) + return ActiveCommandBuffer(*slot); + + std::scoped_lock lock(mutex); + vk::CommandBuffer commandBuffer; + vk::CommandBufferAllocateInfo commandBufferAllocateInfo{ + .commandPool = *vkCommandPool, + .level = vk::CommandBufferLevel::ePrimary, + .commandBufferCount = 1, + }; + + auto result{(*gpu.vkDevice).allocateCommandBuffers(&commandBufferAllocateInfo, &commandBuffer, *gpu.vkDevice.getDispatcher())}; + if (result != vk::Result::eSuccess) + vk::throwResultException(result, __builtin_FUNCTION()); + return ActiveCommandBuffer(commandBuffers.emplace_back(gpu.vkDevice, commandBuffer, vkCommandPool)); + } + + void CommandScheduler::SubmitCommandBuffer(const vk::raii::CommandBuffer &commandBuffer, vk::Fence fence) { + std::lock_guard lock(gpu.queueMutex); + gpu.vkQueue.submit(vk::SubmitInfo{ + .commandBufferCount = 1, + .pCommandBuffers = &*commandBuffer, + }, fence); + } +} diff --git a/app/src/main/cpp/skyline/gpu/command_scheduler.h b/app/src/main/cpp/skyline/gpu/command_scheduler.h new file mode 100644 index 00000000..055bb737 --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/command_scheduler.h @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#pragma once + +#include "fence_cycle.h" + +namespace skyline::gpu { + /** + * @brief 
The allocation and synchronized submission of command buffers to the host GPU is handled by this class + */ + class CommandScheduler { + private: + /** + * @brief A wrapper around a command buffer which tracks its state to avoid concurrent usage + */ + struct CommandBufferSlot { + std::atomic_flag active; //!< If the command buffer is currently being recorded to + const vk::raii::Device &device; + vk::raii::CommandBuffer commandBuffer; + vk::raii::Fence fence; //!< A fence used for tracking all submits of a buffer + std::shared_ptr cycle; //!< The latest cycle on the fence, all waits must be performed through this + + CommandBufferSlot(vk::raii::Device &device, vk::CommandBuffer commandBuffer, vk::raii::CommandPool &pool); + + /** + * @brief Attempts to allocate the buffer if it is free (Not being recorded/executing) + * @return If the allocation was successful or not + */ + static bool AllocateIfFree(CommandBufferSlot &slot); + }; + + /** + * @brief An active command buffer occupies a slot and ensures that its status is updated correctly + */ + class ActiveCommandBuffer { + private: + CommandBufferSlot &slot; + + public: + constexpr ActiveCommandBuffer(CommandBufferSlot &slot) : slot(slot) {} + + ~ActiveCommandBuffer() { + slot.active.clear(); + } + + vk::Fence GetFence() { + return *slot.fence; + } + + std::shared_ptr GetFenceCycle() { + return slot.cycle; + } + + vk::raii::CommandBuffer &operator*() { + return slot.commandBuffer; + } + + vk::raii::CommandBuffer *operator->() { + return &slot.commandBuffer; + } + }; + + GPU &gpu; + std::mutex mutex; //!< Synchronizes mutations to the command pool due to allocations + vk::raii::CommandPool vkCommandPool; + std::list commandBuffers; + + /** + * @brief Allocates an existing or new primary command buffer from the pool + */ + ActiveCommandBuffer AllocateCommandBuffer(); + + /** + * @brief Submits a single command buffer to the GPU queue with an optional fence + */ + void SubmitCommandBuffer(const 
vk::raii::CommandBuffer &commandBuffer, vk::Fence fence = {}); + + public: + CommandScheduler(GPU &gpu); + + /** + * @brief Submits a command buffer recorded with the supplied function synchronously + */ + template + std::shared_ptr Submit(RecordFunction recordFunction) { + auto commandBuffer{AllocateCommandBuffer()}; + commandBuffer->begin(vk::CommandBufferBeginInfo{ + .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit, + }); + recordFunction(*commandBuffer); + commandBuffer->end(); + SubmitCommandBuffer(*commandBuffer, commandBuffer.GetFence()); + return commandBuffer.GetFenceCycle(); + } + }; +} diff --git a/app/src/main/cpp/skyline/gpu/fence_cycle.h b/app/src/main/cpp/skyline/gpu/fence_cycle.h new file mode 100644 index 00000000..099d5e00 --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/fence_cycle.h @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#pragma once + +#include +#include +#include + +namespace skyline::gpu { + struct FenceCycle; + + /** + * @brief Any object whose lifetime can be attached to a fence cycle needs to inherit this class + */ + struct FenceCycleDependency { + private: + std::shared_ptr next{}; //!< A shared pointer to the next dependency to form a linked list + friend FenceCycle; + }; + + /** + * @brief A wrapper around a Vulkan Fence which only tracks a single reset -> signal cycle with the ability to attach lifetimes of objects to it + * @note This provides the guarantee that the fence must be signalled prior to destruction when objects are to be destroyed + * @note All waits to the fence **must** be done through the same instance of this, the state of the fence changing externally will lead to UB + */ + struct FenceCycle { + private: + std::atomic_flag signalled; + const vk::raii::Device &device; + vk::Fence fence; + std::shared_ptr list; + + /** + * @brief Sequentially iterate through the shared_ptr linked list of dependencies and 
reset all pointers in a thread-safe atomic manner + * @note We cannot simply nullify the base pointer of the list as a false dependency chain is maintained between the objects when retained externally + */ + void DestroyDependencies() { + auto current{std::atomic_exchange_explicit(&list, std::shared_ptr{}, std::memory_order_acquire)}; + while (current) { + std::shared_ptr next{}; + next.swap(current->next); + current.swap(next); + } + } + + public: + FenceCycle(const vk::raii::Device &device, vk::Fence fence) : signalled(false), device(device), fence(fence) { + device.resetFences(fence); + } + + ~FenceCycle() { + Wait(); + } + + /** + * @brief Wait on a fence cycle till it has been signalled + */ + void Wait() { + if (signalled.test(std::memory_order_consume)) + return; + while (device.waitForFences(fence, false, std::numeric_limits::max()) != vk::Result::eSuccess); + if (signalled.test_and_set(std::memory_order_release)) + DestroyDependencies(); + } + + /** + * @brief Wait on a fence cycle with a timeout in nanoseconds + * @return If the wait was successful or timed out + */ + bool Wait(std::chrono::duration timeout) { + if (signalled.test(std::memory_order_consume)) + return true; + if (device.waitForFences(fence, false, timeout.count()) == vk::Result::eSuccess) { + if (signalled.test_and_set(std::memory_order_release)) + DestroyDependencies(); + return true; + } else { + return false; + } + } + + /** + * @return If the fence is signalled currently or not + */ + bool Poll() { + if (signalled.test(std::memory_order_consume)) + return true; + if ((*device).getFenceStatus(fence, *device.getDispatcher()) == vk::Result::eSuccess) { + if (signalled.test_and_set(std::memory_order_release)) + DestroyDependencies(); + return true; + } else { + return false; + } + } + + /** + * @brief Attach the lifetime of an object to the fence being signalled + */ + void AttachObject(const std::shared_ptr &dependency) { + if (!signalled.test(std::memory_order_consume)) { + 
std::shared_ptr next{std::atomic_load_explicit(&list, std::memory_order_consume)}; + do { + dependency->next = next; + if (!next && signalled.test(std::memory_order_consume)) + return; + } while (std::atomic_compare_exchange_strong_explicit(&list, &next, dependency, std::memory_order_release, std::memory_order_consume)); + } + } + + /** + * @brief A version of AttachObject optimized for several objects being attached at once + */ + void AttachObjects(std::initializer_list> dependencies) { + if (!signalled.test(std::memory_order_consume)) { + { + auto it{dependencies.begin()}; + while (it != dependencies.end()) { + auto next{std::next(it)}; + (*it)->next = *next; + it = next; + } + } + + const auto& first{*dependencies.begin()}; + const auto& last{*dependencies.end()}; + std::shared_ptr next{std::atomic_load_explicit(&list, std::memory_order_consume)}; + do { + last->next = next; + if (!next && signalled.test(std::memory_order_consume)) { + std::shared_ptr current{first}; + while (current) { + next.swap(first->next); + current.swap(next); + next.reset(); + } + return; + } + } while (std::atomic_compare_exchange_strong(&list, &next, first)); + } + } + + template + void AttachObjects(Dependencies... 
dependencies) { + AttachObjects(std::initializer_list>{std::forward(dependencies)...}); + } + }; +} diff --git a/app/src/main/cpp/skyline/gpu/memory_manager.cpp b/app/src/main/cpp/skyline/gpu/memory_manager.cpp new file mode 100644 index 00000000..a247464c --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/memory_manager.cpp @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include +#include "memory_manager.h" + +namespace skyline::gpu::memory { + StagingBuffer::~StagingBuffer() { + vmaDestroyBuffer(vmaAllocator, vkBuffer, vmaAllocation); + } + + void MemoryManager::ThrowOnFail(VkResult result, const char *function) { + if (result != VK_SUCCESS) + vk::throwResultException(vk::Result(result), function); + } + + MemoryManager::MemoryManager(const GPU &pGpu) : gpu(pGpu) { + auto dispatcher{gpu.vkDevice.getDispatcher()}; + VmaVulkanFunctions vulkanFunctions{ + .vkGetPhysicalDeviceProperties = dispatcher->vkGetPhysicalDeviceProperties, + .vkGetPhysicalDeviceMemoryProperties = dispatcher->vkGetPhysicalDeviceMemoryProperties, + .vkAllocateMemory = dispatcher->vkAllocateMemory, + .vkFreeMemory = dispatcher->vkFreeMemory, + .vkMapMemory = dispatcher->vkMapMemory, + .vkUnmapMemory = dispatcher->vkUnmapMemory, + .vkFlushMappedMemoryRanges = dispatcher->vkFlushMappedMemoryRanges, + .vkInvalidateMappedMemoryRanges = dispatcher->vkInvalidateMappedMemoryRanges, + .vkBindBufferMemory = dispatcher->vkBindBufferMemory, + .vkBindImageMemory = dispatcher->vkBindImageMemory, + .vkGetBufferMemoryRequirements = dispatcher->vkGetBufferMemoryRequirements, + .vkGetImageMemoryRequirements = dispatcher->vkGetImageMemoryRequirements, + .vkCreateBuffer = dispatcher->vkCreateBuffer, + .vkDestroyBuffer = dispatcher->vkDestroyBuffer, + .vkCreateImage = dispatcher->vkCreateImage, + .vkDestroyImage = dispatcher->vkDestroyImage, + .vkCmdCopyBuffer = dispatcher->vkCmdCopyBuffer, + 
.vkGetBufferMemoryRequirements2KHR = dispatcher->vkGetBufferMemoryRequirements2, + .vkGetImageMemoryRequirements2KHR = dispatcher->vkGetImageMemoryRequirements2, + .vkBindBufferMemory2KHR = dispatcher->vkBindBufferMemory2, + .vkBindImageMemory2KHR = dispatcher->vkBindImageMemory2, + .vkGetPhysicalDeviceMemoryProperties2KHR = dispatcher->vkGetPhysicalDeviceMemoryProperties2, + }; + VmaAllocatorCreateInfo allocatorCreateInfo{ + .physicalDevice = *gpu.vkPhysicalDevice, + .device = *gpu.vkDevice, + .instance = *gpu.vkInstance, + .pVulkanFunctions = &vulkanFunctions, + .vulkanApiVersion = GPU::VkApiVersion, + }; + ThrowOnFail(vmaCreateAllocator(&allocatorCreateInfo, &vmaAllocator)); + // TODO: Use VK_KHR_dedicated_allocation when available (Should be on Adreno GPUs) + } + + MemoryManager::~MemoryManager() { + vmaDestroyAllocator(vmaAllocator); + } + + std::shared_ptr MemoryManager::AllocateStagingBuffer(vk::DeviceSize size) { + vk::BufferCreateInfo bufferCreateInfo{ + .size = size, + .usage = vk::BufferUsageFlagBits::eTransferSrc, + .sharingMode = vk::SharingMode::eExclusive, + .queueFamilyIndexCount = 1, + .pQueueFamilyIndices = &gpu.vkQueueFamilyIndex, + }; + VmaAllocationCreateInfo allocationCreateInfo{ + .flags = VMA_ALLOCATION_CREATE_MAPPED_BIT, + .usage = VMA_MEMORY_USAGE_CPU_ONLY, + }; + + VkBuffer buffer; + VmaAllocation allocation; + VmaAllocationInfo allocationInfo; + ThrowOnFail(vmaCreateBuffer(vmaAllocator, &static_cast(bufferCreateInfo), &allocationCreateInfo, &buffer, &allocation, &allocationInfo)); + + return std::make_shared(reinterpret_cast(allocationInfo.pMappedData), allocationInfo.size, vmaAllocator, buffer, allocation); + } +} diff --git a/app/src/main/cpp/skyline/gpu/memory_manager.h b/app/src/main/cpp/skyline/gpu/memory_manager.h new file mode 100644 index 00000000..7e60dd17 --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/memory_manager.h @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2021 Skyline Team and Contributors 
(https://github.com/skyline-emu/) + +#pragma once + +#include +#include "fence_cycle.h" + +namespace skyline::gpu::memory { + /** + * @brief A view into a CPU mapping of a Vulkan buffer + * @note The mapping **should not** be used after the lifetime of the object has ended + */ + struct StagingBuffer : public span, FenceCycleDependency { + VmaAllocator vmaAllocator; + VmaAllocation vmaAllocation; + vk::Buffer vkBuffer; + + constexpr StagingBuffer(u8 *pointer, size_t size, VmaAllocator vmaAllocator, vk::Buffer vkBuffer, VmaAllocation vmaAllocation) : vmaAllocator(vmaAllocator), vkBuffer(vkBuffer), vmaAllocation(vmaAllocation), span(pointer, size) {} + + ~StagingBuffer(); + }; + + /** + * @brief An abstraction over memory operations done in Vulkan, it's used for all allocations on the host GPU + */ + class MemoryManager { + private: + const GPU &gpu; + VmaAllocator vmaAllocator{VK_NULL_HANDLE}; + + /** + * @brief If the result isn't VK_SUCCESS then an exception is thrown + */ + static void ThrowOnFail(VkResult result, const char *function = __builtin_FUNCTION()); + + public: + MemoryManager(const GPU &gpu); + + ~MemoryManager(); + + /** + * @brief Creates a buffer which is optimized for staging (Transfer Source) + */ + std::shared_ptr AllocateStagingBuffer(vk::DeviceSize size); + }; +}