Implement Fence Cycle, Memory Manager and Command Scheduler

Implements a wrapper over fences to track a single cycle of activation, a Vulkan memory manager that wraps the Vulkan-Memory-Allocator library, and a command scheduler for scheduling Vulkan command buffers
2024-11-22 13:59:18 +01:00 · 2021-05-22 22:01:22 +05:30 · 2021-05-22 22:01:22 +05:30 · b2132fd7aa
commit b2132fd7aa
parent d8025e7178
12 changed files with 470 additions and 19 deletions
--- a/.gitmodules
+++ b/.gitmodules
@ -25,3 +25,6 @@
 [submodule "app/libraries/vkhpp"]
 	path = app/libraries/vkhpp
 	url = https://github.com/KhronosGroup/Vulkan-Hpp
 [submodule "app/libraries/vkma"]
 	path = app/libraries/vkma
 	url = https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@ -15,5 +15,6 @@
    <mapping directory="$PROJECT_DIR$/app/libraries/vkhpp/glm" vcs="Git" />
    <mapping directory="$PROJECT_DIR$/app/libraries/vkhpp/glslang" vcs="Git" />
    <mapping directory="$PROJECT_DIR$/app/libraries/vkhpp/tinyxml2" vcs="Git" />
    <mapping directory="$PROJECT_DIR$/app/libraries/vkma" vcs="Git" />
  </component>
 </project>
--- a/app/CMakeLists.txt
+++ b/app/CMakeLists.txt
@ -16,14 +16,18 @@ if (uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE")
    set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
 endif ()
 # {fmt}
 add_subdirectory("libraries/fmt")
 # TzCode
 add_subdirectory("libraries/tzcode")
 target_compile_options(tzcode PRIVATE -Wno-everything)
 # Oboe
 add_subdirectory("libraries/oboe")
 include_directories("libraries/oboe/include")
 # LZ4
 set(LZ4_BUILD_CLI OFF CACHE BOOL "Build LZ4 CLI" FORCE)
 set(LZ4_BUILD_LEGACY_LZ4C OFF CACHE BOOL "Build lz4c progam with legacy argument support" FORCE)
 add_subdirectory("libraries/lz4/build/cmake")
@ -37,9 +41,18 @@ add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1) # We use the dynam
 include_directories("libraries/vkhpp")
 include_directories("libraries/vkhpp/Vulkan-Headers/include") # We use base Vulkan headers from this to ensure version parity with Vulkan-Hpp
-include_directories("libraries/pugixml/src") # We use PugiXML in header-only mode
+# Vulkan Memory Allocator
 include_directories("libraries/vkma/include")
 add_library(vkma STATIC libraries/vkma.cpp)
 target_compile_options(vkma PRIVATE -Wno-everything)
 # PugiXML (Header-only mode)
 include_directories("libraries/pugixml/src")
 # Frozen
 include_directories("libraries/frozen/include")
 # MbedTLS
 find_package(mbedtls REQUIRED CONFIG)
 # Perfetto SDK
@ -75,6 +88,8 @@ add_library(skyline SHARED
        ${source_DIR}/skyline/audio/resampler.cpp
        ${source_DIR}/skyline/audio/adpcm_decoder.cpp
        ${source_DIR}/skyline/gpu.cpp
        ${source_DIR}/skyline/gpu/memory_manager.cpp
        ${source_DIR}/skyline/gpu/command_scheduler.cpp
        ${source_DIR}/skyline/gpu/presentation_engine.cpp
        ${source_DIR}/skyline/gpu/texture.cpp
        ${source_DIR}/skyline/soc/gmmu.cpp
@ -198,5 +213,5 @@ add_library(skyline SHARED
        ${source_DIR}/skyline/services/mmnv/IRequest.cpp
        )
 # target_precompile_headers(skyline PRIVATE ${source_DIR}/skyline/common.h) # PCH will currently break Intellisense
-target_link_libraries(skyline android perfetto fmt lz4_static tzcode oboe mbedtls::mbedcrypto)
+target_link_libraries(skyline android perfetto fmt lz4_static tzcode oboe vkma mbedtls::mbedcrypto)
 target_compile_options(skyline PRIVATE -Wall -Wno-unknown-attributes -Wno-c++20-extensions -Wno-c++17-extensions -Wno-c99-designator -Wno-reorder -Wno-missing-braces -Wno-unused-variable -Wno-unused-private-field)
--- a/app/libraries/vkma
+++ b/app/libraries/vkma
@ -0,0 +1 @@
 Subproject commit 6889faaaa284e7ec514e75ea74b113539021d1ad
--- a/app/libraries/vkma.cpp
+++ b/app/libraries/vkma.cpp
@ -0,0 +1,7 @@
 // SPDX-License-Identifier: MPL-2.0
 // Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
 // Emit the Vulkan Memory Allocator implementation into this translation unit, it is the sole source of the 'vkma' static library
 #define VMA_IMPLEMENTATION
 // VMA must neither reference statically linked Vulkan symbols nor load the function pointers by itself
 // NOTE(review): This relies on every allocator being created with a fully populated 'VmaVulkanFunctions' table
 #define VMA_STATIC_VULKAN_FUNCTIONS 0
 #define VMA_DYNAMIC_VULKAN_FUNCTIONS 0
 #include <vk_mem_alloc.h>
--- a/app/src/main/cpp/skyline/gpu.cpp
+++ b/app/src/main/cpp/skyline/gpu.cpp
@ -10,10 +10,10 @@ namespace skyline::gpu {
            .pApplicationName = "Skyline",
            .applicationVersion = state.jvm->GetVersionCode(), // Get the application version from JNI
            .pEngineName = "FTX1", // "Fast Tegra X1"
-            .apiVersion = VK_API_VERSION_1_1,
+            .apiVersion = VkApiVersion,
        };
-        #ifdef NDEBUG
+        #ifndef NDEBUG
        constexpr std::array<const char *, 0> requiredLayers{};
        #else
        constexpr std::array<const char *, 1> requiredLayers{
@ -39,18 +39,11 @@ namespace skyline::gpu {
                throw exception("Cannot find Vulkan layer: \"{}\"", requiredLayer);
        }
-        #ifdef NDEBUG
+        constexpr std::array<const char*, 3> requiredInstanceExtensions{
        constexpr std::array<const char*, 2> requiredInstanceExtensions{
            VK_KHR_SURFACE_EXTENSION_NAME,
            VK_KHR_ANDROID_SURFACE_EXTENSION_NAME,
        };
        #else
        constexpr std::array<const char *, 3> requiredInstanceExtensions{
            VK_EXT_DEBUG_REPORT_EXTENSION_NAME,
            VK_KHR_SURFACE_EXTENSION_NAME,
            VK_KHR_ANDROID_SURFACE_EXTENSION_NAME,
        };
        #endif
        auto instanceExtensions{context.enumerateInstanceExtensionProperties()};
        if (state.logger->configLevel >= Logger::LogLevel::Debug) {
@ -161,5 +154,5 @@ namespace skyline::gpu {
        });
    }
-    GPU::GPU(const DeviceState &state) : vkInstance(CreateInstance(state, vkContext)), vkDebugReportCallback(CreateDebugReportCallback(state, vkInstance)), vkPhysicalDevice(CreatePhysicalDevice(state, vkInstance)), vkDevice(CreateDevice(state, vkPhysicalDevice, vkQueueFamilyIndex)), vkQueue(vkDevice, vkQueueFamilyIndex, 0), presentation(state, *this) {}
+    GPU::GPU(const DeviceState &state) : vkInstance(CreateInstance(state, vkContext)), vkDebugReportCallback(CreateDebugReportCallback(state, vkInstance)), vkPhysicalDevice(CreatePhysicalDevice(state, vkInstance)), vkDevice(CreateDevice(state, vkPhysicalDevice, vkQueueFamilyIndex)), vkQueue(vkDevice, vkQueueFamilyIndex, 0), memory(*this), scheduler(*this), presentation(state, *this) {}
 }
--- a/app/src/main/cpp/skyline/gpu.h
+++ b/app/src/main/cpp/skyline/gpu.h
@ -3,6 +3,8 @@
 #pragma once
 #include "gpu/memory_manager.h"
 #include "gpu/command_scheduler.h"
 #include "gpu/presentation_engine.h"
 namespace skyline::gpu {
@ -22,14 +24,19 @@ namespace skyline::gpu {
        static vk::raii::Device CreateDevice(const DeviceState &state, const vk::raii::PhysicalDevice &physicalDevice, typeof(vk::DeviceQueueCreateInfo::queueCount)& queueConfiguration);
      public:
-        vk::raii::Context vkContext; //!< An overarching context for Vulkan with
+        static constexpr u32 VkApiVersion{VK_API_VERSION_1_1}; //!< The version of core Vulkan that we require
-        vk::raii::Instance vkInstance; //!< An instance of Vulkan with all application context
+
-        vk::raii::DebugReportCallbackEXT vkDebugReportCallback; //!< An RAII Vulkan debug report manager which calls into DebugCallback
+        vk::raii::Context vkContext;
-        vk::raii::PhysicalDevice vkPhysicalDevice; //!< The underlying physical Vulkan device from which we derieve our logical device
+        vk::raii::Instance vkInstance;
-        typeof(vk::DeviceQueueCreateInfo::queueCount) vkQueueFamilyIndex{}; //!< The index of the family the queue is from
+        vk::raii::DebugReportCallbackEXT vkDebugReportCallback; //!< An RAII Vulkan debug report manager which calls into 'GPU::DebugCallback'
-        vk::raii::Device vkDevice; //!< The logical Vulkan device which we want to render using
+        vk::raii::PhysicalDevice vkPhysicalDevice;
        typeof(vk::DeviceQueueCreateInfo::queueCount) vkQueueFamilyIndex{};
        vk::raii::Device vkDevice;
        std::mutex queueMutex; //!< Synchronizes access to the queue as it is externally synchronized
        vk::raii::Queue vkQueue; //!< A Vulkan Queue supporting graphics and compute operations
        memory::MemoryManager memory;
        CommandScheduler scheduler;
        PresentationEngine presentation;
        GPU(const DeviceState &state);
--- a/app/src/main/cpp/skyline/gpu/command_scheduler.cpp
+++ b/app/src/main/cpp/skyline/gpu/command_scheduler.cpp
@ -0,0 +1,53 @@
 // SPDX-License-Identifier: MPL-2.0
 // Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
 #include <gpu.h>
 #include "command_scheduler.h"
 namespace skyline::gpu {
    /**
     * @brief Wraps the supplied command buffer into a slot that starts out as active, creating the fence and the initial fence cycle used to track its submissions
     */
    CommandScheduler::CommandBufferSlot::CommandBufferSlot(vk::raii::Device &device, vk::CommandBuffer commandBuffer, vk::raii::CommandPool &pool)
        : active(true),
          device(device),
          commandBuffer(device, commandBuffer, pool),
          fence(device, vk::FenceCreateInfo{}),
          cycle(std::make_shared<FenceCycle>(device, *fence)) {}
    /**
     * @brief Attempts to claim the supplied slot for recording
     * @param slot The slot to claim, its 'active' flag is left set only when the claim succeeds
     * @return If the slot was claimed, this requires it to be inactive and its previous fence cycle to have been signalled
     */
    bool CommandScheduler::CommandBufferSlot::AllocateIfFree(CommandScheduler::CommandBufferSlot &slot) {
        // 'test_and_set' returns the prior value of the flag, a result of true means the slot already has an owner and its flag must be left untouched
        if (slot.active.test_and_set(std::memory_order_acq_rel))
            return false;

        if (slot.cycle->Poll()) {
            // The last submission has finished so the fence can be recycled, constructing a new cycle resets the fence
            slot.cycle = std::make_shared<FenceCycle>(slot.device, *slot.fence);
            return true;
        }

        // The command buffer hasn't been signalled as complete yet, relinquish our claim so that the slot can be retried later
        slot.active.clear(std::memory_order_release);
        return false;
    }
    /**
     * @brief Creates the command pool that all command buffers of the scheduler are allocated from
     * @note The pool is created as transient with individually resettable command buffers on the queue family of the supplied GPU
     */
    CommandScheduler::CommandScheduler(GPU &pGpu)
        : gpu(pGpu),
          vkCommandPool(pGpu.vkDevice, vk::CommandPoolCreateInfo{
              .flags = vk::CommandPoolCreateFlagBits::eTransient | vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
              .queueFamilyIndex = pGpu.vkQueueFamilyIndex,
          }) {}
    /**
     * @brief Claims the first reusable slot or allocates a new primary command buffer from the pool when none is free
     * @return An active command buffer which occupies the slot till it is destroyed
     */
    CommandScheduler::ActiveCommandBuffer CommandScheduler::AllocateCommandBuffer() {
        // Prefer recycling an existing slot over growing the pool
        // NOTE(review): The list is traversed without holding the mutex, confirm this cannot race with the insertion below
        for (auto &candidate : commandBuffers)
            if (CommandBufferSlot::AllocateIfFree(candidate))
                return ActiveCommandBuffer(candidate);

        // No slot could be claimed, allocations from the command pool are serialized through the mutex
        std::scoped_lock lock(mutex);

        vk::CommandBufferAllocateInfo allocateInfo{
            .commandPool = *vkCommandPool,
            .level = vk::CommandBufferLevel::ePrimary,
            .commandBufferCount = 1,
        };
        vk::CommandBuffer allocatedBuffer;

        auto allocationResult{(*gpu.vkDevice).allocateCommandBuffers(&allocateInfo, &allocatedBuffer, *gpu.vkDevice.getDispatcher())};
        if (allocationResult != vk::Result::eSuccess)
            vk::throwResultException(allocationResult, __builtin_FUNCTION());

        // A newly constructed slot starts out as active so it can be handed out directly
        return ActiveCommandBuffer(commandBuffers.emplace_back(gpu.vkDevice, allocatedBuffer, vkCommandPool));
    }
    /**
     * @brief Submits the supplied command buffer to the GPU queue by itself
     * @param fence The fence passed to the queue submission alongside the command buffer
     */
    void CommandScheduler::SubmitCommandBuffer(const vk::raii::CommandBuffer &commandBuffer, vk::Fence fence) {
        vk::SubmitInfo submitInfo{
            .commandBufferCount = 1,
            .pCommandBuffers = &*commandBuffer,
        };

        // The queue is externally synchronized so the submission is performed while holding the queue mutex
        std::lock_guard lock(gpu.queueMutex);
        gpu.vkQueue.submit(submitInfo, fence);
    }
 }
--- a/app/src/main/cpp/skyline/gpu/command_scheduler.h
+++ b/app/src/main/cpp/skyline/gpu/command_scheduler.h
@ -0,0 +1,97 @@
 // SPDX-License-Identifier: MPL-2.0
 // Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
 #pragma once
 #include "fence_cycle.h"
 namespace skyline::gpu {
    /**
     * @brief The allocation and synchronized submission of command buffers to the host GPU is handled by this class
     */
    class CommandScheduler {
      private:
        /**
         * @brief A wrapper around a command buffer which tracks its state to avoid concurrent usage
         */
        struct CommandBufferSlot {
            std::atomic_flag active; //!< If the command buffer is currently being recorded to
            const vk::raii::Device &device;
            vk::raii::CommandBuffer commandBuffer;
            vk::raii::Fence fence; //!< A fence used for tracking all submits of a buffer
            std::shared_ptr<FenceCycle> cycle; //!< The latest cycle on the fence, all waits must be performed through this
            CommandBufferSlot(vk::raii::Device &device, vk::CommandBuffer commandBuffer, vk::raii::CommandPool &pool);
            /**
             * @brief Attempts to allocate the buffer if it is free (Not being recorded/executing)
             * @return If the allocation was successful or not
             */
            static bool AllocateIfFree(CommandBufferSlot &slot);
        };
        /**
         * @brief An active command buffer occupies a slot and ensures that its status is updated correctly
         */
        class ActiveCommandBuffer {
          private:
            CommandBufferSlot &slot;
          public:
            constexpr ActiveCommandBuffer(CommandBufferSlot &slot) : slot(slot) {}
            ~ActiveCommandBuffer() {
                slot.active.clear();
            }
            vk::Fence GetFence() {
                return *slot.fence;
            }
            std::shared_ptr<FenceCycle> GetFenceCycle() {
                return slot.cycle;
            }
            vk::raii::CommandBuffer &operator*() {
                return slot.commandBuffer;
            }
            vk::raii::CommandBuffer *operator->() {
                return &slot.commandBuffer;
            }
        };
        GPU &gpu;
        std::mutex mutex; //!< Synchronizes mutations to the command pool due to allocations
        vk::raii::CommandPool vkCommandPool;
        std::list<CommandBufferSlot> commandBuffers;
        /**
         * @brief Allocates an existing or new primary command buffer from the pool
         */
        ActiveCommandBuffer AllocateCommandBuffer();
        /**
         * @brief Submits a single command buffer to the GPU queue with an optional fence
         */
        void SubmitCommandBuffer(const vk::raii::CommandBuffer &commandBuffer, vk::Fence fence = {});
      public:
        CommandScheduler(GPU &gpu);
        /**
         * @brief Submits a command buffer recorded with the supplied function synchronously
         */
        template<typename RecordFunction>
        std::shared_ptr<FenceCycle> Submit(RecordFunction recordFunction) {
            auto commandBuffer{AllocateCommandBuffer()};
            commandBuffer->begin(vk::CommandBufferBeginInfo{
                .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
            });
            recordFunction(*commandBuffer);
            commandBuffer->end();
            SubmitCommandBuffer(*commandBuffer, commandBuffer.GetFence());
            return commandBuffer.GetFenceCycle();
        }
    };
 }
--- a/app/src/main/cpp/skyline/gpu/fence_cycle.h
+++ b/app/src/main/cpp/skyline/gpu/fence_cycle.h
@ -0,0 +1,149 @@
 // SPDX-License-Identifier: MPL-2.0
 // Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
 #pragma once
 #include <forward_list>
 #include <vulkan/vulkan_raii.hpp>
 #include <common.h>
 namespace skyline::gpu {
    struct FenceCycle;
    /**
     * @brief The base class which any object has to inherit for its lifetime to be attachable to a fence cycle
     */
    struct FenceCycleDependency {
      private:
        friend FenceCycle; // The fence cycle is what links dependencies together through 'next'
        std::shared_ptr<FenceCycleDependency> next; //!< A shared pointer to the next dependency which forms a singly linked list
    };
    /**
     * @brief A wrapper around a Vulkan Fence which only tracks a single reset -> signal cycle with the ability to attach lifetimes of objects to it
     * @note This provides the guarantee that the fence must be signalled prior to destruction when objects are to be destroyed
     * @note All waits to the fence **must** be done through the same instance of this, the state of the fence changing externally will lead to UB
     */
    struct FenceCycle {
      private:
        std::atomic_flag signalled;
        const vk::raii::Device &device;
        vk::Fence fence;
        std::shared_ptr<FenceCycleDependency> list;
        /**
         * @brief Sequentially iterate through the shared_ptr linked list of dependencies and reset all pointers in a thread-safe atomic manner
         * @note We cannot simply nullify the base pointer of the list as a false dependency chain is maintained between the objects when retained exteranlly
         */
        void DestroyDependencies() {
            auto current{std::atomic_exchange_explicit(&list, std::shared_ptr<FenceCycleDependency>{}, std::memory_order_acquire)};
            while (current) {
                std::shared_ptr<FenceCycleDependency> next{};
                next.swap(current->next);
                current.swap(next);
            }
        }
      public:
        FenceCycle(const vk::raii::Device &device, vk::Fence fence) : signalled(false), device(device), fence(fence) {
            device.resetFences(fence);
        }
        ~FenceCycle() {
            Wait();
        }
        /**
         * @brief Wait on a fence cycle till it has been signalled
         */
        void Wait() {
            if (signalled.test(std::memory_order_consume))
                return;
            while (device.waitForFences(fence, false, std::numeric_limits<u64>::max()) != vk::Result::eSuccess);
            if (signalled.test_and_set(std::memory_order_release))
                DestroyDependencies();
        }
        /**
         * @brief Wait on a fence cycle with a timeout in nanoseconds
         * @return If the wait was successful or timed out
         */
        bool Wait(std::chrono::duration<u64, std::nano> timeout) {
            if (signalled.test(std::memory_order_consume))
                return true;
            if (device.waitForFences(fence, false, timeout.count()) == vk::Result::eSuccess) {
                if (signalled.test_and_set(std::memory_order_release))
                    DestroyDependencies();
                return true;
            } else {
                return false;
            }
        }
        /**
         * @return If the fence is signalled currently or not
         */
        bool Poll() {
            if (signalled.test(std::memory_order_consume))
                return true;
            if ((*device).getFenceStatus(fence, *device.getDispatcher()) == vk::Result::eSuccess) {
                if (signalled.test_and_set(std::memory_order_release))
                    DestroyDependencies();
                return true;
            } else {
                return false;
            }
        }
        /**
         * @brief Attach the lifetime of an object to the fence being signalled
         */
        void AttachObject(const std::shared_ptr<FenceCycleDependency> &dependency) {
            if (!signalled.test(std::memory_order_consume)) {
                std::shared_ptr<FenceCycleDependency> next{std::atomic_load_explicit(&list, std::memory_order_consume)};
                do {
                    dependency->next = next;
                    if (!next && signalled.test(std::memory_order_consume))
                        return;
                } while (std::atomic_compare_exchange_strong_explicit(&list, &next, dependency, std::memory_order_release, std::memory_order_consume));
            }
        }
        /**
         * @brief A version of AttachObject optimized for several objects being attached at once
         */
        void AttachObjects(std::initializer_list<std::shared_ptr<FenceCycleDependency>> dependencies) {
            if (!signalled.test(std::memory_order_consume)) {
                {
                    auto it{dependencies.begin()};
                    while (it != dependencies.end()) {
                        auto next{std::next(it)};
                        (*it)->next = *next;
                        it = next;
                    }
                }
                const auto& first{*dependencies.begin()};
                const auto& last{*dependencies.end()};
                std::shared_ptr<FenceCycleDependency> next{std::atomic_load_explicit(&list, std::memory_order_consume)};
                do {
                    last->next = next;
                    if (!next && signalled.test(std::memory_order_consume)) {
                        std::shared_ptr<FenceCycleDependency> current{first};
                        while (current) {
                            next.swap(first->next);
                            current.swap(next);
                            next.reset();
                        }
                        return;
                    }
                } while (std::atomic_compare_exchange_strong(&list, &next, first));
            }
        }
        template<typename... Dependencies>
        void AttachObjects(Dependencies... dependencies) {
            AttachObjects(std::initializer_list<std::shared_ptr<FenceCycleDependency>>{std::forward<Dependencies>(dependencies)...});
        }
    };
 }
--- a/app/src/main/cpp/skyline/gpu/memory_manager.cpp
+++ b/app/src/main/cpp/skyline/gpu/memory_manager.cpp
@ -0,0 +1,78 @@
 // SPDX-License-Identifier: MPL-2.0
 // Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
 #include <gpu.h>
 #include "memory_manager.h"
 namespace skyline::gpu::memory {
    /**
     * @brief Hands the buffer and its allocation back to the VMA allocator stored in this object
     * @note NOTE(review): The stored allocator needs to outlive this object — confirm no staging buffer can outlast its MemoryManager
     */
    StagingBuffer::~StagingBuffer() {
        vmaDestroyBuffer(vmaAllocator, vkBuffer, vmaAllocation);
    }
    /**
     * @brief Converts any result other than VK_SUCCESS into a Vulkan-Hpp exception
     * @param function The name of the function which is reported as the origin of the failure
     */
    void MemoryManager::ThrowOnFail(VkResult result, const char *function) {
        if (result == VK_SUCCESS)
            return;

        vk::throwResultException(static_cast<vk::Result>(result), function);
    }
    /**
     * @brief Creates the VMA allocator for the Vulkan instance, physical device and logical device of the supplied GPU
     * @note An exception is thrown through ThrowOnFail if the allocator cannot be created
     */
    MemoryManager::MemoryManager(const GPU &pGpu) : gpu(pGpu) {
        auto dispatcher{gpu.vkDevice.getDispatcher()};
        // All Vulkan entry points are handed to VMA explicitly from the dispatcher of the device
        VmaVulkanFunctions vulkanFunctions{
            .vkGetPhysicalDeviceProperties = dispatcher->vkGetPhysicalDeviceProperties,
            .vkGetPhysicalDeviceMemoryProperties = dispatcher->vkGetPhysicalDeviceMemoryProperties,
            .vkAllocateMemory = dispatcher->vkAllocateMemory,
            .vkFreeMemory = dispatcher->vkFreeMemory,
            .vkMapMemory = dispatcher->vkMapMemory,
            .vkUnmapMemory = dispatcher->vkUnmapMemory,
            .vkFlushMappedMemoryRanges = dispatcher->vkFlushMappedMemoryRanges,
            .vkInvalidateMappedMemoryRanges = dispatcher->vkInvalidateMappedMemoryRanges,
            .vkBindBufferMemory = dispatcher->vkBindBufferMemory,
            .vkBindImageMemory = dispatcher->vkBindImageMemory,
            .vkGetBufferMemoryRequirements = dispatcher->vkGetBufferMemoryRequirements,
            .vkGetImageMemoryRequirements = dispatcher->vkGetImageMemoryRequirements,
            .vkCreateBuffer = dispatcher->vkCreateBuffer,
            .vkDestroyBuffer = dispatcher->vkDestroyBuffer,
            .vkCreateImage = dispatcher->vkCreateImage,
            .vkDestroyImage = dispatcher->vkDestroyImage,
            .vkCmdCopyBuffer = dispatcher->vkCmdCopyBuffer,
            // The members below carry a KHR suffix in VMA but are populated with the unsuffixed entry points of the dispatcher
            .vkGetBufferMemoryRequirements2KHR = dispatcher->vkGetBufferMemoryRequirements2,
            .vkGetImageMemoryRequirements2KHR = dispatcher->vkGetImageMemoryRequirements2,
            .vkBindBufferMemory2KHR = dispatcher->vkBindBufferMemory2,
            .vkBindImageMemory2KHR = dispatcher->vkBindImageMemory2,
            .vkGetPhysicalDeviceMemoryProperties2KHR = dispatcher->vkGetPhysicalDeviceMemoryProperties2,
        };
        VmaAllocatorCreateInfo allocatorCreateInfo{
            .physicalDevice = *gpu.vkPhysicalDevice,
            .device = *gpu.vkDevice,
            .instance = *gpu.vkInstance,
            .pVulkanFunctions = &vulkanFunctions,
            .vulkanApiVersion = GPU::VkApiVersion,
        };
        ThrowOnFail(vmaCreateAllocator(&allocatorCreateInfo, &vmaAllocator));
        // TODO: Use VK_KHR_dedicated_allocation when available (Should be on Adreno GPUs)
    }
    /**
     * @brief Destroys the VMA allocator that was created by the constructor
     */
    MemoryManager::~MemoryManager() {
        vmaDestroyAllocator(vmaAllocator);
    }
    /**
     * @brief Creates a transfer source buffer alongside a CPU-only allocation which is created as mapped
     * @param size The size of the buffer to create in bytes
     * @return A staging buffer which spans the mapping of the allocation and releases the buffer on destruction
     */
    std::shared_ptr<StagingBuffer> MemoryManager::AllocateStagingBuffer(vk::DeviceSize size) {
        vk::BufferCreateInfo stagingCreateInfo{
            .size = size,
            .usage = vk::BufferUsageFlagBits::eTransferSrc,
            .sharingMode = vk::SharingMode::eExclusive,
            .queueFamilyIndexCount = 1,
            .pQueueFamilyIndices = &gpu.vkQueueFamilyIndex,
        };

        VmaAllocationCreateInfo stagingAllocationCreateInfo{
            .flags = VMA_ALLOCATION_CREATE_MAPPED_BIT,
            .usage = VMA_MEMORY_USAGE_CPU_ONLY,
        };

        VkBuffer bufferHandle;
        VmaAllocation allocationHandle;
        VmaAllocationInfo mappingInfo;
        auto creationResult{vmaCreateBuffer(vmaAllocator, &static_cast<const VkBufferCreateInfo &>(stagingCreateInfo), &stagingAllocationCreateInfo, &bufferHandle, &allocationHandle, &mappingInfo)};
        ThrowOnFail(creationResult);

        // The span covers the size reported for the allocation rather than the size that was requested
        auto *mapping{static_cast<u8 *>(mappingInfo.pMappedData)};
        return std::make_shared<memory::StagingBuffer>(mapping, mappingInfo.size, vmaAllocator, bufferHandle, allocationHandle);
    }
 }
--- a/app/src/main/cpp/skyline/gpu/memory_manager.h
+++ b/app/src/main/cpp/skyline/gpu/memory_manager.h
@ -0,0 +1,47 @@
 // SPDX-License-Identifier: MPL-2.0
 // Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
 #pragma once
 #include <vk_mem_alloc.h>
 #include "fence_cycle.h"
 namespace skyline::gpu::memory {
    /**
     * @brief A view into a CPU mapping of a Vulkan buffer
     * @note The mapping **should not** be used after the lifetime of the object has ended
     * @note This object cannot be copied as the buffer and its allocation are released on destruction, a copy would release them twice
     */
    struct StagingBuffer : public span<u8>, FenceCycleDependency {
        VmaAllocator vmaAllocator; //!< The allocator which the buffer is handed back to on destruction
        VmaAllocation vmaAllocation;
        vk::Buffer vkBuffer;

        /**
         * @param pointer A pointer to the start of the CPU mapping of the buffer
         * @param size The size of the CPU mapping in bytes
         * @note The initializers are written in the order the base and members are actually initialized in
         */
        constexpr StagingBuffer(u8 *pointer, size_t size, VmaAllocator vmaAllocator, vk::Buffer vkBuffer, VmaAllocation vmaAllocation) : span(pointer, size), vmaAllocator(vmaAllocator), vmaAllocation(vmaAllocation), vkBuffer(vkBuffer) {}

        StagingBuffer(const StagingBuffer &) = delete;

        StagingBuffer &operator=(const StagingBuffer &) = delete;

        ~StagingBuffer();
    };
    /**
     * @brief An abstraction over memory operations done in Vulkan, it's used for all allocations on the host GPU
     * @note This object cannot be copied as the VMA allocator is destroyed on destruction, a copy would destroy it twice
     */
    class MemoryManager {
      private:
        const GPU &gpu;
        VmaAllocator vmaAllocator{VK_NULL_HANDLE}; //!< The allocator that is created by the constructor and destroyed by the destructor

        /**
         * @brief If the result isn't VK_SUCCESS then an exception is thrown
         * @param function The name of the function to report in the exception, it defaults to the calling function
         */
        static void ThrowOnFail(VkResult result, const char *function = __builtin_FUNCTION());

      public:
        MemoryManager(const GPU &gpu);

        MemoryManager(const MemoryManager &) = delete;

        MemoryManager &operator=(const MemoryManager &) = delete;

        ~MemoryManager();

        /**
         * @brief Creates a buffer which is optimized for staging (Transfer Source)
         * @param size The size of the buffer in bytes
         */
        std::shared_ptr<StagingBuffer> AllocateStagingBuffer(vk::DeviceSize size);
    };
 }
		`@ -0,0 +1 @@`
							`Subproject commit 6889faaaa284e7ec514e75ea74b113539021d1ad`