Rework BufferManager, Buffer and BufferView

This commit encapsulates a complex sequence of cascading changes made in the process of supporting overlaps for buffers:
* We determined that it is impossible to resolve overlaps with multiple intervals per buffer within the constraints of each overlap being a contiguous view; support for multiple intervals was therefore dropped. The older buffer manager code was entirely reworked to be simpler, since it now only handles one interval per buffer, with the code now being based on `IntervalMap` but tailored specifically for buffers.
* During overlap resolution, we faced the problem of how existing views into the buffer being recreated would be updated: the buffer had to be replaced with a larger buffer that could contain all overlaps, and all existing views would need to be repointed to it. This was addressed by having a buffer own all views into itself, so that we could automatically recalculate the offset of all views and update the buffers with it.
* We still needed to update usages of existing views, which was done by handling all access (such as inside a recorded draw) to buffer view properties via `BufferView::RegisterUsage`, which dispatches a callback with the view and the corresponding backing buffer. This callback can be stored and called during overlap resolution with the new buffer.
* We had issues with the lifetime of the buffer due to the handle-like semantics of `BufferView` introduced in the last buffer-related commit: if we updated the view to be owned by a new buffer, we'd need to extend the lifetime of the new buffer rather than the older one. The only way to do this was a proxy owner object, `BufferDelegate`, which holds a shared pointer to the real `Buffer`, which in turn holds a pointer to all `BufferDelegate` objects so that they can be updated on repointing. A `BufferView` is effectively just a wrapper around `std::shared_ptr<BufferDelegate>` with more favorable semantics, generally just forwarding calls.
It should additionally be noted that, to support the use of `RegisterUsage`, the code around buffers in `GraphicsContext` was refactored to defer the actual binding until the recording phase.
2024-11-23 04:39:19 +01:00 · 2022-03-28 12:27:05 +05:30 · 2022-03-28 12:27:05 +05:30 · cb1ec9a7f4
commit cb1ec9a7f4
parent a6781b38f4
7 changed files with 363 additions and 304 deletions
--- a/app/src/main/cpp/skyline/gpu/buffer.cpp
+++ b/app/src/main/cpp/skyline/gpu/buffer.cpp
@ -8,45 +8,14 @@
 #include "buffer.h"

 namespace skyline::gpu {
-    vk::DeviceSize GuestBuffer::BufferSize() const {
-        vk::DeviceSize size{};
-        for (const auto &buffer : mappings)
-            size += buffer.size_bytes();
-        return size;
-    }
-
    void Buffer::SetupGuestMappings() {
-        auto &mappings{guest.mappings};
-        if (mappings.size() == 1) {
-            auto mapping{mappings.front()};
-            u8 *alignedData{util::AlignDown(mapping.data(), PAGE_SIZE)};
-            size_t alignedSize{static_cast<size_t>(util::AlignUp(mapping.data() + mapping.size(), PAGE_SIZE) - alignedData)};
+        u8 *alignedData{util::AlignDown(guest.data(), PAGE_SIZE)};
+        size_t alignedSize{static_cast<size_t>(util::AlignUp(guest.data() + guest.size(), PAGE_SIZE) - alignedData)};

-            alignedMirror = gpu.state.process->memory.CreateMirror(alignedData, alignedSize);
-            mirror = alignedMirror.subspan(static_cast<size_t>(mapping.data() - alignedData), mapping.size());
-        } else {
-            std::vector<span<u8>> alignedMappings;
+        alignedMirror = gpu.state.process->memory.CreateMirror(alignedData, alignedSize);
+        mirror = alignedMirror.subspan(static_cast<size_t>(guest.data() - alignedData), guest.size());

-            const auto &frontMapping{mappings.front()};
-            u8 *alignedData{util::AlignDown(frontMapping.data(), PAGE_SIZE)};
-            alignedMappings.emplace_back(alignedData, (frontMapping.data() + frontMapping.size()) - alignedData);
-
-            size_t totalSize{frontMapping.size()};
-            for (auto it{std::next(mappings.begin())}; it != std::prev(mappings.end()); ++it) {
-                auto mappingSize{it->size()};
-                alignedMappings.emplace_back(it->data(), mappingSize);
-                totalSize += mappingSize;
-            }
-
-            const auto &backMapping{mappings.back()};
-            totalSize += backMapping.size();
-            alignedMappings.emplace_back(backMapping.data(), util::AlignUp(backMapping.size(), PAGE_SIZE));
-
-            alignedMirror = gpu.state.process->memory.CreateMirrors(alignedMappings);
-            mirror = alignedMirror.subspan(static_cast<size_t>(frontMapping.data() - alignedData), totalSize);
-        }
-
-        trapHandle = gpu.state.nce->TrapRegions(mappings, true, [this] {
+        trapHandle = gpu.state.nce->TrapRegions(guest, true, [this] {
            std::lock_guard lock(*this);
            SynchronizeGuest(true); // We can skip trapping since the caller will do it
            WaitOnFence();
@ -58,7 +27,7 @@ namespace skyline::gpu {
        });
    }

-    Buffer::Buffer(GPU &gpu, GuestBuffer guest) : gpu(gpu), size(guest.BufferSize()), backing(gpu.memory.AllocateBuffer(size)), guest(std::move(guest)) {
+    Buffer::Buffer(GPU &gpu, GuestBuffer guest) : gpu(gpu), backing(gpu.memory.AllocateBuffer(guest.size())), guest(guest) {
        SetupGuestMappings();
    }

@ -111,7 +80,7 @@ namespace skyline::gpu {
        if (dirtyState != DirtyState::CpuDirty)
            return;

-        if (pCycle != cycle.lock())
+        if (!cycle.owner_before(pCycle))
            WaitOnFence();

        TRACE_EVENT("gpu", "Buffer::SynchronizeHostWithCycle");
@ -127,11 +96,12 @@ namespace skyline::gpu {
        }
    }

-    void Buffer::SynchronizeGuest(bool skipTrap) {
+    void Buffer::SynchronizeGuest(bool skipTrap, bool skipFence) {
        if (dirtyState != DirtyState::GpuDirty)
            return; // If the buffer has not been used on the GPU, there is no need to synchronize it

-        WaitOnFence();
+        if (!skipFence)
+            WaitOnFence();

        TRACE_EVENT("gpu", "Buffer::SynchronizeGuest");

@ -157,13 +127,20 @@ namespace skyline::gpu {
    };

    void Buffer::SynchronizeGuestWithCycle(const std::shared_ptr<FenceCycle> &pCycle) {
-        if (pCycle != cycle.lock())
+        if (!cycle.owner_before(pCycle))
            WaitOnFence();

        pCycle->AttachObject(std::make_shared<BufferGuestSync>(shared_from_this()));
        cycle = pCycle;
    }

+    void Buffer::Read(span<u8> data, vk::DeviceSize offset) {
+        if (dirtyState == DirtyState::CpuDirty || dirtyState == DirtyState::Clean)
+            std::memcpy(data.data(), mirror.data() + offset, data.size());
+        else if (dirtyState == DirtyState::GpuDirty)
+            std::memcpy(data.data(), backing.data() + offset, data.size());
+    }
+
    void Buffer::Write(span<u8> data, vk::DeviceSize offset) {
        if (dirtyState == DirtyState::CpuDirty || dirtyState == DirtyState::Clean)
            std::memcpy(mirror.data() + offset, data.data(), data.size());
@ -171,51 +148,89 @@ namespace skyline::gpu {
            std::memcpy(backing.data() + offset, data.data(), data.size());
    }

-    Buffer::BufferViewStorage::BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format) : offset(offset), range(range), format(format) {}
+    Buffer::BufferViewStorage::BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format) : offset(offset), size(size), format(format) {}

-    BufferView Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format) {
-        for (auto &view : views)
-            if (view.offset == offset && view.range == range && view.format == format)
-                return BufferView{shared_from_this(), &view};
-
-        views.emplace_back(offset, range, format);
-        return BufferView{shared_from_this(), &views.back()};
+    Buffer::BufferDelegate::BufferDelegate(std::shared_ptr<Buffer> pBuffer, Buffer::BufferViewStorage *view) : buffer(std::move(pBuffer)), view(view) {
+        iterator = buffer->delegates.emplace(buffer->delegates.end(), this);
    }

-    BufferView::BufferView(std::shared_ptr<Buffer> buffer, Buffer::BufferViewStorage *view) : buffer(buffer), view(view) {}
+    Buffer::BufferDelegate::~BufferDelegate() {
+        std::scoped_lock lock(*this);
+        buffer->delegates.erase(iterator);
+    }

-    void BufferView::lock() {
-        auto backing{std::atomic_load(&buffer)};
+    void Buffer::BufferDelegate::lock() {
+        auto lBuffer{std::atomic_load(&buffer)};
        while (true) {
-            backing->lock();
+            lBuffer->lock();

            auto latestBacking{std::atomic_load(&buffer)};
-            if (backing == latestBacking)
+            if (lBuffer == latestBacking)
                return;

-            backing->unlock();
-            backing = latestBacking;
+            lBuffer->unlock();
+            lBuffer = latestBacking;
        }
    }

-    void BufferView::unlock() {
+    void Buffer::BufferDelegate::unlock() {
        buffer->unlock();
    }

-    bool BufferView::try_lock() {
-        auto backing{std::atomic_load(&buffer)};
+    bool Buffer::BufferDelegate::try_lock() {
+        auto lBuffer{std::atomic_load(&buffer)};
        while (true) {
-            bool success{backing->try_lock()};
+            bool success{lBuffer->try_lock()};

-            auto latestBacking{std::atomic_load(&buffer)};
-            if (backing == latestBacking)
+            auto latestBuffer{std::atomic_load(&buffer)};
+            if (lBuffer == latestBuffer)
                // We want to ensure that the try_lock() was on the latest backing and not on an outdated one
                return success;

            if (success)
                // We only unlock() if the try_lock() was successful and we acquired the mutex
-                backing->unlock();
-            backing = latestBacking;
+                lBuffer->unlock();
+            lBuffer = latestBuffer;
        }
    }
+
+    BufferView Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format) {
+        for (auto &view : views)
+            if (view.offset == offset && view.size == size && view.format == format)
+                return BufferView{shared_from_this(), &view};
+
+        views.emplace_back(offset, size, format);
+        return BufferView{shared_from_this(), &views.back()};
+    }
+
+    BufferView::BufferView(std::shared_ptr<Buffer> buffer, Buffer::BufferViewStorage *view) : bufferDelegate(std::make_shared<Buffer::BufferDelegate>(std::move(buffer), view)) {}
+
+    void BufferView::AttachCycle(const std::shared_ptr<FenceCycle> &cycle) {
+        auto buffer{bufferDelegate->buffer.get()};
+        if (!buffer->cycle.owner_before(cycle)) {
+            buffer->WaitOnFence();
+            buffer->cycle = cycle;
+            cycle->AttachObject(bufferDelegate);
+        }
+    }
+
+    void BufferView::RegisterUsage(const std::function<void(const Buffer::BufferViewStorage &, const std::shared_ptr<Buffer> &)> &usageCallback) {
+        usageCallback(*bufferDelegate->view, bufferDelegate->buffer);
+        if (!bufferDelegate->usageCallback) {
+            bufferDelegate->usageCallback = usageCallback;
+        } else {
+            bufferDelegate->usageCallback = [usageCallback, oldCallback = std::move(bufferDelegate->usageCallback)](const Buffer::BufferViewStorage &pView, const std::shared_ptr<Buffer> &buffer) {
+                oldCallback(pView, buffer);
+                usageCallback(pView, buffer);
+            };
+        }
+    }
+
+    void BufferView::Read(span<u8> data, vk::DeviceSize offset) const {
+        bufferDelegate->buffer->Read(data, offset + bufferDelegate->view->offset);
+    }
+
+    void BufferView::Write(span<u8> data, vk::DeviceSize offset) const {
+        bufferDelegate->buffer->Write(data, offset + bufferDelegate->view->offset);
+    }
 }
--- a/app/src/main/cpp/skyline/gpu/buffer.h
+++ b/app/src/main/cpp/skyline/gpu/buffer.h
@ -7,18 +7,7 @@
 #include "memory_manager.h"

 namespace skyline::gpu {
-    /**
-     * @brief A descriptor for a GPU buffer on the guest
-     */
-    struct GuestBuffer {
-        using Mappings = boost::container::small_vector<span < u8>, 3>;
-        Mappings mappings; //!< Spans to CPU memory for the underlying data backing this buffer
-
-        /**
-         * @return The total size of the buffer by adding up the size of all mappings
-         */
-        vk::DeviceSize BufferSize() const;
-    };
+    using GuestBuffer = span<u8>; //!< The CPU mapping for the guest buffer, multiple mappings for buffers aren't supported since overlaps cannot be reconciled

    struct BufferView;
    class BufferManager;
@ -31,7 +20,6 @@ namespace skyline::gpu {
      private:
        GPU &gpu;
        std::mutex mutex; //!< Synchronizes any mutations to the buffer or its backing
-        vk::DeviceSize size;
        memory::Buffer backing;
        GuestBuffer guest;

@ -44,19 +32,46 @@ namespace skyline::gpu {
            GpuDirty, //!< The GPU buffer has been modified but the CPU mappings have not been updated
        } dirtyState{DirtyState::CpuDirty}; //!< The state of the CPU mappings with respect to the GPU buffer

+      public:
        /**
         * @brief Storage for all metadata about a specific view into the buffer, used to prevent redundant view creation and duplication of VkBufferView(s)
         */
        struct BufferViewStorage {
-          public:
            vk::DeviceSize offset;
-            vk::DeviceSize range;
+            vk::DeviceSize size;
            vk::Format format;

-            BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format);
+            BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format);
        };
+
+      private:
        std::list<BufferViewStorage> views; //!< BufferViewStorage(s) that are backed by this Buffer, used for storage and repointing to a new Buffer on deletion

+      public:
+        /**
+         * @brief A delegate for a strong reference to a Buffer by a BufferView which can be changed to another Buffer transparently
+         * @note This class conforms to the Lockable and BasicLockable C++ named requirements
+         */
+        struct BufferDelegate : public FenceCycleDependency {
+            std::shared_ptr<Buffer> buffer;
+            Buffer::BufferViewStorage *view;
+            std::function<void(const BufferViewStorage &, const std::shared_ptr<Buffer> &)> usageCallback;
+            std::list<BufferDelegate *>::iterator iterator;
+
+            BufferDelegate(std::shared_ptr<Buffer> buffer, Buffer::BufferViewStorage *view);
+
+            ~BufferDelegate();
+
+            void lock();
+
+            void unlock();
+
+            bool try_lock();
+        };
+
+      private:
+        std::list<BufferDelegate *> delegates; //!< The reference delegates for this buffer, used to prevent the buffer from being deleted while it is still in use
+
        friend BufferView;
        friend BufferManager;

@ -131,9 +146,10 @@ namespace skyline::gpu {
        /**
         * @brief Synchronizes the guest buffer with the host buffer
         * @param skipTrap If true, setting up a CPU trap will be skipped and the dirty state will be Clean/CpuDirty
+         * @param skipFence If true, waiting on the currently attached fence will be skipped
         * @note The buffer **must** be locked prior to calling this
         */
-        void SynchronizeGuest(bool skipTrap = false);
+        void SynchronizeGuest(bool skipTrap = false, bool skipFence = false);

        /**
         * @brief Synchronizes the guest buffer with the host buffer when the FenceCycle is signalled
@ -142,6 +158,11 @@ namespace skyline::gpu {
         */
        void SynchronizeGuestWithCycle(const std::shared_ptr<FenceCycle> &cycle);

+        /**
+         * @brief Reads data at the specified offset in the buffer
+         */
+        void Read(span<u8> data, vk::DeviceSize offset);
+
        /**
         * @brief Writes data at the specified offset in the buffer
         */
@ -151,7 +172,7 @@ namespace skyline::gpu {
         * @return A cached or newly created view into this buffer with the supplied attributes
         * @note The buffer **must** be locked prior to calling this
         */
-        BufferView GetView(vk::DeviceSize offset, vk::DeviceSize range, vk::Format format = {});
+        BufferView GetView(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format = {});
    };

    /**
@ -160,41 +181,70 @@ namespace skyline::gpu {
     * @note This class conforms to the Lockable and BasicLockable C++ named requirements
     */
    struct BufferView {
-        std::shared_ptr<Buffer> buffer;
-        Buffer::BufferViewStorage *view;
+        std::shared_ptr<Buffer::BufferDelegate> bufferDelegate;

        BufferView(std::shared_ptr<Buffer> buffer, Buffer::BufferViewStorage *view);

-        constexpr BufferView(nullptr_t = nullptr) : buffer(nullptr), view(nullptr) {}
-
-        constexpr operator bool() const {
-            return view != nullptr;
-        }
-
-        constexpr Buffer::BufferViewStorage *operator->() {
-            return view;
-        }
-
-        operator std::shared_ptr<FenceCycleDependency>() {
-            return buffer;
-        }
+        constexpr BufferView(nullptr_t = nullptr) : bufferDelegate(nullptr) {}

        /**
         * @brief Acquires an exclusive lock on the buffer for the calling thread
         * @note Naming is in accordance to the BasicLockable named requirement
         */
-        void lock();
+        void lock() const {
+            bufferDelegate->lock();
+        }

        /**
         * @brief Relinquishes an existing lock on the buffer by the calling thread
         * @note Naming is in accordance to the BasicLockable named requirement
         */
-        void unlock();
+        void unlock() const {
+            bufferDelegate->unlock();
+        }

        /**
         * @brief Attempts to acquire an exclusive lock but returns immediately if it's captured by another thread
         * @note Naming is in accordance to the Lockable named requirement
         */
-        bool try_lock();
+        bool try_lock() const {
+            return bufferDelegate->try_lock();
+        }
+
+        constexpr operator bool() const {
+            return bufferDelegate != nullptr;
+        }
+
+        /**
+         * @note The buffer **must** be locked prior to calling this
+         */
+        Buffer::BufferDelegate *operator->() const {
+            return bufferDelegate.get();
+        }
+
+        /**
+         * @brief Attaches a fence cycle to the underlying buffer in a way that it will be synchronized with the latest backing buffer
+         * @note The view **must** be locked prior to calling this
+         */
+        void AttachCycle(const std::shared_ptr<FenceCycle> &cycle);
+
+        /**
+         * @brief Registers a callback for a usage of this view, it may be called multiple times due to the view being recreated with different backings
+         * @note The callback will be automatically called the first time after registration
+         * @note The view **must** be locked prior to calling this
+         */
+        void RegisterUsage(const std::function<void(const Buffer::BufferViewStorage &, const std::shared_ptr<Buffer> &)> &usageCallback);
+
+        /**
+         * @brief Reads data at the specified offset in the view
+         * @note The view **must** be locked prior to calling this
+         */
+        void Read(span<u8> data, vk::DeviceSize offset) const;
+
+        /**
+         * @brief Writes data at the specified offset in the view
+         * @note The view **must** be locked prior to calling this
+         */
+        void Write(span<u8> data, vk::DeviceSize offset) const;
    };
 }
--- a/app/src/main/cpp/skyline/gpu/buffer_manager.cpp
+++ b/app/src/main/cpp/skyline/gpu/buffer_manager.cpp
@ -8,101 +8,68 @@
 namespace skyline::gpu {
    BufferManager::BufferManager(GPU &gpu) : gpu(gpu) {}

-    BufferView BufferManager::FindOrCreate(const GuestBuffer &guest) {
-        auto guestMapping{guest.mappings.front()};
-
-        /*
-         * Iterate over all buffers that overlap with the first mapping of the guest buffer and compare the mappings:
-         * 1) All mappings match up perfectly, we check that the rest of the supplied mappings correspond to mappings in the buffer
-         * 1.1) If they match as well, we return a view encompassing the entire buffer
-         * 2) Only a contiguous range of mappings match, we check for the overlap bounds, it can go two ways:
-         * 2.1) If the supplied buffer is smaller than the matching buffer, we return a view encompassing the mappings into the buffer
-         * 2.2) If the matching buffer is smaller than the supplied buffer, we make the matching buffer larger and return it
-         * 3) If there's another overlap we go back to (1) with it else we go to (4)
-         * 4) Create a new buffer and insert it in the map then return it
-         */
+    bool BufferManager::BufferLessThan(const std::shared_ptr<Buffer> &it, u8 *pointer) {
+        return it->guest.begin().base() < pointer;
+    }

+    BufferView BufferManager::FindOrCreate(GuestBuffer guestMapping, const std::shared_ptr<FenceCycle> &cycle) {
        std::scoped_lock lock(mutex);
-        std::shared_ptr<Buffer> match{};
-        auto mappingEnd{std::upper_bound(buffers.begin(), buffers.end(), guestMapping)}, hostMapping{mappingEnd};
-        if (hostMapping != buffers.begin() && (--hostMapping)->end() > guestMapping.begin()) {
-            auto &hostMappings{hostMapping->buffer->guest.mappings};
-            if (hostMapping->contains(guestMapping)) {
-                // We need to check that all corresponding mappings in the candidate buffer and the guest buffer match up
-                // Only the start of the first matched mapping and the end of the last mapping can not match up as this is the case for views
-                auto firstHostMapping{hostMapping->iterator};
-                auto lastGuestMapping{guest.mappings.back()};
-                auto endHostMapping{std::find_if(firstHostMapping, hostMappings.end(), [&lastGuestMapping](const span<u8> &it) {
-                    return lastGuestMapping.begin() > it.begin() && lastGuestMapping.end() > it.end();
-                })}; //!< A past-the-end iterator for the last host mapping, the final valid mapping is prior to this iterator
-                bool mappingMatch{std::equal(firstHostMapping, endHostMapping, guest.mappings.begin(), guest.mappings.end(), [](const span<u8> &lhs, const span<u8> &rhs) {
-                    return lhs.end() == rhs.end(); // We check end() here to implicitly ignore any offset from the first mapping
-                })};

-                auto &lastHostMapping{*std::prev(endHostMapping)};
-                if (firstHostMapping == hostMappings.begin() && firstHostMapping->begin() == guestMapping.begin() && mappingMatch && endHostMapping == hostMappings.end() && lastGuestMapping.end() == lastHostMapping.end()) {
-                    // We've gotten a perfect 1:1 match for *all* mappings from the start to end
-                    std::scoped_lock bufferLock(*hostMapping->buffer);
-                    return hostMapping->buffer->GetView(0, hostMapping->buffer->size);
-                } else if (mappingMatch && firstHostMapping->begin() > guestMapping.begin() && lastHostMapping.end() > lastGuestMapping.end()) {
-                    // We've gotten a guest buffer that is located entirely within a host buffer
-                    std::scoped_lock bufferLock(*hostMapping->buffer);
-                    return hostMapping->buffer->GetView(hostMapping->offset + static_cast<vk::DeviceSize>(hostMapping->begin() - guestMapping.begin()), guest.BufferSize());
-                }
+        // Lookup for any buffers overlapping with the supplied guest mapping
+        boost::container::small_vector<std::shared_ptr<Buffer>, 4> overlaps;
+        for (auto entryIt{std::lower_bound(buffers.begin(), buffers.end(), guestMapping.end().base(), BufferLessThan)}; entryIt != buffers.begin() && (*--entryIt)->guest.begin() <= guestMapping.end();)
+            if ((*entryIt)->guest.end() > guestMapping.begin())
+                overlaps.push_back(*entryIt);
+
+        if (overlaps.size() == 1) [[likely]] {
+            auto buffer{overlaps.front()};
+            if (buffer->guest.begin() <= guestMapping.begin() && buffer->guest.end() >= guestMapping.end()) {
+                // If we find a buffer which can entirely fit the guest mapping, we can just return a view into it
+                std::scoped_lock bufferLock{*buffer};
+                return buffer->GetView(static_cast<vk::DeviceSize>(guestMapping.begin() - buffer->guest.begin()), guestMapping.size());
            }
        }

-        /* TODO: Handle overlapping buffers
-        // Create a list of all overlapping buffers and update the guest mappings to fit them all
-        boost::container::small_vector<std::pair<std::shared_ptr<Buffer>, u32>, 4> overlappingBuffers;
-        GuestBuffer::Mappings newMappings;
-
-        auto guestMappingIt{guest.mappings.begin()};
-        while (true) {
-            do {
-                hostMapping->begin();
-                overlappingBuffers.emplace_back(hostMapping->buffer, 4);
-            } while (hostMapping != buffers.begin() && (--hostMapping)->end() > guestMappingIt->begin());
-
-            // Iterate over all guest mappings to find overlapping buffers, not just the first
-            auto nextGuestMappingIt{std::next(guestMappingIt)};
-            if (nextGuestMappingIt != guest.mappings.end())
-                hostMapping = std::upper_bound(buffers.begin(), buffers.end(), *nextGuestMappingIt);
-            else
-                break;
-            guestMappingIt = nextGuestMappingIt;
+        // Find the extents of the new buffer we want to create that can hold all overlapping buffers
+        auto lowestAddress{guestMapping.begin().base()}, highestAddress{guestMapping.end().base()};
+        for (const auto &overlap : overlaps) {
+            auto mapping{overlap->guest};
+            if (mapping.begin().base() < lowestAddress)
+                lowestAddress = mapping.begin().base();
+            if (mapping.end().base() > highestAddress)
+                highestAddress = mapping.end().base();
        }

-        // Create a buffer that can contain all the overlapping buffers
-        auto buffer{std::make_shared<Buffer>(gpu, guest)};
+        auto newBuffer{std::make_shared<Buffer>(gpu, span<u8>(lowestAddress, highestAddress))};
+        for (auto &overlap : overlaps) {
+            std::scoped_lock overlapLock{*overlap};

-        // Delete mappings from all overlapping buffers and repoint all buffer views
-        for (auto &overlappingBuffer : overlappingBuffers) {
-            std::scoped_lock overlappingBufferLock(*overlappingBuffer.first);
-            auto &bufferMappings{hostMapping->buffer->guest.mappings};
+            if (!overlap->cycle.owner_before(cycle))
+                overlap->WaitOnFence(); // We want to only wait on the fence cycle if it's not the current fence cycle
+            overlap->SynchronizeGuest(true, true); // Sync back the buffer before we destroy it

-            // Delete all mappings of the overlapping buffers
-            while ((++it) != buffer->guest.mappings.end()) {
-                guestMapping = *it;
-                auto mapping{std::upper_bound(buffers.begin(), buffers.end(), guestMapping)};
-                buffers.emplace(mapping, BufferMapping{buffer, it, offset, guestMapping});
-                offset += mapping->size_bytes();
+            buffers.erase(std::find(buffers.begin(), buffers.end(), overlap));
+
+            // Transfer all views from the overlapping buffer to the new buffer with the new buffer and updated offset
+            vk::DeviceSize overlapOffset{static_cast<vk::DeviceSize>(overlap->guest.begin() - newBuffer->guest.begin())};
+            if (overlapOffset != 0)
+                for (auto &view : overlap->views)
+                    view.offset += overlapOffset;
+
+            newBuffer->views.splice(newBuffer->views.end(), overlap->views);
+
+            // Transfer all delegates references from the overlapping buffer to the new buffer
+            for (auto &delegate : overlap->delegates) {
+                atomic_exchange(&delegate->buffer, newBuffer);
+                if (delegate->usageCallback)
+                    delegate->usageCallback(*delegate->view, newBuffer);
            }
-        }
-         */

-        auto buffer{std::make_shared<Buffer>(gpu, guest)};
-        auto it{buffer->guest.mappings.begin()};
-        buffers.emplace(mappingEnd, BufferMapping{buffer, it, 0, guestMapping});
-
-        vk::DeviceSize offset{};
-        while ((++it) != buffer->guest.mappings.end()) {
-            guestMapping = *it;
-            auto mapping{std::upper_bound(buffers.begin(), buffers.end(), guestMapping)};
-            buffers.emplace(mapping, BufferMapping{buffer, it, offset, guestMapping});
-            offset += mapping->size_bytes();
+            newBuffer->delegates.splice(newBuffer->delegates.end(), overlap->delegates);
        }

-        return buffer->GetView(0, buffer->size);
+        buffers.insert(std::lower_bound(buffers.begin(), buffers.end(), newBuffer->guest.end().base(), BufferLessThan), newBuffer);
+
+        return newBuffer->GetView(static_cast<vk::DeviceSize>(guestMapping.begin() - newBuffer->guest.begin()), guestMapping.size());
    }
 }
--- a/app/src/main/cpp/skyline/gpu/buffer_manager.h
+++ b/app/src/main/cpp/skyline/gpu/buffer_manager.h
@ -11,25 +11,14 @@ namespace skyline::gpu {
     */
    class BufferManager {
      private:
-        /**
-         * @brief A single contiguous mapping of a buffer in the CPU address space
-         */
-        struct BufferMapping : span<u8> {
-            std::shared_ptr<Buffer> buffer;
-            GuestBuffer::Mappings::iterator iterator; //!< An iterator to the mapping in the buffer's GuestBufferMappings corresponding to this mapping
-            vk::DeviceSize offset; //!< Offset of this mapping relative to the start of the buffer
-
-            template<typename... Args>
-            BufferMapping(std::shared_ptr<Buffer> buffer, GuestBuffer::Mappings::iterator iterator, vk::DeviceSize offset, Args &&... args)
-                : span<u8>(std::forward<Args>(args)...),
-                  buffer(std::move(buffer)),
-                  iterator(iterator),
-                  offset(offset) {}
-        };
-
        GPU &gpu;
        std::mutex mutex; //!< Synchronizes access to the buffer mappings
-        std::vector<BufferMapping> buffers; //!< A sorted vector of all buffer mappings
+        std::vector<std::shared_ptr<Buffer>> buffers; //!< A sorted vector of all buffer mappings
+
+        /**
+         * @return If the end of the supplied buffer is less than the supplied pointer
+         */
+        static bool BufferLessThan(const std::shared_ptr<Buffer> &it, u8 *pointer);

      public:
        BufferManager(GPU &gpu);
@ -37,6 +26,6 @@ namespace skyline::gpu {
        /**
         * @return A pre-existing or newly created Buffer object which covers the supplied mappings
         */
-        BufferView FindOrCreate(const GuestBuffer &guest);
+        BufferView FindOrCreate(GuestBuffer guestMapping, const std::shared_ptr<FenceCycle> &cycle = nullptr);
    };
 }
--- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp
+++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp
@ -35,13 +35,10 @@ namespace skyline::gpu::interconnect {
        cycle->AttachObject(view->shared_from_this());
    }

-    void CommandExecutor::AttachBuffer(BufferView view) {
-        auto buffer{view.buffer.get()};
-        if (!syncBuffers.contains(buffer)) {
-            buffer->WaitOnFence();
-            buffer->cycle = cycle;
-            cycle->AttachObject(view);
-            syncBuffers.emplace(buffer);
+    void CommandExecutor::AttachBuffer(BufferView &view) {
+        if (!syncBuffers.contains(view.bufferDelegate)) {
+            view.AttachCycle(cycle);
+            syncBuffers.emplace(view.bufferDelegate);
        }
    }

@ -133,8 +130,10 @@ namespace skyline::gpu::interconnect {
                for (auto texture : syncTextures)
                    texture->SynchronizeHostWithBuffer(commandBuffer, cycle, true);

-                for (auto buffer : syncBuffers)
-                    buffer->SynchronizeHostWithCycle(cycle, true);
+                for (const auto& delegate : syncBuffers) {
+                    delegate->buffer->SynchronizeHostWithCycle(cycle, true);
+                    delegate->usageCallback = nullptr;
+                }

                vk::RenderPass lRenderPass;
                u32 subpassIndex;
--- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h
+++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h
@ -16,11 +16,12 @@ namespace skyline::gpu::interconnect {
      private:
        GPU &gpu;
        CommandScheduler::ActiveCommandBuffer activeCommandBuffer;
-        std::shared_ptr<FenceCycle> cycle;
        boost::container::stable_vector<node::NodeVariant> nodes;
        node::RenderPassNode *renderPass{};
        std::unordered_set<Texture *> syncTextures; //!< All textures that need to be synced prior to and after execution
-        std::unordered_set<Buffer *> syncBuffers; //!< All buffers that need to be synced prior to and after execution
+
+        using SharedBufferDelegate = std::shared_ptr<Buffer::BufferDelegate>;
+        std::unordered_set<SharedBufferDelegate> syncBuffers; //!< Delegates of all buffers that need to be synced prior to and after execution

        /**
         * @return If a new render pass was created by the function or the current one was reused as it was compatible
@ -28,6 +29,8 @@ namespace skyline::gpu::interconnect {
        bool CreateRenderPass(vk::Rect2D renderArea);

      public:
+        std::shared_ptr<FenceCycle> cycle; //!< The fence cycle that this command executor uses to wait for the GPU to finish executing commands
+
        CommandExecutor(const DeviceState &state);

        ~CommandExecutor();
@ -44,7 +47,7 @@ namespace skyline::gpu::interconnect {
         * @note The supplied buffer **must** be locked by the calling thread
         * @note This'll automatically handle syncing of the buffer in the most optimal way possible
         */
-        void AttachBuffer(BufferView view);
+        void AttachBuffer(BufferView &view);

        /**
         * @brief Attach the lifetime of the fence cycle dependency to the command buffer
--- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h
+++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h
@ -571,7 +571,6 @@ namespace skyline::gpu::interconnect {
        struct ConstantBuffer {
            IOVA iova;
            u32 size;
-            GuestBuffer guest;
            BufferView view;

            /**
@ -581,20 +580,9 @@ namespace skyline::gpu::interconnect {
            template<typename T>
            T Read(size_t offset) const {
                T object;
-                size_t objectOffset{};
-                for (auto &mapping : guest.mappings) {
-                    if (offset < mapping.size_bytes()) {
-                        auto copySize{std::min(mapping.size_bytes() - offset, sizeof(T))};
-                        std::memcpy(reinterpret_cast<u8 *>(&object) + objectOffset, mapping.data() + offset, copySize);
-                        objectOffset += copySize;
-                        if (objectOffset == sizeof(T))
-                            return object;
-                        offset = mapping.size_bytes();
-                    } else {
-                        offset -= mapping.size_bytes();
-                    }
-                }
-                throw exception("Object extent ({} + {} = {}) is larger than constant buffer size: {}", size + offset, sizeof(T), size + offset + sizeof(T), size);
+                std::scoped_lock lock{view};
+                view.Read(span<T>(object).template cast<u8>(), offset);
+                return object;
            }

            /**
@ -604,7 +592,7 @@ namespace skyline::gpu::interconnect {
            template<typename T>
            void Write(T &object, size_t offset) {
                std::scoped_lock lock{view};
-                view.buffer->Write(span<T>(object).template cast<u8>(), view->offset + offset);
+                view.Write(span<T>(object).template cast<u8>(), offset);
            }
        };
        ConstantBuffer constantBufferSelector; //!< The constant buffer selector is used to bind a constant buffer to a stage or update data in it
@ -633,12 +621,7 @@ namespace skyline::gpu::interconnect {

            auto mappings{channelCtx.asCtx->gmmu.TranslateRange(constantBufferSelector.iova, constantBufferSelector.size)};

-            // Ignore unmapped areas from mappings due to buggy games setting the wrong cbuf size
-            mappings.erase(ranges::find_if(mappings, [](const auto &mapping) { return !mapping.valid(); }), mappings.end());
-
-            constantBufferSelector.guest.mappings.assign(mappings.begin(), mappings.end());
-
-            constantBufferSelector.view = gpu.buffer.FindOrCreate(constantBufferSelector.guest);
+            constantBufferSelector.view = gpu.buffer.FindOrCreate(mappings.front(), executor.cycle);
            return constantBufferSelector;
        }

@ -750,14 +733,11 @@ namespace skyline::gpu::interconnect {
            .convert_depth_mode = true // This is required for the default GPU register state
        };

-        constexpr static size_t PipelineUniqueDescriptorTypeCount{2}; //!< The amount of unique descriptor types that may be bound to a pipeline
-        constexpr static size_t MaxPipelineDescriptorWriteCount{maxwell3d::PipelineStageCount * PipelineUniqueDescriptorTypeCount}; //!< The maxium amount of descriptors writes that are used to bind a pipeline
+        constexpr static size_t PipelineUniqueDescriptorTypeCount{3}; //!< The amount of unique descriptor types that may be bound to a pipeline
+        constexpr static size_t PipelineDescriptorWritesReservedCount{maxwell3d::PipelineStageCount * PipelineUniqueDescriptorTypeCount}; //!< The number of descriptor writes reserved in advance to bind a pipeline; this is not a hard limit due to the Adreno descriptor quirk
        constexpr static size_t MaxPipelineDescriptorCount{100}; //!< The maxium amount of descriptors we support being bound to a pipeline

-        boost::container::static_vector<vk::WriteDescriptorSet, MaxPipelineDescriptorWriteCount> descriptorSetWrites;
        boost::container::static_vector<vk::DescriptorSetLayoutBinding, MaxPipelineDescriptorCount> layoutBindings;
-        boost::container::static_vector<vk::DescriptorBufferInfo, MaxPipelineDescriptorCount> bufferInfo;
-        boost::container::static_vector<vk::DescriptorImageInfo, MaxPipelineDescriptorCount> imageInfo;

        /**
         * @brief All state concerning the shader programs and their bindings
@ -767,7 +747,18 @@ namespace skyline::gpu::interconnect {
            boost::container::static_vector<std::shared_ptr<vk::raii::ShaderModule>, maxwell3d::PipelineStageCount> shaderModules; //!< Shader modules for every pipeline stage
            boost::container::static_vector<vk::PipelineShaderStageCreateInfo, maxwell3d::PipelineStageCount> shaderStages; //!< Shader modules for every pipeline stage
            vk::raii::DescriptorSetLayout descriptorSetLayout; //!< The descriptor set layout for the pipeline (Only valid when `activeShaderStagesInfoCount` is non-zero)
-            span<vk::WriteDescriptorSet> descriptorSetWrites; //!< The writes to the descriptor set that need to be done prior to executing a pipeline
+
+            struct DescriptorSetWrites { // Keeps the descriptor writes together with the descriptor storage they point into
+                std::vector<vk::WriteDescriptorSet> writes; //!< The descriptor set writes for the pipeline
+                std::vector<vk::DescriptorBufferInfo> bufferDescriptors; //!< The storage for buffer descriptors, entries in `writes` point into this via `pBufferInfo`
+                std::vector<vk::DescriptorImageInfo> imageDescriptors; //!< The storage for image descriptors, entries in `writes` point into this via `pImageInfo`
+
+                std::vector<vk::WriteDescriptorSet> &operator*() { // Shorthand for accessing `writes`
+                    return writes;
+                }
+            };
+
+            std::unique_ptr<DescriptorSetWrites> descriptorSetWrites; //!< The writes to the descriptor set that need to be done prior to executing a pipeline
        };

        /**
@ -798,11 +789,10 @@ namespace skyline::gpu::interconnect {
            auto ssbo{cbuf.Read<SsboDescriptor>(descriptor.cbuf_offset)};

            auto mappings{channelCtx.asCtx->gmmu.TranslateRange(ssbo.iova, ssbo.size)};
+            if (mappings.size() != 1)
+                Logger::Warn("Multiple buffer mappings ({}) are not supported", mappings.size());

-            GuestBuffer guestBuffer;
-            guestBuffer.mappings.assign(mappings.begin(), mappings.end());
-
-            return gpu.buffer.FindOrCreate(guestBuffer);
+            return gpu.buffer.FindOrCreate(mappings.front(), executor.cycle);
        }

        /**
@ -889,14 +879,29 @@ namespace skyline::gpu::interconnect {
                }
            }

-            descriptorSetWrites.clear();
+            auto descriptorSetWrites{std::make_unique<ShaderProgramState::DescriptorSetWrites>()};
+            auto &descriptorWrites{**descriptorSetWrites};
+            descriptorWrites.reserve(PipelineDescriptorWritesReservedCount);
+
+            auto &bufferDescriptors{descriptorSetWrites->bufferDescriptors};
+            auto &imageDescriptors{descriptorSetWrites->imageDescriptors};
+            size_t bufferCount{}, imageCount{};
+            for (auto &pipelineStage : pipelineStages) {
+                if (pipelineStage.enabled) {
+                    auto &program{pipelineStage.program->program};
+                    bufferCount += program.info.constant_buffer_descriptors.size() + program.info.storage_buffers_descriptors.size();
+                    imageCount += program.info.texture_descriptors.size();
+                }
+            }
+            bufferDescriptors.resize(bufferCount);
+            imageDescriptors.resize(imageCount);
+
            layoutBindings.clear();
-            bufferInfo.clear();
-            imageInfo.clear();

            runtimeInfo.previous_stage_stores.mask.set(); // First stage should always have all bits set
            ShaderCompiler::Backend::Bindings bindings{};

+            size_t bufferIndex{}, imageIndex{};
            boost::container::static_vector<std::shared_ptr<vk::raii::ShaderModule>, maxwell3d::PipelineStageCount> shaderModules;
            boost::container::static_vector<vk::PipelineShaderStageCreateInfo, maxwell3d::PipelineStageCount> shaderStages;
            for (auto &pipelineStage : pipelineStages) {
@ -920,11 +925,11 @@ namespace skyline::gpu::interconnect {

                u32 bindingIndex{pipelineStage.bindingBase};
                if (!program.info.constant_buffer_descriptors.empty()) {
-                    descriptorSetWrites.push_back(vk::WriteDescriptorSet{
+                    descriptorWrites.push_back(vk::WriteDescriptorSet{
                        .dstBinding = bindingIndex,
                        .descriptorCount = static_cast<u32>(program.info.constant_buffer_descriptors.size()),
                        .descriptorType = vk::DescriptorType::eUniformBuffer,
-                        .pBufferInfo = bufferInfo.data() + bufferInfo.size(),
+                        .pBufferInfo = bufferDescriptors.data() + bufferIndex,
                    });

                    for (auto &constantBuffer : program.info.constant_buffer_descriptors) {
@ -936,23 +941,24 @@ namespace skyline::gpu::interconnect {
                        });

                        auto view{pipelineStage.constantBuffers[constantBuffer.index].view};
-                        std::scoped_lock lock{view};
-                        bufferInfo.push_back(vk::DescriptorBufferInfo{
-                            .buffer = view.buffer->GetBacking(),
-                            .offset = view->offset,
-                            .range = view->range,
+                        std::scoped_lock lock(view);
+                        view.RegisterUsage([descriptor = bufferDescriptors.data() + bufferIndex++](const Buffer::BufferViewStorage &view, const std::shared_ptr<Buffer> &buffer) {
+                            *descriptor = vk::DescriptorBufferInfo{
+                                .buffer = buffer->GetBacking(),
+                                .offset = view.offset,
+                                .range = view.size,
+                            };
                        });
                        executor.AttachBuffer(view);
                    }
                }

-
                if (!program.info.storage_buffers_descriptors.empty()) {
-                    descriptorSetWrites.push_back({
+                    descriptorWrites.push_back(vk::WriteDescriptorSet{
                        .dstBinding = bindingIndex,
                        .descriptorCount = static_cast<u32>(program.info.storage_buffers_descriptors.size()),
                        .descriptorType = vk::DescriptorType::eStorageBuffer,
-                        .pBufferInfo = bufferInfo.data() + bufferInfo.size(),
+                        .pBufferInfo = bufferDescriptors.data() + bufferIndex,
                    });

                    for (auto &storageBuffer : program.info.storage_buffers_descriptors) {
@ -965,10 +971,12 @@ namespace skyline::gpu::interconnect {

                        auto view{GetSsboViewFromDescriptor(storageBuffer, pipelineStage.constantBuffers)};
                        std::scoped_lock lock{view};
-                        bufferInfo.push_back(vk::DescriptorBufferInfo{
-                            .buffer = view.buffer->GetBacking(),
-                            .offset = view->offset,
-                            .range = view->range,
+                        view.RegisterUsage([descriptor = bufferDescriptors.data() + bufferIndex++](const Buffer::BufferViewStorage &view, const std::shared_ptr<Buffer> &buffer) {
+                            *descriptor = vk::DescriptorBufferInfo{
+                                .buffer = buffer->GetBacking(),
+                                .offset = view.offset,
+                                .range = view.size,
+                            };
                        });
                        executor.AttachBuffer(view);
                    }
@ -982,22 +990,22 @@ namespace skyline::gpu::interconnect {

                if (!program.info.texture_descriptors.empty()) {
                    if (!gpu.traits.quirks.needsIndividualTextureBindingWrites)
-                        descriptorSetWrites.push_back(vk::WriteDescriptorSet{
+                        descriptorWrites.push_back(vk::WriteDescriptorSet{
                            .dstBinding = bindingIndex,
                            .descriptorCount = static_cast<u32>(program.info.texture_descriptors.size()),
                            .descriptorType = vk::DescriptorType::eCombinedImageSampler,
-                            .pImageInfo = imageInfo.data() + imageInfo.size(),
+                            .pImageInfo = imageDescriptors.data() + imageIndex,
                        });
                    else
-                        descriptorSetWrites.reserve(descriptorSetWrites.size() + program.info.texture_descriptors.size());
+                        descriptorWrites.reserve(descriptorWrites.size() + program.info.texture_descriptors.size());

                    for (auto &texture : program.info.texture_descriptors) {
                        if (gpu.traits.quirks.needsIndividualTextureBindingWrites)
-                            descriptorSetWrites.push_back(vk::WriteDescriptorSet{
+                            descriptorWrites.push_back(vk::WriteDescriptorSet{
                                .dstBinding = bindingIndex,
                                .descriptorCount = 1,
                                .descriptorType = vk::DescriptorType::eCombinedImageSampler,
-                                .pImageInfo = imageInfo.data() + imageInfo.size(),
+                                .pImageInfo = imageDescriptors.data() + imageIndex,
                            });

                        layoutBindings.push_back(vk::DescriptorSetLayoutBinding{
@ -1020,11 +1028,11 @@ namespace skyline::gpu::interconnect {
                        auto textureView{GetPoolTextureView(handle.textureIndex)};

                        std::scoped_lock lock(*textureView);
-                        imageInfo.push_back(vk::DescriptorImageInfo{
+                        imageDescriptors[imageIndex++] = vk::DescriptorImageInfo{
                            .sampler = **sampler,
                            .imageView = textureView->GetView(),
                            .imageLayout = textureView->texture->layout,
-                        });
+                        };
                        executor.AttachTexture(textureView.get());
                        executor.AttachDependency(std::move(sampler));
                    }
@ -1048,7 +1056,7 @@ namespace skyline::gpu::interconnect {
                    .pBindings = layoutBindings.data(),
                    .bindingCount = static_cast<u32>(layoutBindings.size()),
                }),
-                descriptorSetWrites,
+                std::move(descriptorSetWrites),
            };
        }

@ -1643,11 +1651,11 @@ namespace skyline::gpu::interconnect {
            else if (vertexBuffer.view)
                return vertexBuffer.view;

-            GuestBuffer guest;
            auto mappings{channelCtx.asCtx->gmmu.TranslateRange(vertexBuffer.start, (vertexBuffer.end + 1) - vertexBuffer.start)};
-            guest.mappings.assign(mappings.begin(), mappings.end());
+            if (mappings.size() != 1)
+                Logger::Warn("Multiple buffer mappings ({}) are not supported", mappings.size());

-            vertexBuffer.view = gpu.buffer.FindOrCreate(guest);
+            vertexBuffer.view = gpu.buffer.FindOrCreate(mappings.front(), executor.cycle);
            return vertexBuffer.view;
        }

@ -1842,7 +1850,7 @@ namespace skyline::gpu::interconnect {
         * @tparam ConvGR Converts all green component
         * @tparam SwapBR Swaps blue and red components
         */
-        template <bool ConvGR, bool SwapBR>
+        template<bool ConvGR, bool SwapBR>
        vk::ComponentMapping ConvertTicSwizzleMapping(TextureImageControl::FormatWord format) {
            auto convertComponentSwizzle{[](TextureImageControl::ImageSwizzle swizzle) {
                switch (swizzle) {
@ -2223,11 +2231,12 @@ namespace skyline::gpu::interconnect {
            else if (indexBuffer.view && size == indexBuffer.viewSize)
                return indexBuffer.view;

-            GuestBuffer guestBuffer;
            auto mappings{channelCtx.asCtx->gmmu.TranslateRange(indexBuffer.start, size)};
-            guestBuffer.mappings.assign(mappings.begin(), mappings.end());
+            if (mappings.size() != 1)
+                Logger::Warn("Multiple buffer mappings ({}) are not supported", mappings.size());

-            indexBuffer.view = gpu.buffer.FindOrCreate(guestBuffer);
+            auto mapping{mappings.front()};
+            indexBuffer.view = gpu.buffer.FindOrCreate(span<u8>(mapping.data(), size), executor.cycle);
            return indexBuffer.view;
        }

@ -2433,33 +2442,43 @@ namespace skyline::gpu::interconnect {
        void Draw(u32 count, u32 first, i32 vertexOffset = 0) {
            // Shader + Binding Setup
            auto programState{CompileShaderProgramState()};
-
            auto descriptorSet{gpu.descriptor.AllocateSet(*programState.descriptorSetLayout)};
-            for (auto &descriptorSetWrite : programState.descriptorSetWrites)
+            for (auto &descriptorSetWrite : **programState.descriptorSetWrites)
                descriptorSetWrite.dstSet = descriptorSet;
-            gpu.vkDevice.updateDescriptorSets(programState.descriptorSetWrites, nullptr);

            vk::raii::PipelineLayout pipelineLayout(gpu.vkDevice, vk::PipelineLayoutCreateInfo{
                .pSetLayouts = &*programState.descriptorSetLayout,
                .setLayoutCount = 1,
            });

-            vk::Buffer indexBufferHandle;
-            vk::DeviceSize indexBufferOffset;
-            vk::IndexType indexBufferType;
+            struct BoundIndexBuffer { // Index buffer binding state that is written by the view's usage callback and read inside the subpass lambda
+                vk::Buffer handle{}; //!< Backing buffer handle, assigned from `buffer->GetBacking()` in the usage callback
+                vk::DeviceSize offset{}; //!< Offset to bind at, assigned from `view.offset` in the usage callback
+                vk::IndexType type{}; //!< Index type, copied from `indexBuffer.type` while setting up an indexed draw
+            };
+
+            auto boundIndexBuffer{std::make_shared<BoundIndexBuffer>()}; // Shared so the usage callback and the subpass lambda (both capturing by copy) refer to the same storage
            if constexpr (IsIndexed) {
                auto indexBufferView{GetIndexBuffer(count)};
-                std::scoped_lock lock(indexBufferView);
-                executor.AttachBuffer(indexBufferView);
+                {
+                    std::scoped_lock lock(indexBufferView);

-                indexBufferHandle = indexBufferView.buffer->GetBacking();
-                indexBufferOffset = indexBufferView->offset;
-                indexBufferType = indexBuffer.type;
+                    boundIndexBuffer->type = indexBuffer.type;
+                    indexBufferView.RegisterUsage([=](const Buffer::BufferViewStorage &view, const std::shared_ptr<Buffer> &buffer) {
+                        boundIndexBuffer->handle = buffer->GetBacking();
+                        boundIndexBuffer->offset = view.offset;
+                    });
+
+                    executor.AttachBuffer(indexBufferView);
+                }
            }

            // Vertex Buffer Setup
-            std::array<vk::Buffer, maxwell3d::VertexBufferCount> vertexBufferHandles{};
-            std::array<vk::DeviceSize, maxwell3d::VertexBufferCount> vertexBufferOffsets{};
+            struct BoundVertexBuffers { // Vertex buffer binding state that is written by the views' usage callbacks and read inside the subpass lambda
+                std::array<vk::Buffer, maxwell3d::VertexBufferCount> handles{}; //!< Backing buffer handle per binding index, entries that stay null are skipped when binding
+                std::array<vk::DeviceSize, maxwell3d::VertexBufferCount> offsets{}; //!< Offset per binding index, assigned from `view.offset` in the usage callback
+            };
+            auto boundVertexBuffers{std::make_shared<BoundVertexBuffers>()}; // The usage callbacks keep raw pointers into these arrays, the subpass lambda's copy of this shared_ptr keeps the allocation alive

            boost::container::static_vector<vk::VertexInputBindingDescription, maxwell3d::VertexBufferCount> vertexBindingDescriptions{};
            boost::container::static_vector<vk::VertexInputBindingDivisorDescriptionEXT, maxwell3d::VertexBufferCount> vertexBindingDivisorsDescriptions{};
@ -2473,8 +2492,11 @@ namespace skyline::gpu::interconnect {
                        vertexBindingDivisorsDescriptions.push_back(vertexBuffer.bindingDivisorDescription);

                    std::scoped_lock vertexBufferLock(vertexBufferView);
-                    vertexBufferHandles[index] = vertexBufferView.buffer->GetBacking();
-                    vertexBufferOffsets[index] = vertexBufferView->offset;
+                    vertexBufferView.RegisterUsage([handle = boundVertexBuffers->handles.data() + index, offset = boundVertexBuffers->offsets.data() + index](const Buffer::BufferViewStorage &view, const std::shared_ptr<Buffer> &buffer) {
+                        *handle = buffer->GetBacking();
+                        *offset = view.offset;
+                    });
+
                    executor.AttachBuffer(vertexBufferView);
                }
            }
@ -2505,18 +2527,29 @@ namespace skyline::gpu::interconnect {
                depthTargetLock.emplace(*depthRenderTargetView);

            // Draw Persistent Storage
-            struct Storage : FenceCycleDependency {
+            struct DrawStorage {
+                vk::raii::DescriptorSetLayout descriptorSetLayout;
+                std::unique_ptr<ShaderProgramState::DescriptorSetWrites> descriptorSetWrites;
                vk::raii::PipelineLayout pipelineLayout;
-                std::optional<vk::raii::Pipeline> pipeline;
-                DescriptorAllocator::ActiveDescriptorSet descriptorSet;

-                Storage(vk::raii::PipelineLayout &&pipelineLayout, DescriptorAllocator::ActiveDescriptorSet &&descriptorSet) : pipelineLayout(std::move(pipelineLayout)), descriptorSet(std::move(descriptorSet)) {}
+                DrawStorage(vk::raii::DescriptorSetLayout &&descriptorSetLayout, std::unique_ptr<ShaderProgramState::DescriptorSetWrites> &&descriptorSetWrites, vk::raii::PipelineLayout &&pipelineLayout) : descriptorSetLayout(std::move(descriptorSetLayout)), descriptorSetWrites(std::move(descriptorSetWrites)), pipelineLayout(std::move(pipelineLayout)) {}
            };

-            auto storage{std::make_shared<Storage>(std::move(pipelineLayout), std::move(descriptorSet))};
+            auto drawStorage{std::make_shared<DrawStorage>(std::move(programState.descriptorSetLayout), std::move(programState.descriptorSetWrites), std::move(pipelineLayout))};
+
+            // Command Buffer Persistent Storage
+            struct FenceStorage : FenceCycleDependency {
+                std::optional<vk::raii::Pipeline> pipeline;
+                DescriptorAllocator::ActiveDescriptorSet descriptorSet;
+                std::shared_ptr<DrawStorage> drawStorage{};
+
+                FenceStorage(DescriptorAllocator::ActiveDescriptorSet &&descriptorSet) : descriptorSet(std::move(descriptorSet)) {}
+            };
+
+            auto fenceStorage{std::make_shared<FenceStorage>(std::move(descriptorSet))};

            // Submit Draw
-            executor.AddSubpass([=, &vkDevice = gpu.vkDevice, shaderModules = programState.shaderModules, shaderStages = programState.shaderStages, inputAssemblyState = inputAssemblyState, multiViewport = gpu.traits.supportsMultipleViewports, viewports = viewports, scissors = scissors, rasterizerState = rasterizerState, multisampleState = multisampleState, depthState = depthState, blendState = blendState, storage = std::move(storage), supportsVertexAttributeDivisor = gpu.traits.supportsVertexAttributeDivisor, vertexBufferHandles = std::move(vertexBufferHandles), vertexBufferOffsets = std::move(vertexBufferOffsets), pipelineCache = *pipelineCache](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &, vk::RenderPass renderPass, u32 subpassIndex) mutable {
+            executor.AddSubpass([=, &vkDevice = gpu.vkDevice, shaderModules = programState.shaderModules, shaderStages = programState.shaderStages, inputAssemblyState = inputAssemblyState, multiViewport = gpu.traits.supportsMultipleViewports, viewports = viewports, scissors = scissors, rasterizerState = rasterizerState, multisampleState = multisampleState, depthState = depthState, blendState = blendState, drawStorage = std::move(drawStorage), fenceStorage = std::move(fenceStorage), supportsVertexAttributeDivisor = gpu.traits.supportsVertexAttributeDivisor, pipelineCache = *pipelineCache](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &, vk::RenderPass renderPass, u32 subpassIndex) mutable {
                vk::StructureChain<vk::PipelineVertexInputStateCreateInfo, vk::PipelineVertexInputDivisorStateCreateInfoEXT> vertexState{
                    vk::PipelineVertexInputStateCreateInfo{
                        .pVertexBindingDescriptions = vertexBindingDescriptions.data(),
@ -2553,7 +2586,7 @@ namespace skyline::gpu::interconnect {
                    .pDepthStencilState = &depthState,
                    .pColorBlendState = &blendState,
                    .pDynamicState = nullptr,
-                    .layout = *storage->pipelineLayout,
+                    .layout = *drawStorage->pipelineLayout,
                    .renderPass = renderPass,
                    .subpass = subpassIndex,
                };
@ -2564,6 +2597,7 @@ namespace skyline::gpu::interconnect {

                commandBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline.value);

+                auto &vertexBufferHandles{boundVertexBuffers->handles};
                for (u32 bindingIndex{}; bindingIndex != vertexBufferHandles.size(); bindingIndex++) {
                    // We need to bind all non-null vertex buffers while skipping any null ones
                    if (vertexBufferHandles[bindingIndex]) {
@ -2572,24 +2606,26 @@ namespace skyline::gpu::interconnect {
                            bindingEndIndex++;

                        u32 bindingCount{bindingEndIndex - bindingIndex};
-                        commandBuffer.bindVertexBuffers(bindingIndex, span(vertexBufferHandles.data() + bindingIndex, bindingCount), span(vertexBufferOffsets.data() + bindingIndex, bindingCount));
+                        commandBuffer.bindVertexBuffers(bindingIndex, span(vertexBufferHandles.data() + bindingIndex, bindingCount), span(boundVertexBuffers->offsets.data() + bindingIndex, bindingCount));
                    }
                }

-                commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *storage->pipelineLayout, 0, storage->descriptorSet, nullptr);
+                vkDevice.updateDescriptorSets(**drawStorage->descriptorSetWrites, nullptr);
+                commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, *drawStorage->pipelineLayout, 0, fenceStorage->descriptorSet, nullptr);

                if constexpr (IsIndexed) {
-                    commandBuffer.bindIndexBuffer(indexBufferHandle, indexBufferOffset, indexBufferType);
+                    commandBuffer.bindIndexBuffer(boundIndexBuffer->handle, boundIndexBuffer->offset, boundIndexBuffer->type);
                    commandBuffer.drawIndexed(count, 1, first, vertexOffset, 0);
                } else {
                    commandBuffer.draw(count, 1, first, 0);
                }

-                storage->pipeline = vk::raii::Pipeline(vkDevice, pipeline.value);
+                fenceStorage->drawStorage = drawStorage;
+                fenceStorage->pipeline = vk::raii::Pipeline(vkDevice, pipeline.value);

-                cycle->AttachObject(storage);
+                cycle->AttachObject(fenceStorage);
            }, vk::Rect2D{
-                .extent = activeColorRenderTargets[0]->texture->dimensions,
+                .extent = activeColorRenderTargets.front()->texture->dimensions,
            }, {}, activeColorRenderTargets, depthRenderTargetView);
        }