Introduce usage tracker for dirty tracking within an execution

This is necessary as e.g. shaders can be updated through a mirror and never hit modification traps. By tracking which addresses have sequenced writes applied, the shader manager can then correctly detect if a given shader has been modified by the GPU.
2024-11-25 22:34:16 +01:00 · 2023-03-04 20:11:34 +00:00 · 2023-03-04 20:11:34 +00:00 · 090151f0c3
commit 090151f0c3
parent f64860c93e
16 changed files with 103 additions and 44 deletions
--- a/app/src/main/cpp/skyline/gpu/buffer.cpp
+++ b/app/src/main/cpp/skyline/gpu/buffer.cpp
@ -194,13 +194,15 @@ namespace skyline::gpu {
        return isDirect ? ValidateMegaBufferViewImplDirect(size) : ValidateMegaBufferViewImplStaged(size);
    }

-    void Buffer::CopyFromImplDirect(vk::DeviceSize dstOffset, Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size, const std::function<void()> &gpuCopyCallback) {
+    void Buffer::CopyFromImplDirect(vk::DeviceSize dstOffset,
+                                    Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size,
+                                    UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback) {
        everHadInlineUpdate = true;
        bool needsGpuTracking{src->RefreshGpuWritesActiveDirect() || RefreshGpuWritesActiveDirect()};
        bool needsCpuTracking{RefreshGpuReadsActiveDirect() && !needsGpuTracking};
        if (needsGpuTracking || needsCpuTracking) {
            if (needsGpuTracking) // Force buffer to be dirty for this cycle if either of the sources are dirty, this is needed as otherwise it could have just been dirty from the previous cycle
-                MarkGpuDirty();
+                MarkGpuDirty(usageTracker);
            gpuCopyCallback();

            if (needsCpuTracking)
@ -210,7 +212,9 @@ namespace skyline::gpu {
        }
    }

-    void Buffer::CopyFromImplStaged(vk::DeviceSize dstOffset, Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size, const std::function<void()> &gpuCopyCallback) {
+    void Buffer::CopyFromImplStaged(vk::DeviceSize dstOffset,
+                                    Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size,
+                                    UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback) {
        std::scoped_lock lock{stateMutex, src->stateMutex}; // Fine even if src and dst are same since recursive mutex

        if (dirtyState == DirtyState::CpuDirty && SequencedCpuBackingWritesBlocked())
@ -230,18 +234,19 @@ namespace skyline::gpu {
            else
                gpuCopyCallback();
        } else {
-            MarkGpuDirty();
+            MarkGpuDirty(usageTracker);
            gpuCopyCallback();
        }
    }

-    bool Buffer::WriteImplDirect(span<u8> data, vk::DeviceSize offset, const std::function<void()> &gpuCopyCallback) {
+    bool Buffer::WriteImplDirect(span<u8> data, vk::DeviceSize offset,
+                                 UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback) {
        // If the buffer is GPU dirty do the write on the GPU and we're done
        if (RefreshGpuWritesActiveDirect()) {
            if (gpuCopyCallback) {
                // Propagate dirtiness to the current cycle, since if this is only dirty in a previous cycle that could change at any time and we would need to have the write saved somewhere for CPU reads
                // By propagating the dirtiness to the current cycle we can avoid this and force a wait on any reads
-                MarkGpuDirty();
+                MarkGpuDirty(usageTracker);
                gpuCopyCallback();
                return false;
            } else {
@ -349,6 +354,15 @@ namespace skyline::gpu {
        AdvanceSequence(); // The GPU will modify buffer contents so advance to the next sequence
    }

+    /**
+     * @brief Flags the buffer as GPU dirty for the current execution, then dispatches to the direct or staged implementation depending on `isDirect`
+     * @note This performs no guest check and does not touch any UsageTracker
+     */
+    void Buffer::MarkGpuDirtyImpl() {
+        currentExecutionGpuDirty = true;
+
+        if (isDirect)
+            MarkGpuDirtyImplDirect();
+        else
+            MarkGpuDirtyImplStaged();
+    }
+
    Buffer::Buffer(LinearAllocatorState<> &delegateAllocator, GPU &gpu, GuestBuffer guest, size_t id, bool direct)
        : gpu{gpu},
          guest{guest},
@ -382,16 +396,12 @@ namespace skyline::gpu {
        WaitOnFence();
    }

-    void Buffer::MarkGpuDirty() {
+    void Buffer::MarkGpuDirty(UsageTracker &usageTracker) {
        if (!guest)
            return;

-        currentExecutionGpuDirty = true;
-
-        if (isDirect)
-            MarkGpuDirtyImplDirect();
-        else
-            MarkGpuDirtyImplStaged();
+        // Record the whole guest range as GPU dirty in the tracker, then update the buffer's own dirty state
+        usageTracker.dirtyIntervals.Insert(*guest);
+        MarkGpuDirtyImpl();
    }

    void Buffer::WaitOnFence() {
@ -493,24 +503,30 @@ namespace skyline::gpu {
            ReadImplStaged(isFirstUsage, flushHostCallback, data, offset);
    }

-    bool Buffer::Write(span<u8> data, vk::DeviceSize offset, const std::function<void()> &gpuCopyCallback) {
+    bool Buffer::Write(span<u8> data, vk::DeviceSize offset, UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback) {
        AdvanceSequence(); // We are modifying GPU backing contents so advance to the next sequence
        everHadInlineUpdate = true;

+        // Record the guest range as having had a sequenced write applied during this execution
+        // A buffer without a guest mapping has no range to record, dereferencing `guest` in that case would be invalid so it is skipped (matching the guard in MarkGpuDirty)
+        if (guest)
+            usageTracker.sequencedIntervals.Insert(*guest);
+
        if (isDirect)
-            return WriteImplDirect(data, offset, gpuCopyCallback);
+            return WriteImplDirect(data, offset, usageTracker, gpuCopyCallback);
        else
            return WriteImplStaged(data, offset, gpuCopyCallback);
    }

-    void Buffer::CopyFrom(vk::DeviceSize dstOffset, Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size, const std::function<void()> &gpuCopyCallback) {
+    void Buffer::CopyFrom(vk::DeviceSize dstOffset,
+                          Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size,
+                          UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback) {
        AdvanceSequence(); // We are modifying GPU backing contents so advance to the next sequence
        everHadInlineUpdate = true;

+        // Only the destination (this buffer) is written to, so only its guest range is recorded as having a sequenced write
+        // A buffer without a guest mapping has no range to record, dereferencing `guest` in that case would be invalid so it is skipped (matching the guard in MarkGpuDirty)
+        if (guest)
+            usageTracker.sequencedIntervals.Insert(*guest);
+
        if (isDirect)
-            CopyFromImplDirect(dstOffset, src, srcOffset, size, gpuCopyCallback);
+            CopyFromImplDirect(dstOffset, src, srcOffset, size, usageTracker, gpuCopyCallback);
        else
-            CopyFromImplStaged(dstOffset, src, srcOffset, size, gpuCopyCallback);
+            CopyFromImplStaged(dstOffset, src, srcOffset, size, usageTracker, gpuCopyCallback);
    }

    BufferView Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize size) {
@ -676,8 +692,8 @@ namespace skyline::gpu {
        GetBuffer()->Read(isFirstUsage, flushHostCallback, data, readOffset + GetOffset());
    }

-    bool BufferView::Write(span<u8> data, vk::DeviceSize writeOffset, const std::function<void()> &gpuCopyCallback) const {
-        return GetBuffer()->Write(data, writeOffset + GetOffset(), gpuCopyCallback);
+    bool BufferView::Write(span<u8> data, vk::DeviceSize writeOffset, UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback) const {
+        // Translate the view-relative offset into a buffer-relative one and forward everything else to the underlying buffer
+        return GetBuffer()->Write(data, writeOffset + GetOffset(), usageTracker, gpuCopyCallback);
    }

    BufferBinding BufferView::TryMegaBuffer(const std::shared_ptr<FenceCycle> &pCycle, MegaBufferAllocator &allocator, ContextTag executionTag, size_t sizeOverride) const {
@ -689,9 +705,9 @@ namespace skyline::gpu {
        return backing.subspan(GetOffset(), size);
    }

-    void BufferView::CopyFrom(BufferView src, const std::function<void()> &gpuCopyCallback) {
+    void BufferView::CopyFrom(BufferView src, UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback) {
        if (src.size != size)
            throw exception("Copy size mismatch!");
-        return GetBuffer()->CopyFrom(GetOffset(), src.GetBuffer(), src.GetOffset(), size, gpuCopyCallback);
+        // Both views are the same size at this point, forward the copy to the underlying buffers using each view's offset
+        return GetBuffer()->CopyFrom(GetOffset(), src.GetBuffer(), src.GetOffset(), size, usageTracker, gpuCopyCallback);
    }
 }
--- a/app/src/main/cpp/skyline/gpu/buffer.h
+++ b/app/src/main/cpp/skyline/gpu/buffer.h
@ -8,6 +8,7 @@
 #include <common/spin_lock.h>
 #include <nce.h>
 #include <gpu/tag_allocator.h>
+#include "usage_tracker.h"
 #include "megabuffer.h"
 #include "memory_manager.h"

@ -146,11 +147,16 @@ namespace skyline::gpu {
         */
        bool ValidateMegaBufferView(vk::DeviceSize size);

-        void CopyFromImplDirect(vk::DeviceSize dstOffset, Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size, const std::function<void()> &gpuCopyCallback);
+        void CopyFromImplDirect(vk::DeviceSize dstOffset,
+                                Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size,
+                                UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback);

-        void CopyFromImplStaged(vk::DeviceSize dstOffset, Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size, const std::function<void()> &gpuCopyCallback);
+        void CopyFromImplStaged(vk::DeviceSize dstOffset,
+                                Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size,
+                                UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback);

-        bool WriteImplDirect(span<u8> data, vk::DeviceSize offset, const std::function<void()> &gpuCopyCallback = {});
+        bool WriteImplDirect(span<u8> data, vk::DeviceSize offset,
+                             UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback = {});

        bool WriteImplStaged(span<u8> data, vk::DeviceSize offset, const std::function<void()> &gpuCopyCallback = {});

@ -162,6 +168,8 @@ namespace skyline::gpu {

        void MarkGpuDirtyImplStaged();

+        void MarkGpuDirtyImpl();
+
      public:
        void UpdateCycle(const std::shared_ptr<FenceCycle> &newCycle) {
            newCycle->ChainCycle(cycle);
@ -227,7 +235,7 @@ namespace skyline::gpu {
         * @note This **must** be called after syncing the buffer to the GPU not before
         * @note The buffer **must** be locked prior to calling this
         */
-        void MarkGpuDirty();
+        void MarkGpuDirty(UsageTracker &usageTracker);

        /**
         * @brief Prevents sequenced writes to this buffer's backing from occurring on the CPU, forcing sequencing on the GPU instead for the duration of the context. Unsequenced writes such as those from the guest can still occur however.
@ -365,13 +373,15 @@ namespace skyline::gpu {
         * @param gpuCopyCallback Optional callback to perform a GPU-side copy for this Write if necessary, if such a copy is needed and this is not supplied `true` will be returned to indicate that the write needs to be repeated with the callback present
         * @return Whether the write needs to be repeated with `gpuCopyCallback` provided, always false if `gpuCopyCallback` is provided
         */
-        bool Write(span<u8> data, vk::DeviceSize offset, const std::function<void()> &gpuCopyCallback = {});
+        bool Write(span<u8> data, vk::DeviceSize offset, UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback = {});

        /**
         * @brief Copies a region of the src buffer into a region of this buffer
         * @note The src/dst buffers **must** be locked prior to calling this
         */
-        void CopyFrom(vk::DeviceSize dstOffset, Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size, const std::function<void()> &gpuCopyCallback);
+        void CopyFrom(vk::DeviceSize dstOffset,
+                      Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size,
+                      UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback);

        /**
         * @return A view into this buffer with the supplied attributes
@ -528,7 +538,7 @@ namespace skyline::gpu {
         * @note The view **must** be locked prior to calling this
         * @note See Buffer::Write
         */
-        bool Write(span<u8> data, vk::DeviceSize writeOffset, const std::function<void()> &gpuCopyCallback = {}) const;
+        bool Write(span<u8> data, vk::DeviceSize writeOffset, UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback = {}) const;

        /*
         * @brief If megabuffering is determined to be beneficial for the underlying buffer, allocates and copies this view into the megabuffer (in case of cache miss), returning a binding of the allocated megabuffer region
@ -550,7 +560,7 @@ namespace skyline::gpu {
         * @brief Copies the contents of one view into this one
         * @note The src/dst views **must** be locked prior to calling this
         */
-        void CopyFrom(BufferView src, const std::function<void()> &gpuCopyCallback);
+        void CopyFrom(BufferView src, UsageTracker &usageTracker, const std::function<void()> &gpuCopyCallback);

        constexpr operator bool() {
            return delegate != nullptr;
--- a/app/src/main/cpp/skyline/gpu/buffer_manager.cpp
+++ b/app/src/main/cpp/skyline/gpu/buffer_manager.cpp
@ -113,7 +113,7 @@ namespace skyline::gpu {
                    if (srcBuffer.lock.IsFirstUsage() && newBuffer->dirtyState != Buffer::DirtyState::GpuDirty)
                        copyBuffer(*newBuffer->guest, *srcBuffer->guest, newBuffer->mirror.data(), srcBuffer->backing->data());
                    else
-                        newBuffer->MarkGpuDirty();
+                        newBuffer->MarkGpuDirtyImpl();

                    // Since we don't synchost source buffers and the source buffers here are GPU dirty their mirrors will be out of date, meaning the backing contents of this source buffer's region in the new buffer from the initial synchost call will be incorrect. By copying backings directly here we can ensure that no writes are lost and that if the newly created buffer needs to turn GPU dirty during recreation no copies need to be done since the backing is as up to date as the mirror at a minimum.
                    copyBuffer(*newBuffer->guest, *srcBuffer->guest, newBuffer->backing->data(), srcBuffer->backing->data());
@ -126,7 +126,7 @@ namespace skyline::gpu {
                }
            } else {
                if (srcBuffer->directGpuWritesActive) {
-                    newBuffer->MarkGpuDirty();
+                    newBuffer->MarkGpuDirtyImpl();
                } else if (srcBuffer->directTrackedShadowActive) {
                    newBuffer->EnableTrackedShadowDirect();
                    copyBuffer(*newBuffer->guest, *srcBuffer->guest, newBuffer->directTrackedShadow.data(), srcBuffer->directTrackedShadow.data());
--- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp
+++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp
@ -562,6 +562,7 @@ namespace skyline::gpu::interconnect {
        attachedBuffers.clear();
        allocator->Reset();
        renderPassIndex = 0;
+        usageTracker.sequencedIntervals.Clear();

        // Periodically clear preserve attachments just in case there are new waiters which would otherwise end up waiting forever
        if ((submissionNumber % (2U << *state.settings->executorSlotCountScale)) == 0) {
@ -586,7 +587,6 @@ namespace skyline::gpu::interconnect {

            SubmitInternal();
            submissionNumber++;
-
        } else {
            if (callback && *state.settings->useDirectMemoryImport)
                waiterThread.Queue(nullptr, std::move(callback));
@ -598,6 +598,8 @@ namespace skyline::gpu::interconnect {
        ResetInternal();

        if (wait) {
+            usageTracker.dirtyIntervals.Clear();
+
            std::condition_variable cv;
            std::mutex mutex;
            bool gpuDone{};
--- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h
+++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h
@ -6,6 +6,7 @@
 #include <boost/container/stable_vector.hpp>
 #include <renderdoc_app.h>
 #include <common/linear_allocator.h>
+#include <gpu/usage_tracker.h>
 #include <gpu/megabuffer.h>
 #include "command_nodes.h"
 #include "common/spin_lock.h"
@ -217,6 +218,7 @@ namespace skyline::gpu::interconnect {
        size_t submissionNumber{};
        ContextTag executionTag{};
        bool captureNextExecution{};
+        UsageTracker usageTracker;

        CommandExecutor(const DeviceState &state);

--- a/app/src/main/cpp/skyline/gpu/interconnect/common/pipeline.inc
+++ b/app/src/main/cpp/skyline/gpu/interconnect/common/pipeline.inc
@ -62,7 +62,7 @@ namespace skyline::gpu::interconnect {
                dstStageMask |= dstStage;
            }

-            view.GetBuffer()->MarkGpuDirty();
+            view.GetBuffer()->MarkGpuDirty(ctx.executor.usageTracker);
        } else {
            if (auto megaBufferBinding{view.TryMegaBuffer(ctx.executor.cycle, ctx.gpu.megaBufferAllocator, ctx.executor.executionTag)})
                return megaBufferBinding;
--- a/app/src/main/cpp/skyline/gpu/interconnect/common/shader_cache.cpp
+++ b/app/src/main/cpp/skyline/gpu/interconnect/common/shader_cache.cpp
@ -53,13 +53,13 @@ namespace skyline::gpu::interconnect {
            mirrorBlock = blockMapping;
        }

-        if (entry->trapCount > MirrorEntry::SkipTrapThreshold && entry->channelSequenceNumber != ctx.channelCtx.channelSequenceNumber) {
-            entry->channelSequenceNumber = ctx.channelCtx.channelSequenceNumber;
+        if (entry->trapCount > MirrorEntry::SkipTrapThreshold && entry->executionTag != ctx.executor.executionTag) {
+            entry->executionTag = ctx.executor.executionTag;
            entry->dirty = true;
        }

        // If the mirror entry has been written to, clear its shader binary cache and retrap to catch any future writes
-        if (entry->dirty) {
+        if (entry->dirty || ctx.executor.usageTracker.sequencedIntervals.Intersect(blockMapping.subspan(blockOffset))) {
            entry->cache.clear();
            entry->dirty = false;

@ -129,7 +129,7 @@ namespace skyline::gpu::interconnect {
        if (programBase != lastProgramBase || programOffset != lastProgramOffset)
            return true;

-        if (entry && entry->trapCount > MirrorEntry::SkipTrapThreshold && entry->channelSequenceNumber != ctx.channelCtx.channelSequenceNumber)
+        if (entry && entry->trapCount > MirrorEntry::SkipTrapThreshold && entry->executionTag != ctx.executor.executionTag)
            return true;
        else if (entry && entry->dirty)
            return true;
--- a/app/src/main/cpp/skyline/gpu/interconnect/common/shader_cache.h
+++ b/app/src/main/cpp/skyline/gpu/interconnect/common/shader_cache.h
@ -22,7 +22,7 @@ namespace skyline::gpu::interconnect {

            static constexpr u32 SkipTrapThreshold{20}; //!< Threshold for the number of times a mirror trap needs to be hit before we fallback to always hashing
            u32 trapCount{}; //!< The number of times the trap has been hit, used to avoid trapping in cases where the constant retraps would harm performance
-            size_t channelSequenceNumber{}; //!< For the case where `trapCount > SkipTrapThreshold`, the memory sequence number number used to clear the cache after every access
+            ContextTag executionTag{}; //!< For the case where `trapCount > SkipTrapThreshold`, the tag of the execution in which the entry was last forcibly marked dirty, used to clear the cache once per execution
            bool dirty{}; //!< If the trap has been hit and the cache needs to be cleared

            MirrorEntry(span<u8> alignedMirror) : mirror{alignedMirror} {}
--- a/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.cpp
+++ b/app/src/main/cpp/skyline/gpu/interconnect/fermi_2d.cpp
@ -123,6 +123,7 @@ namespace skyline::gpu::interconnect {
        auto dstTextureView{gpu.texture.FindOrCreate(dstGuestTexture, executor.tag)};
        executor.AttachDependency(dstTextureView);
        executor.AttachTexture(dstTextureView.get());
+        dstTextureView->texture->MarkGpuDirty(executor.usageTracker);

        // Blit shader always samples from centre so adjust if necessary
        float centredSrcRectX{sampleOrigin == SampleModeOrigin::Corner ? srcRectX - 0.5f : srcRectX};
--- a/app/src/main/cpp/skyline/gpu/interconnect/inline2memory.cpp
+++ b/app/src/main/cpp/skyline/gpu/interconnect/inline2memory.cpp
@ -22,7 +22,7 @@ namespace skyline::gpu::interconnect {
        ContextLock dstBufLock{executor.tag, dstBuf};


-        dstBuf.Write(src, 0, [&]() {
+        dstBuf.Write(src, 0, executor.usageTracker, [&]() {
            executor.AttachLockedBufferView(dstBuf, std::move(dstBufLock));
            // This will prevent any CPU accesses to backing for the duration of the usage
            dstBuf.GetBuffer()->BlockAllCpuBackingWrites();
--- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/active_state.cpp
+++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/active_state.cpp
@ -206,7 +206,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
                        dstStageMask |=  vk::PipelineStageFlagBits::eTransformFeedbackEXT;
                    }

-                    view->GetBuffer()->MarkGpuDirty();
+                    view->GetBuffer()->MarkGpuDirty(ctx.executor.usageTracker);
                    builder.SetTransformFeedbackBuffer(index, *view);
                    return;
                } else {
--- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/constant_buffers.cpp
+++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/constant_buffers.cpp
@ -46,7 +46,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
        ContextLock lock{ctx.executor.tag, view};

        // First attempt the write without setting up the gpu copy callback as a fast path
-        if (view.Write(srcCpuBuf, offset)) [[unlikely]] {
+        if (view.Write(srcCpuBuf, offset, ctx.executor.usageTracker)) [[unlikely]] {
            // Store callback data in a stack allocated struct to avoid heap allocation for the gpu copy callback lambda
            struct GpuCopyCallbackData {
                InterconnectContext &ctx;
@ -56,7 +56,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
                BufferView &view;
            } callbackData{ctx, srcCpuBuf, offset, lock, view};

-            view.Write(srcCpuBuf, offset, [&callbackData]() {
+            view.Write(srcCpuBuf, offset, ctx.executor.usageTracker, [&callbackData]() {
                callbackData.ctx.executor.AttachLockedBufferView(callbackData.view, std::move(callbackData.lock));
                // This will prevent any CPU accesses to backing for the duration of the usage
                callbackData.view.GetBuffer()->BlockAllCpuBackingWrites();
--- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_dma.cpp
+++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_dma.cpp
@ -24,7 +24,7 @@ namespace skyline::gpu::interconnect {
        })};
        ContextLock dstBufLock{executor.tag, dstBuf};

-        dstBuf.CopyFrom(srcBuf, [&]() {
+        dstBuf.CopyFrom(srcBuf, executor.usageTracker, [&]() {
            executor.AttachLockedBufferView(srcBuf, std::move(srcBufLock));
            executor.AttachLockedBufferView(dstBuf, std::move(dstBufLock));
            // This will prevent any CPU accesses to backing for the duration of the usage
--- a/app/src/main/cpp/skyline/gpu/texture/texture.cpp
+++ b/app/src/main/cpp/skyline/gpu/texture/texture.cpp
@ -725,6 +725,12 @@ namespace skyline::gpu {
        }
    }

+    /**
+     * @brief Records every valid guest mapping of this texture as a GPU-dirty interval in the supplied tracker
+     * @param usageTracker The tracker whose `dirtyIntervals` receive the texture's guest mappings
+     * @note This only updates the tracker, no other texture state is changed here
+     */
+    void Texture::MarkGpuDirty(UsageTracker &usageTracker) {
+        if (!guest) // A texture without a guest has no mappings to record, same guard as SynchronizeHost
+            return;
+
+        for (auto mapping : guest->mappings)
+            if (mapping.valid())
+                usageTracker.dirtyIntervals.Insert(mapping);
+    }
+
    void Texture::SynchronizeHost(bool gpuDirty) {
        if (!guest)
            return;
--- a/app/src/main/cpp/skyline/gpu/texture/texture.h
+++ b/app/src/main/cpp/skyline/gpu/texture/texture.h
@ -10,6 +10,7 @@
 #include <nce.h>
 #include <gpu/tag_allocator.h>
 #include <gpu/memory_manager.h>
+#include <gpu/usage_tracker.h>

 namespace skyline::gpu {
    namespace texture {
@ -560,6 +561,11 @@ namespace skyline::gpu {
         */
        void TransitionLayout(vk::ImageLayout layout);

+        /**
+         * @brief Marks the texture as being GPU dirty
+         */
+        void MarkGpuDirty(UsageTracker &usageTracker);
+
        /**
         * @brief Synchronizes the host texture with the guest after it has been modified
         * @param gpuDirty If true, the texture will be transitioned to being GpuDirty by this call
--- a/app/src/main/cpp/skyline/gpu/usage_tracker.h
+++ b/app/src/main/cpp/skyline/gpu/usage_tracker.h
@ -0,0 +1,16 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#pragma once
+
+#include <common/interval_list.h>
+
+namespace skyline::gpu {
+    /**
+     * @brief Tracks the usage of GPU memory and buffers to allow for fine-grained flushing
+     */
+    struct UsageTracker {
+        IntervalList<u8 *> dirtyIntervals; //!< Intervals of GPU-dirty contents that require a flush before accessing
+        IntervalList<u8 *> sequencedIntervals; //!< Intervals of GPFIFO-sequenced writes that occur within an execution
+    };
+}