From 8d7dbe2c4e68b038b0be128cbf60e627965ca389 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sat, 30 Apr 2022 15:21:55 +0100 Subject: [PATCH] Add a way to get a readonly span of Buffer contents Avoids the need to redundantly copy data when it is being directly processed on the CPU (e.g. quad conversion) --- app/src/main/cpp/skyline/gpu/buffer.cpp | 103 +++++++++++++----------- app/src/main/cpp/skyline/gpu/buffer.h | 25 ++++++ 2 files changed, 81 insertions(+), 47 deletions(-) diff --git a/app/src/main/cpp/skyline/gpu/buffer.cpp b/app/src/main/cpp/skyline/gpu/buffer.cpp index 641726b1..0b9256c4 100644 --- a/app/src/main/cpp/skyline/gpu/buffer.cpp +++ b/app/src/main/cpp/skyline/gpu/buffer.cpp @@ -220,32 +220,29 @@ namespace skyline::gpu { cycle = pCycle; } + void Buffer::SynchronizeGuestImmediate(const std::shared_ptr &pCycle, const std::function &flushHostCallback) { + // If this buffer was attached to the current cycle, flush all pending host GPU work and wait to ensure that we read valid data + if (cycle.owner_before(pCycle)) + flushHostCallback(); + + SynchronizeGuest(); + } + void Buffer::Read(const std::shared_ptr &pCycle, const std::function &flushHostCallback, span data, vk::DeviceSize offset) { - if (dirtyState == DirtyState::CpuDirty || dirtyState == DirtyState::Clean) { - std::memcpy(data.data(), mirror.data() + offset, data.size()); - } else if (dirtyState == DirtyState::GpuDirty) { - // If this buffer was attached to the current cycle, flush all pending host GPU work and wait to ensure that we read valid data - if (cycle.owner_before(pCycle)) - flushHostCallback(); + if (dirtyState == DirtyState::GpuDirty) + SynchronizeGuestImmediate(pCycle, flushHostCallback); - SynchronizeGuest(); - - std::memcpy(data.data(), backing.data() + offset, data.size()); - } + std::memcpy(data.data(), mirror.data() + offset, data.size()); } void Buffer::Write(const std::shared_ptr &pCycle, const std::function &flushHostCallback, const std::function &gpuCopyCallback, span data, 
vk::DeviceSize offset) { InvalidateMegaBuffer(); // Since we're writing to the backing buffer the megabuffer contents will require refresh - if (dirtyState == DirtyState::CpuDirty) { - SynchronizeHostWithCycle(pCycle); // Perform a CPU -> GPU sync to ensure correct ordering of writes - } else if (dirtyState == DirtyState::GpuDirty) { - // If this buffer was attached to the current cycle, flush all pending host GPU work and wait to ensure that writes are correctly ordered - if (cycle.owner_before(pCycle)) - flushHostCallback(); - - SynchronizeGuest(); - } + // Perform syncs in both directions to ensure correct ordering of writes + if (dirtyState == DirtyState::CpuDirty) + SynchronizeHostWithCycle(pCycle); + else if (dirtyState == DirtyState::GpuDirty) + SynchronizeGuestImmediate(pCycle, flushHostCallback); if (dirtyState != DirtyState::Clean) Logger::Error("Attempting to write to a dirty buffer"); // This should never happen since we do syncs in both directions above @@ -261,6 +258,41 @@ namespace skyline::gpu { } } + BufferView Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format) { + for (auto &view : views) + if (view.offset == offset && view.size == size && view.format == format) + return BufferView{shared_from_this(), &view}; + + views.emplace_back(offset, size, format); + return BufferView{shared_from_this(), &views.back()}; + } + + vk::DeviceSize Buffer::AcquireMegaBuffer() { + SynchronizeGuest(false, true); // First try and enable megabuffering by doing an immediate sync + + if (!megaBufferingEnabled) + return 0; // Bail out if megabuffering is disabled for this buffer + + SynchronizeHost(); // Since pushes to the megabuffer use the GPU backing contents ensure they're up-to-date by performing a CPU -> GPU sync + + if (megaBufferOffset) + return megaBufferOffset; // If the current buffer contents haven't been changed since the last acquire, we can just return the existing offset + + megaBufferOffset = 
gpu.buffer.megaBuffer.Push(backing, true); // Buffers are required to be page aligned in the megabuffer + return megaBufferOffset; + } + + void Buffer::InvalidateMegaBuffer() { + megaBufferOffset = 0; + } + + span Buffer::GetReadOnlyBackingSpan(const std::shared_ptr &pCycle, const std::function &flushHostCallback) { + if (dirtyState == DirtyState::GpuDirty) + SynchronizeGuestImmediate(pCycle, flushHostCallback); + + return mirror; + } + Buffer::BufferViewStorage::BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format) : offset(offset), size(size), format(format) {} Buffer::BufferDelegate::BufferDelegate(std::shared_ptr pBuffer, Buffer::BufferViewStorage *view) : buffer(std::move(pBuffer)), view(view) { @@ -307,34 +339,6 @@ namespace skyline::gpu { } } - BufferView Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format) { - for (auto &view : views) - if (view.offset == offset && view.size == size && view.format == format) - return BufferView{shared_from_this(), &view}; - - views.emplace_back(offset, size, format); - return BufferView{shared_from_this(), &views.back()}; - } - - vk::DeviceSize Buffer::AcquireMegaBuffer() { - SynchronizeGuest(false, true); // First try and enable megabuffering by doing an immediate sync - - if (!megaBufferingEnabled) - return 0; // Bail out if megabuffering is disabled for this buffer - - SynchronizeHost(); // Since pushes to the megabuffer use the GPU backing contents ensure they're up-to-date by performing a CPU -> GPU sync - - if (megaBufferOffset) - return megaBufferOffset; // If the current buffer contents haven't been changed since the last acquire, we can just return the existing offset - - megaBufferOffset = gpu.buffer.megaBuffer.Push(backing, true); // Buffers are required to be page aligned in the megabuffer - return megaBufferOffset; - } - - void Buffer::InvalidateMegaBuffer() { - megaBufferOffset = 0; - } - BufferView::BufferView(std::shared_ptr buffer, 
Buffer::BufferViewStorage *view) : bufferDelegate(std::make_shared(std::move(buffer), view)) {} void BufferView::AttachCycle(const std::shared_ptr &cycle) { @@ -375,4 +379,9 @@ namespace skyline::gpu { else return 0; } + + span BufferView::GetReadOnlyBackingSpan(const std::shared_ptr &pCycle, const std::function &flushHostCallback) { + auto backing{bufferDelegate->buffer->GetReadOnlyBackingSpan(pCycle, flushHostCallback)}; + return backing.subspan(bufferDelegate->view->offset, bufferDelegate->view->size); + } } diff --git a/app/src/main/cpp/skyline/gpu/buffer.h b/app/src/main/cpp/skyline/gpu/buffer.h index 090603d0..89c77175 100644 --- a/app/src/main/cpp/skyline/gpu/buffer.h +++ b/app/src/main/cpp/skyline/gpu/buffer.h @@ -198,6 +198,14 @@ namespace skyline::gpu { */ void SynchronizeGuestWithCycle(const std::shared_ptr &cycle); + /** + * @brief Synchronizes the guest buffer with the host buffer immediately, flushing GPU work if necessary + * @note The buffer **must** be locked prior to calling this + * @param pCycle The FenceCycle associated with the current workload, utilised for waiting and flushing semantics + * @param flushHostCallback Callback to flush and execute all pending GPU work to allow for synchronisation of GPU dirty buffers + */ + void SynchronizeGuestImmediate(const std::shared_ptr &pCycle, const std::function &flushHostCallback); + /** * @brief Reads data at the specified offset in the buffer * @param pCycle The FenceCycle associated with the current workload, utilised for waiting and flushing semantics @@ -234,6 +242,15 @@ namespace skyline::gpu { * @note This **must** be called after any modifications of the backing buffer data */ void InvalidateMegaBuffer(); + + /** + * @param pCycle The FenceCycle associated with the current workload, utilised for waiting and flushing semantics + * @param flushHostCallback Callback to flush and execute all pending GPU work to allow for synchronisation of GPU dirty buffers + * @return A span of the backing buffer 
contents + * @note The returned span **must** not be written to + * @note The buffer **must** be kept locked until the span is no longer in use + */ + span GetReadOnlyBackingSpan(const std::shared_ptr &pCycle, const std::function &flushHostCallback); }; /** @@ -317,5 +334,13 @@ namespace skyline::gpu { * @note See Buffer::AcquireMegaBuffer */ vk::DeviceSize AcquireMegaBuffer() const; + + /** + * @return A span of the backing buffer contents + * @note The returned span **must** not be written to + * @note The view **must** be kept locked until the span is no longer in use + * @note See Buffer::GetReadOnlyBackingSpan + */ + span GetReadOnlyBackingSpan(const std::shared_ptr &pCycle, const std::function &flushHostCallback); }; }