From 37ff0ab814afd19496362c198072804e32af0d57 Mon Sep 17 00:00:00 2001
From: Billy Laws <blaws05@gmail.com>
Date: Tue, 25 Oct 2022 21:03:29 +0100
Subject: [PATCH] Add buffer manager support for accelerated copies

Copies between buffers are sequenced on either the GPU or the CPU, depending on which is optimal, and avoid any serialisation
---
 app/src/main/cpp/skyline/gpu/buffer.cpp | 38 +++++++++++++++++++++++++
 app/src/main/cpp/skyline/gpu/buffer.h   | 12 ++++++++
 2 files changed, 50 insertions(+)
diff --git a/app/src/main/cpp/skyline/gpu/buffer.cpp b/app/src/main/cpp/skyline/gpu/buffer.cpp
index d0ffd7ba..b67649e2 100644
--- a/app/src/main/cpp/skyline/gpu/buffer.cpp
+++ b/app/src/main/cpp/skyline/gpu/buffer.cpp
@@ -287,6 +287,37 @@ namespace skyline::gpu {
         return false;
     }
 
+    void Buffer::CopyFrom(vk::DeviceSize dstOffset, Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size, const std::function<void()> &gpuCopyCallback) {
+        AdvanceSequence(); // We are modifying GPU backing contents so advance to the next sequence
+        everHadInlineUpdate = true;
+
+        std::scoped_lock dstLock{stateMutex};
+        std::scoped_lock srcLock{src->stateMutex}; // Fine even if src and dst are same since recursive mutex
+
+        if (dirtyState == DirtyState::CpuDirty && SequencedCpuBackingWritesBlocked())
+            // If the buffer is used in sequence directly on the GPU, SynchronizeHost before modifying the mirror contents to ensure proper sequencing. This write will then be sequenced on the GPU instead (the buffer will be kept clean for the rest of the execution due to gpuCopyCallback blocking all writes)
+            SynchronizeHost();
+
+        if (dirtyState != DirtyState::GpuDirty && src->dirtyState != DirtyState::GpuDirty) {
+            std::memmove(mirror.data() + dstOffset, src->mirror.data() + srcOffset, size); // memmove rather than memcpy as src can be this very buffer, in which case the two ranges may overlap
+
+            if (dirtyState == DirtyState::CpuDirty && !SequencedCpuBackingWritesBlocked())
+                // Skip updating backing if the changes are gonna be updated later by SynchronizeHost in executor anyway
+                return;
+
+            if (!SequencedCpuBackingWritesBlocked() && PollFence()) {
+                // We can write directly to the backing as long as this resource isn't being actively used by a past workload (in the current context or another). The freshly updated dst mirror is the source here, as the src range may have been overwritten by the copy above when src aliases this buffer
+                std::memcpy(backing.data() + dstOffset, mirror.data() + dstOffset, size);
+            } else {
+                gpuCopyCallback();
+            }
+        } else { // Either side is GpuDirty so the mirrors can't be used as the source/destination, the copy is left to the callback
+            MarkGpuDirty();
+            gpuCopyCallback();
+        }
+    }
+
+
     BufferView Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize size) {
         return BufferView{delegate, offset, size};
     }
@@ -447,4 +478,11 @@ namespace skyline::gpu {
         auto backing{delegate->GetBuffer()->GetReadOnlyBackingSpan(isFirstUsage, flushHostCallback)};
         return backing.subspan(GetOffset(), size);
     }
+
+    void BufferView::CopyFrom(BufferView src, const std::function<void()> &gpuCopyCallback) {
+        if (size != src.size) // Both views need to span the same amount of bytes for the copy to be well-defined
+            throw exception("Copy size mismatch!");
+        GetBuffer()->CopyFrom(GetOffset(), src.GetBuffer(), src.GetOffset(), size, gpuCopyCallback);
+    }
+
 }
diff --git a/app/src/main/cpp/skyline/gpu/buffer.h b/app/src/main/cpp/skyline/gpu/buffer.h
index ee5e2289..4d2493bd 100644
--- a/app/src/main/cpp/skyline/gpu/buffer.h
+++ b/app/src/main/cpp/skyline/gpu/buffer.h
@@ -290,6 +290,12 @@ namespace skyline::gpu {
          */
         bool Write(span<u8> data, vk::DeviceSize offset, const std::function<void()> &gpuCopyCallback = {});
 
+        /**
+         * @brief Copies `size` bytes starting at `srcOffset` in the src buffer to `dstOffset` in this buffer, the copy is done directly on the CPU where possible and is otherwise left to the supplied `gpuCopyCallback`
+         * @note The src/dst buffers **must** be locked prior to calling this
+         */
+        void CopyFrom(vk::DeviceSize dstOffset, Buffer *src, vk::DeviceSize srcOffset, vk::DeviceSize size, const std::function<void()> &gpuCopyCallback);
+
         /**
          * @return A view into this buffer with the supplied attributes
          * @note The buffer **must** be locked prior to calling this
@@ -451,6 +457,12 @@ namespace skyline::gpu {
          */
         span<u8> GetReadOnlyBackingSpan(bool isFirstUsage, const std::function<void()> &flushHostCallback);
 
+        /**
+         * @brief Copies the contents of the src view into this one, an exception is thrown if the sizes of the two views differ
+         * @note The src/dst views **must** be locked prior to calling this
+         */
+        void CopyFrom(BufferView src, const std::function<void()> &gpuCopyCallback);
+
         constexpr operator bool() {
             return delegate != nullptr;
         }