From 8d7dbe2c4e68b038b0be128cbf60e627965ca389 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sat, 30 Apr 2022 15:21:55 +0100 Subject: [PATCH] Add a way to get a readonly span of Buffer contents Avoids the need to redundantly copy data when it is being directly processed on the CPU (e.g. quad conversion) --- app/src/main/cpp/skyline/gpu/buffer.cpp | 103 +++++++++++++----------- app/src/main/cpp/skyline/gpu/buffer.h | 25 ++++++ 2 files changed, 81 insertions(+), 47 deletions(-) diff --git a/app/src/main/cpp/skyline/gpu/buffer.cpp b/app/src/main/cpp/skyline/gpu/buffer.cpp index 641726b1..0b9256c4 100644 --- a/app/src/main/cpp/skyline/gpu/buffer.cpp +++ b/app/src/main/cpp/skyline/gpu/buffer.cpp @@ -220,32 +220,29 @@ namespace skyline::gpu { cycle = pCycle; } + void Buffer::SynchronizeGuestImmediate(const std::shared_ptr &pCycle, const std::function &flushHostCallback) { + // If this buffer was attached to the current cycle, flush all pending host GPU work and wait to ensure that we read valid data + if (cycle.owner_before(pCycle)) + flushHostCallback(); + + SynchronizeGuest(); + } + void Buffer::Read(const std::shared_ptr &pCycle, const std::function &flushHostCallback, span data, vk::DeviceSize offset) { - if (dirtyState == DirtyState::CpuDirty || dirtyState == DirtyState::Clean) { - std::memcpy(data.data(), mirror.data() + offset, data.size()); - } else if (dirtyState == DirtyState::GpuDirty) { - // If this buffer was attached to the current cycle, flush all pending host GPU work and wait to ensure that we read valid data - if (cycle.owner_before(pCycle)) - flushHostCallback(); + if (dirtyState == DirtyState::GpuDirty) + SynchronizeGuestImmediate(pCycle, flushHostCallback); - SynchronizeGuest(); - - std::memcpy(data.data(), backing.data() + offset, data.size()); - } + std::memcpy(data.data(), mirror.data() + offset, data.size()); } void Buffer::Write(const std::shared_ptr &pCycle, const std::function &flushHostCallback, const std::function &gpuCopyCallback, span data, 
vk::DeviceSize offset) { InvalidateMegaBuffer(); // Since we're writing to the backing buffer the megabuffer contents will require refresh - if (dirtyState == DirtyState::CpuDirty) { - SynchronizeHostWithCycle(pCycle); // Perform a CPU -> GPU sync to ensure correct ordering of writes - } else if (dirtyState == DirtyState::GpuDirty) { - // If this buffer was attached to the current cycle, flush all pending host GPU work and wait to ensure that writes are correctly ordered - if (cycle.owner_before(pCycle)) - flushHostCallback(); - - SynchronizeGuest(); - } + // Perform syncs in both directions to ensure correct ordering of writes + if (dirtyState == DirtyState::CpuDirty) + SynchronizeHostWithCycle(pCycle); + else if (dirtyState == DirtyState::GpuDirty) + SynchronizeGuestImmediate(pCycle, flushHostCallback); if (dirtyState != DirtyState::Clean) Logger::Error("Attempting to write to a dirty buffer"); // This should never happen since we do syncs in both directions above @@ -261,6 +258,41 @@ namespace skyline::gpu { } } + BufferView Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format) { + for (auto &view : views) + if (view.offset == offset && view.size == size && view.format == format) + return BufferView{shared_from_this(), &view}; + + views.emplace_back(offset, size, format); + return BufferView{shared_from_this(), &views.back()}; + } + + vk::DeviceSize Buffer::AcquireMegaBuffer() { + SynchronizeGuest(false, true); // First try and enable megabuffering by doing an immediate sync + + if (!megaBufferingEnabled) + return 0; // Bail out if megabuffering is disabled for this buffer + + SynchronizeHost(); // Since pushes to the megabuffer use the GPU backing contents ensure they're up-to-date by performing a CPU -> GPU sync + + if (megaBufferOffset) + return megaBufferOffset; // If the current buffer contents haven't been changed since the last acquire, we can just return the existing offset + + megaBufferOffset = 
gpu.buffer.megaBuffer.Push(backing, true); // Buffers are required to be page aligned in the megabuffer + return megaBufferOffset; + } + + void Buffer::InvalidateMegaBuffer() { + megaBufferOffset = 0; + } + + span Buffer::GetReadOnlyBackingSpan(const std::shared_ptr &pCycle, const std::function &flushHostCallback) { + if (dirtyState == DirtyState::GpuDirty) + SynchronizeGuestImmediate(pCycle, flushHostCallback); + + return mirror; + } + Buffer::BufferViewStorage::BufferViewStorage(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format) : offset(offset), size(size), format(format) {} Buffer::BufferDelegate::BufferDelegate(std::shared_ptr pBuffer, Buffer::BufferViewStorage *view) : buffer(std::move(pBuffer)), view(view) { @@ -307,34 +339,6 @@ namespace skyline::gpu { } } - BufferView Buffer::GetView(vk::DeviceSize offset, vk::DeviceSize size, vk::Format format) { - for (auto &view : views) - if (view.offset == offset && view.size == size && view.format == format) - return BufferView{shared_from_this(), &view}; - - views.emplace_back(offset, size, format); - return BufferView{shared_from_this(), &views.back()}; - } - - vk::DeviceSize Buffer::AcquireMegaBuffer() { - SynchronizeGuest(false, true); // First try and enable megabuffering by doing an immediate sync - - if (!megaBufferingEnabled) - return 0; // Bail out if megabuffering is disabled for this buffer - - SynchronizeHost(); // Since pushes to the megabuffer use the GPU backing contents ensure they're up-to-date by performing a CPU -> GPU sync - - if (megaBufferOffset) - return megaBufferOffset; // If the current buffer contents haven't been changed since the last acquire, we can just return the existing offset - - megaBufferOffset = gpu.buffer.megaBuffer.Push(backing, true); // Buffers are required to be page aligned in the megabuffer - return megaBufferOffset; - } - - void Buffer::InvalidateMegaBuffer() { - megaBufferOffset = 0; - } - BufferView::BufferView(std::shared_ptr buffer, 
Buffer::BufferViewStorage *view) : bufferDelegate(std::make_shared(std::move(buffer), view)) {} void BufferView::AttachCycle(const std::shared_ptr &cycle) { @@ -375,4 +379,9 @@ namespace skyline::gpu { else return 0; } + + span BufferView::GetReadOnlyBackingSpan(const std::shared_ptr &pCycle, const std::function &flushHostCallback) { + auto backing{bufferDelegate->buffer->GetReadOnlyBackingSpan(pCycle, flushHostCallback)}; + return backing.subspan(bufferDelegate->view->offset, bufferDelegate->view->size); + } } diff --git a/app/src/main/cpp/skyline/gpu/buffer.h b/app/src/main/cpp/skyline/gpu/buffer.h index 090603d0..89c77175 100644 --- a/app/src/main/cpp/skyline/gpu/buffer.h +++ b/app/src/main/cpp/skyline/gpu/buffer.h @@ -198,6 +198,14 @@ namespace skyline::gpu { */ void SynchronizeGuestWithCycle(const std::shared_ptr &cycle); + /** + * @brief Synchronizes the guest buffer with the host buffer immediately, flushing GPU work if necessary + * @note The buffer **must** be locked prior to calling this + * @param pCycle The FenceCycle associated with the current workload, utilised for waiting and flushing semantics + * @param flushHostCallback Callback to flush and execute all pending GPU work to allow for synchronisation of GPU dirty buffers + */ + void SynchronizeGuestImmediate(const std::shared_ptr &pCycle, const std::function &flushHostCallback); + /** * @brief Reads data at the specified offset in the buffer * @param pCycle The FenceCycle associated with the current workload, utilised for waiting and flushing semantics @@ -234,6 +242,15 @@ namespace skyline::gpu { * @note This **must** be called after any modifications of the backing buffer data */ void InvalidateMegaBuffer(); + + /** + * @param pCycle The FenceCycle associated with the current workload, utilised for waiting and flushing semantics + * @param flushHostCallback Callback to flush and execute all pending GPU work to allow for synchronisation of GPU dirty buffers + * @return A span of the backing buffer 
contents + * @note The returned span **must** not be written to + * @note The buffer **must** be kept locked until the span is no longer in use + */ + span GetReadOnlyBackingSpan(const std::shared_ptr &pCycle, const std::function &flushHostCallback); }; /** @@ -317,5 +334,13 @@ namespace skyline::gpu { * @note See Buffer::AcquireMegaBuffer */ vk::DeviceSize AcquireMegaBuffer() const; + + /** + * @return A span of the backing buffer contents + * @note The returned span **must** not be written to + * @note The view **must** be kept locked until the span is no longer in use + * @note See Buffer::GetReadOnlyBackingSpan + */ + span GetReadOnlyBackingSpan(const std::shared_ptr &pCycle, const std::function &flushHostCallback); }; }