From 2b282ece1a3711da7dd29aa68b957f7574afc09b Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Mon, 9 Jan 2023 21:16:23 +0000 Subject: [PATCH] Add more fine-grained buffer recreation locking --- app/src/main/cpp/skyline/gpu/buffer.cpp | 5 ++ app/src/main/cpp/skyline/gpu/buffer.h | 7 +++ app/src/main/cpp/skyline/gpu/buffer_manager.h | 2 +- .../gpu/interconnect/command_executor.cpp | 1 - .../gpu/interconnect/common/state_updater.h | 46 ++++++++++--------- .../gpu/interconnect/inline2memory.cpp | 7 +-- .../maxwell_3d/constant_buffers.cpp | 7 +-- .../skyline/gpu/interconnect/maxwell_dma.cpp | 10 ++-- 8 files changed, 52 insertions(+), 33 deletions(-) diff --git a/app/src/main/cpp/skyline/gpu/buffer.cpp b/app/src/main/cpp/skyline/gpu/buffer.cpp index 8159456f..277a84d2 100644 --- a/app/src/main/cpp/skyline/gpu/buffer.cpp +++ b/app/src/main/cpp/skyline/gpu/buffer.cpp @@ -697,6 +697,11 @@ namespace skyline::gpu { return delegate->GetBuffer(); } + BufferBinding BufferView::GetBinding(GPU &gpu) const { + std::scoped_lock lock{gpu.buffer.recreationMutex}; + return {delegate->GetBuffer()->GetBacking(), offset + delegate->GetOffset(), size}; + } + vk::DeviceSize BufferView::GetOffset() const { return offset + delegate->GetOffset(); } diff --git a/app/src/main/cpp/skyline/gpu/buffer.h b/app/src/main/cpp/skyline/gpu/buffer.h index 66d69b23..102508e5 100644 --- a/app/src/main/cpp/skyline/gpu/buffer.h +++ b/app/src/main/cpp/skyline/gpu/buffer.h @@ -492,6 +492,13 @@ namespace skyline::gpu { */ vk::DeviceSize GetOffset() const; + /** + * @return A binding describing the underlying buffer state at a given moment in time + * @note The view **must** be locked prior to calling this + * @note This is the **ONLY** function in BufferView that can be called from non-GPFIFO threads + */ + BufferBinding GetBinding(GPU &gpu) const; + /** * @note The buffer manager **MUST** be locked prior to calling this */ diff --git a/app/src/main/cpp/skyline/gpu/buffer_manager.h b/app/src/main/cpp/skyline/gpu/buffer_manager.h index 05a24314..bcab6bc9 100644 --- a/app/src/main/cpp/skyline/gpu/buffer_manager.h +++ b/app/src/main/cpp/skyline/gpu/buffer_manager.h @@ -69,7 +69,7 @@ namespace skyline::gpu { static bool BufferLessThan(const std::shared_ptr &it, u8 *pointer); public: - std::mutex recreationMutex; + SpinLock recreationMutex; BufferManager(GPU &gpu); diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp index aef49b66..46c42bf9 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp @@ -94,7 +94,6 @@ namespace skyline::gpu::interconnect { vk::RenderPass lRenderPass; u32 subpassIndex; - std::scoped_lock bufferLock{gpu.buffer.recreationMutex}; using namespace node; for (NodeVariant &node : slot->nodes) { #define NODE(name) [&](name& node) { node(slot->commandBuffer, slot->cycle, gpu); } diff --git a/app/src/main/cpp/skyline/gpu/interconnect/common/state_updater.h b/app/src/main/cpp/skyline/gpu/interconnect/common/state_updater.h index 3e17ef11..025fd9fc 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/common/state_updater.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/common/state_updater.h @@ -66,9 +66,10 @@ namespace skyline::gpu::interconnect { struct SetVertexBuffersDynamicCmdImpl { void Record(GPU &gpu, vk::raii::CommandBuffer &commandBuffer) { for (u32 i{base.firstBinding}; i < base.firstBinding + base.bindingCount; i++) { - base.buffers[i] = views[i].GetBuffer()->GetBacking(); - base.offsets[i] = views[i].GetOffset(); - base.sizes[i] = views[i].size; + auto binding{views[i].GetBinding(gpu)}; + base.buffers[i] = binding.buffer; + base.offsets[i] = binding.offset; + base.sizes[i] = binding.size; } base.Record(gpu, commandBuffer); @@ -92,8 +93,9 @@ namespace skyline::gpu::interconnect { struct SetIndexBufferDynamicCmdImpl { void Record(GPU &gpu, vk::raii::CommandBuffer &commandBuffer) { - base.buffer = view.GetBuffer()->GetBacking(); - base.offset = view.GetOffset(); + auto binding{view.GetBinding(gpu)}; + base.buffer = binding.buffer; + base.offset = binding.offset; base.Record(gpu, commandBuffer); } @@ -116,9 +118,10 @@ namespace skyline::gpu::interconnect { struct SetTransformFeedbackBufferDynamicCmdImpl { void Record(GPU &gpu, vk::raii::CommandBuffer &commandBuffer) { - base.buffer = view.GetBuffer()->GetBacking(); - base.offset = view.GetOffset(); - base.size = view.size; + auto binding{view.GetBinding(gpu)}; + base.buffer = binding.buffer; + base.offset = binding.offset; + base.size = binding.size; base.Record(gpu, commandBuffer); } @@ -206,19 +209,20 @@ namespace skyline::gpu::interconnect { // Resolve descriptor infos from dynamic bindings for (size_t i{}; i < updateInfo->bufferDescDynamicBindings.size(); i++) { auto &dynamicBinding{updateInfo->bufferDescDynamicBindings[i]}; - if (auto view{std::get_if(&dynamicBinding)}) { - updateInfo->bufferDescs[i] = vk::DescriptorBufferInfo{ - .buffer = view->GetBuffer()->GetBacking(), - .offset = view->GetOffset(), - .range = view->size - }; - } else if (auto binding{std::get_if(&dynamicBinding)}) { - updateInfo->bufferDescs[i] = vk::DescriptorBufferInfo{ - .buffer = binding->buffer, - .offset = binding->offset, - .range = binding->size - }; - } + BufferBinding binding{[&dynamicBinding, &gpu]() { + if (auto view{std::get_if(&dynamicBinding)}) + return view->GetBinding(gpu); + else if (auto binding{std::get_if(&dynamicBinding)}) + return *binding; + else + return BufferBinding{}; + }()}; + + updateInfo->bufferDescs[i] = vk::DescriptorBufferInfo{ + .buffer = binding.buffer, + .offset = binding.offset, + .range = binding.size + }; } if constexpr (PushDescriptor) { diff --git a/app/src/main/cpp/skyline/gpu/interconnect/inline2memory.cpp b/app/src/main/cpp/skyline/gpu/interconnect/inline2memory.cpp index d7f1c14b..e0fd4976 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/inline2memory.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/inline2memory.cpp @@ -28,13 +28,14 @@ namespace skyline::gpu::interconnect { dstBuf.GetBuffer()->BlockAllCpuBackingWrites(); auto srcGpuAllocation{gpu.megaBufferAllocator.Push(executor.cycle, src)}; - executor.AddOutsideRpCommand([srcGpuAllocation, dstBuf, src](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &, GPU &) { + executor.AddOutsideRpCommand([srcGpuAllocation, dstBuf, src](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &, GPU &pGpu) { + auto dstBufBinding{dstBuf.GetBinding(pGpu)}; vk::BufferCopy copyRegion{ .size = src.size_bytes(), .srcOffset = srcGpuAllocation.offset, - .dstOffset = dstBuf.GetOffset() + .dstOffset = dstBufBinding.offset, }; - commandBuffer.copyBuffer(srcGpuAllocation.buffer, dstBuf.GetBuffer()->GetBacking(), copyRegion); + commandBuffer.copyBuffer(srcGpuAllocation.buffer, dstBufBinding.buffer, copyRegion); commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, {}, vk::MemoryBarrier{ .srcAccessMask = vk::AccessFlagBits::eTransferWrite, .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/constant_buffers.cpp b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/constant_buffers.cpp index 4a08aa18..c69d5d0d 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/constant_buffers.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/constant_buffers.cpp @@ -62,13 +62,14 @@ namespace skyline::gpu::interconnect::maxwell3d { callbackData.view.GetBuffer()->BlockAllCpuBackingWrites(); auto srcGpuAllocation{callbackData.ctx.gpu.megaBufferAllocator.Push(callbackData.ctx.executor.cycle, callbackData.srcCpuBuf)}; - callbackData.ctx.executor.AddOutsideRpCommand([=, srcCpuBuf = callbackData.srcCpuBuf, view = callbackData.view, offset = callbackData.offset](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &, GPU &) { + callbackData.ctx.executor.AddOutsideRpCommand([=, srcCpuBuf = callbackData.srcCpuBuf, view = callbackData.view, offset = callbackData.offset](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &, GPU &gpu) { + auto binding{view.GetBinding(gpu)}; vk::BufferCopy copyRegion{ .size = srcCpuBuf.size_bytes(), .srcOffset = srcGpuAllocation.offset, - .dstOffset = view.GetOffset() + offset + .dstOffset = binding.offset + offset }; - commandBuffer.copyBuffer(srcGpuAllocation.buffer, view.GetBuffer()->GetBacking(), copyRegion); + commandBuffer.copyBuffer(srcGpuAllocation.buffer, binding.buffer, copyRegion); commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, {}, vk::MemoryBarrier{ .srcAccessMask = vk::AccessFlagBits::eTransferWrite, .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_dma.cpp b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_dma.cpp index 7887f7da..f5ac9c9f 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_dma.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_dma.cpp @@ -40,17 +40,19 @@ namespace skyline::gpu::interconnect { srcBuf.GetBuffer()->BlockAllCpuBackingWrites(); dstBuf.GetBuffer()->BlockAllCpuBackingWrites(); - executor.AddOutsideRpCommand([srcBuf, dstBuf](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &, GPU &) { + executor.AddOutsideRpCommand([srcBuf, dstBuf](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &, GPU &gpu) { commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, {}, vk::MemoryBarrier{ .srcAccessMask = vk::AccessFlagBits::eMemoryRead, .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite }, {}, {}); + auto srcBufBinding{srcBuf.GetBinding(gpu)}; + auto dstBufBinding{dstBuf.GetBinding(gpu)}; vk::BufferCopy copyRegion{ .size = srcBuf.size, - .srcOffset = srcBuf.GetOffset(), - .dstOffset = dstBuf.GetOffset() + .srcOffset = srcBufBinding.offset, + .dstOffset = dstBufBinding.offset }; - commandBuffer.copyBuffer(srcBuf.GetBuffer()->GetBacking(), dstBuf.GetBuffer()->GetBacking(), copyRegion); + commandBuffer.copyBuffer(srcBufBinding.buffer, dstBufBinding.buffer, copyRegion); commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, {}, vk::MemoryBarrier{ .srcAccessMask = vk::AccessFlagBits::eTransferWrite, .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,