Add more fine-grained buffer recreation locking

This commit is contained in:
Billy Laws 2023-01-09 21:16:23 +00:00
parent 85a23e73ba
commit 2b282ece1a
8 changed files with 52 additions and 33 deletions

View File

@ -697,6 +697,11 @@ namespace skyline::gpu {
return delegate->GetBuffer(); return delegate->GetBuffer();
} }
/**
 * Builds a binding (backing buffer, offset, size) for this view.
 * The buffer manager's recreation mutex is held while the delegate is read, so the backing
 * and the offset are taken under the same lock acquisition.
 */
BufferBinding BufferView::GetBinding(GPU &gpu) const {
    // Held until return; released when `recreationLock` goes out of scope
    std::scoped_lock recreationLock{gpu.buffer.recreationMutex};

    // Offset is this view's own offset plus whatever offset the delegate reports
    BufferBinding binding{delegate->GetBuffer()->GetBacking(), offset + delegate->GetOffset(), size};
    return binding;
}
vk::DeviceSize BufferView::GetOffset() const { vk::DeviceSize BufferView::GetOffset() const {
return offset + delegate->GetOffset(); return offset + delegate->GetOffset();
} }

View File

@ -492,6 +492,13 @@ namespace skyline::gpu {
*/ */
vk::DeviceSize GetOffset() const; vk::DeviceSize GetOffset() const;
/**
* @param gpu The GPU instance; its buffer manager's recreation mutex (`gpu.buffer.recreationMutex`) is acquired internally for the duration of the call
* @return A binding describing the underlying buffer state at a given moment in time: the backing buffer, the view's offset combined with its delegate's offset, and the view's size
* @note The view **must** be locked prior to calling this
* @note This is the **ONLY** function in BufferView that can be called from non-GPFIFO threads
*/
BufferBinding GetBinding(GPU &gpu) const;
/** /**
* @note The buffer manager **MUST** be locked prior to calling this * @note The buffer manager **MUST** be locked prior to calling this
*/ */

View File

@ -69,7 +69,7 @@ namespace skyline::gpu {
static bool BufferLessThan(const std::shared_ptr<Buffer> &it, u8 *pointer); static bool BufferLessThan(const std::shared_ptr<Buffer> &it, u8 *pointer);
public: public:
std::mutex recreationMutex; SpinLock recreationMutex;
BufferManager(GPU &gpu); BufferManager(GPU &gpu);

View File

@ -94,7 +94,6 @@ namespace skyline::gpu::interconnect {
vk::RenderPass lRenderPass; vk::RenderPass lRenderPass;
u32 subpassIndex; u32 subpassIndex;
std::scoped_lock bufferLock{gpu.buffer.recreationMutex};
using namespace node; using namespace node;
for (NodeVariant &node : slot->nodes) { for (NodeVariant &node : slot->nodes) {
#define NODE(name) [&](name& node) { node(slot->commandBuffer, slot->cycle, gpu); } #define NODE(name) [&](name& node) { node(slot->commandBuffer, slot->cycle, gpu); }

View File

@ -66,9 +66,10 @@ namespace skyline::gpu::interconnect {
struct SetVertexBuffersDynamicCmdImpl { struct SetVertexBuffersDynamicCmdImpl {
void Record(GPU &gpu, vk::raii::CommandBuffer &commandBuffer) { void Record(GPU &gpu, vk::raii::CommandBuffer &commandBuffer) {
for (u32 i{base.firstBinding}; i < base.firstBinding + base.bindingCount; i++) { for (u32 i{base.firstBinding}; i < base.firstBinding + base.bindingCount; i++) {
base.buffers[i] = views[i].GetBuffer()->GetBacking(); auto binding{views[i].GetBinding(gpu)};
base.offsets[i] = views[i].GetOffset(); base.buffers[i] = binding.buffer;
base.sizes[i] = views[i].size; base.offsets[i] = binding.offset;
base.sizes[i] = binding.size;
} }
base.Record(gpu, commandBuffer); base.Record(gpu, commandBuffer);
@ -92,8 +93,9 @@ namespace skyline::gpu::interconnect {
struct SetIndexBufferDynamicCmdImpl { struct SetIndexBufferDynamicCmdImpl {
void Record(GPU &gpu, vk::raii::CommandBuffer &commandBuffer) { void Record(GPU &gpu, vk::raii::CommandBuffer &commandBuffer) {
base.buffer = view.GetBuffer()->GetBacking(); auto binding{view.GetBinding(gpu)};
base.offset = view.GetOffset(); base.buffer = binding.buffer;
base.offset = binding.offset;
base.Record(gpu, commandBuffer); base.Record(gpu, commandBuffer);
} }
@ -116,9 +118,10 @@ namespace skyline::gpu::interconnect {
struct SetTransformFeedbackBufferDynamicCmdImpl { struct SetTransformFeedbackBufferDynamicCmdImpl {
void Record(GPU &gpu, vk::raii::CommandBuffer &commandBuffer) { void Record(GPU &gpu, vk::raii::CommandBuffer &commandBuffer) {
base.buffer = view.GetBuffer()->GetBacking(); auto binding{view.GetBinding(gpu)};
base.offset = view.GetOffset(); base.buffer = binding.buffer;
base.size = view.size; base.offset = binding.offset;
base.size = binding.size;
base.Record(gpu, commandBuffer); base.Record(gpu, commandBuffer);
} }
@ -206,19 +209,20 @@ namespace skyline::gpu::interconnect {
// Resolve descriptor infos from dynamic bindings // Resolve descriptor infos from dynamic bindings
for (size_t i{}; i < updateInfo->bufferDescDynamicBindings.size(); i++) { for (size_t i{}; i < updateInfo->bufferDescDynamicBindings.size(); i++) {
auto &dynamicBinding{updateInfo->bufferDescDynamicBindings[i]}; auto &dynamicBinding{updateInfo->bufferDescDynamicBindings[i]};
if (auto view{std::get_if<BufferView>(&dynamicBinding)}) { BufferBinding binding{[&dynamicBinding, &gpu]() {
updateInfo->bufferDescs[i] = vk::DescriptorBufferInfo{ if (auto view{std::get_if<BufferView>(&dynamicBinding)})
.buffer = view->GetBuffer()->GetBacking(), return view->GetBinding(gpu);
.offset = view->GetOffset(), else if (auto binding{std::get_if<BufferBinding>(&dynamicBinding)})
.range = view->size return *binding;
}; else
} else if (auto binding{std::get_if<BufferBinding>(&dynamicBinding)}) { return BufferBinding{};
updateInfo->bufferDescs[i] = vk::DescriptorBufferInfo{ }()};
.buffer = binding->buffer,
.offset = binding->offset, updateInfo->bufferDescs[i] = vk::DescriptorBufferInfo{
.range = binding->size .buffer = binding.buffer,
}; .offset = binding.offset,
} .range = binding.size
};
} }
if constexpr (PushDescriptor) { if constexpr (PushDescriptor) {

View File

@ -28,13 +28,14 @@ namespace skyline::gpu::interconnect {
dstBuf.GetBuffer()->BlockAllCpuBackingWrites(); dstBuf.GetBuffer()->BlockAllCpuBackingWrites();
auto srcGpuAllocation{gpu.megaBufferAllocator.Push(executor.cycle, src)}; auto srcGpuAllocation{gpu.megaBufferAllocator.Push(executor.cycle, src)};
executor.AddOutsideRpCommand([srcGpuAllocation, dstBuf, src](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &) { executor.AddOutsideRpCommand([srcGpuAllocation, dstBuf, src](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &pGpu) {
auto dstBufBinding{dstBuf.GetBinding(pGpu)};
vk::BufferCopy copyRegion{ vk::BufferCopy copyRegion{
.size = src.size_bytes(), .size = src.size_bytes(),
.srcOffset = srcGpuAllocation.offset, .srcOffset = srcGpuAllocation.offset,
.dstOffset = dstBuf.GetOffset() .dstOffset = dstBufBinding.offset,
}; };
commandBuffer.copyBuffer(srcGpuAllocation.buffer, dstBuf.GetBuffer()->GetBacking(), copyRegion); commandBuffer.copyBuffer(srcGpuAllocation.buffer, dstBufBinding.buffer, copyRegion);
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, {}, vk::MemoryBarrier{ commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, {}, vk::MemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eTransferWrite, .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite

View File

@ -62,13 +62,14 @@ namespace skyline::gpu::interconnect::maxwell3d {
callbackData.view.GetBuffer()->BlockAllCpuBackingWrites(); callbackData.view.GetBuffer()->BlockAllCpuBackingWrites();
auto srcGpuAllocation{callbackData.ctx.gpu.megaBufferAllocator.Push(callbackData.ctx.executor.cycle, callbackData.srcCpuBuf)}; auto srcGpuAllocation{callbackData.ctx.gpu.megaBufferAllocator.Push(callbackData.ctx.executor.cycle, callbackData.srcCpuBuf)};
callbackData.ctx.executor.AddOutsideRpCommand([=, srcCpuBuf = callbackData.srcCpuBuf, view = callbackData.view, offset = callbackData.offset](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &) { callbackData.ctx.executor.AddOutsideRpCommand([=, srcCpuBuf = callbackData.srcCpuBuf, view = callbackData.view, offset = callbackData.offset](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &gpu) {
auto binding{view.GetBinding(gpu)};
vk::BufferCopy copyRegion{ vk::BufferCopy copyRegion{
.size = srcCpuBuf.size_bytes(), .size = srcCpuBuf.size_bytes(),
.srcOffset = srcGpuAllocation.offset, .srcOffset = srcGpuAllocation.offset,
.dstOffset = view.GetOffset() + offset .dstOffset = binding.offset + offset
}; };
commandBuffer.copyBuffer(srcGpuAllocation.buffer, view.GetBuffer()->GetBacking(), copyRegion); commandBuffer.copyBuffer(srcGpuAllocation.buffer, binding.buffer, copyRegion);
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, {}, vk::MemoryBarrier{ commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, {}, vk::MemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eTransferWrite, .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite

View File

@ -40,17 +40,19 @@ namespace skyline::gpu::interconnect {
srcBuf.GetBuffer()->BlockAllCpuBackingWrites(); srcBuf.GetBuffer()->BlockAllCpuBackingWrites();
dstBuf.GetBuffer()->BlockAllCpuBackingWrites(); dstBuf.GetBuffer()->BlockAllCpuBackingWrites();
executor.AddOutsideRpCommand([srcBuf, dstBuf](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &) { executor.AddOutsideRpCommand([srcBuf, dstBuf](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &gpu) {
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, {}, vk::MemoryBarrier{ commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, {}, vk::MemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eMemoryRead, .srcAccessMask = vk::AccessFlagBits::eMemoryRead,
.dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite
}, {}, {}); }, {}, {});
auto srcBufBinding{srcBuf.GetBinding(gpu)};
auto dstBufBinding{dstBuf.GetBinding(gpu)};
vk::BufferCopy copyRegion{ vk::BufferCopy copyRegion{
.size = srcBuf.size, .size = srcBuf.size,
.srcOffset = srcBuf.GetOffset(), .srcOffset = srcBufBinding.offset,
.dstOffset = dstBuf.GetOffset() .dstOffset = dstBufBinding.offset
}; };
commandBuffer.copyBuffer(srcBuf.GetBuffer()->GetBacking(), dstBuf.GetBuffer()->GetBacking(), copyRegion); commandBuffer.copyBuffer(srcBufBinding.buffer, dstBufBinding.buffer, copyRegion);
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, {}, vk::MemoryBarrier{ commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, {}, vk::MemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eTransferWrite, .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,