Add more fine-grained buffer recreation locking

This commit is contained in:
Billy Laws 2023-01-09 21:16:23 +00:00
parent 85a23e73ba
commit 2b282ece1a
8 changed files with 52 additions and 33 deletions

View File

@ -697,6 +697,11 @@ namespace skyline::gpu {
return delegate->GetBuffer();
}
BufferBinding BufferView::GetBinding(GPU &gpu) const {
    // Take the buffer manager's recreation lock so the delegate's underlying
    // buffer cannot be swapped out between reading its backing and its offset
    std::scoped_lock recreationLock{gpu.buffer.recreationMutex};
    auto underlyingBuffer{delegate->GetBuffer()};
    vk::DeviceSize totalOffset{offset + delegate->GetOffset()};
    return BufferBinding{underlyingBuffer->GetBacking(), totalOffset, size};
}
vk::DeviceSize BufferView::GetOffset() const {
    // The view's absolute offset is its own offset added to the delegate's
    // offset within the underlying buffer
    vk::DeviceSize delegateOffset{delegate->GetOffset()};
    return delegateOffset + offset;
}

View File

@ -492,6 +492,13 @@ namespace skyline::gpu {
*/
vk::DeviceSize GetOffset() const;
/**
* @return A binding describing the underlying buffer state at a given moment in time
* @note The view **must** be locked prior to calling this
* @note This is the **ONLY** function in BufferView that can be called from non-GPFIFO threads
*/
BufferBinding GetBinding(GPU &gpu) const;
/**
* @note The buffer manager **MUST** be locked prior to calling this
*/

View File

@ -69,7 +69,7 @@ namespace skyline::gpu {
static bool BufferLessThan(const std::shared_ptr<Buffer> &it, u8 *pointer);
public:
std::mutex recreationMutex;
SpinLock recreationMutex;
BufferManager(GPU &gpu);

View File

@ -94,7 +94,6 @@ namespace skyline::gpu::interconnect {
vk::RenderPass lRenderPass;
u32 subpassIndex;
std::scoped_lock bufferLock{gpu.buffer.recreationMutex};
using namespace node;
for (NodeVariant &node : slot->nodes) {
#define NODE(name) [&](name& node) { node(slot->commandBuffer, slot->cycle, gpu); }

View File

@ -66,9 +66,10 @@ namespace skyline::gpu::interconnect {
struct SetVertexBuffersDynamicCmdImpl {
void Record(GPU &gpu, vk::raii::CommandBuffer &commandBuffer) {
for (u32 i{base.firstBinding}; i < base.firstBinding + base.bindingCount; i++) {
base.buffers[i] = views[i].GetBuffer()->GetBacking();
base.offsets[i] = views[i].GetOffset();
base.sizes[i] = views[i].size;
auto binding{views[i].GetBinding(gpu)};
base.buffers[i] = binding.buffer;
base.offsets[i] = binding.offset;
base.sizes[i] = binding.size;
}
base.Record(gpu, commandBuffer);
@ -92,8 +93,9 @@ namespace skyline::gpu::interconnect {
struct SetIndexBufferDynamicCmdImpl {
void Record(GPU &gpu, vk::raii::CommandBuffer &commandBuffer) {
base.buffer = view.GetBuffer()->GetBacking();
base.offset = view.GetOffset();
auto binding{view.GetBinding(gpu)};
base.buffer = binding.buffer;
base.offset = binding.offset;
base.Record(gpu, commandBuffer);
}
@ -116,9 +118,10 @@ namespace skyline::gpu::interconnect {
struct SetTransformFeedbackBufferDynamicCmdImpl {
void Record(GPU &gpu, vk::raii::CommandBuffer &commandBuffer) {
base.buffer = view.GetBuffer()->GetBacking();
base.offset = view.GetOffset();
base.size = view.size;
auto binding{view.GetBinding(gpu)};
base.buffer = binding.buffer;
base.offset = binding.offset;
base.size = binding.size;
base.Record(gpu, commandBuffer);
}
@ -206,19 +209,20 @@ namespace skyline::gpu::interconnect {
// Resolve descriptor infos from dynamic bindings
for (size_t i{}; i < updateInfo->bufferDescDynamicBindings.size(); i++) {
auto &dynamicBinding{updateInfo->bufferDescDynamicBindings[i]};
if (auto view{std::get_if<BufferView>(&dynamicBinding)}) {
updateInfo->bufferDescs[i] = vk::DescriptorBufferInfo{
.buffer = view->GetBuffer()->GetBacking(),
.offset = view->GetOffset(),
.range = view->size
};
} else if (auto binding{std::get_if<BufferBinding>(&dynamicBinding)}) {
updateInfo->bufferDescs[i] = vk::DescriptorBufferInfo{
.buffer = binding->buffer,
.offset = binding->offset,
.range = binding->size
};
}
BufferBinding binding{[&dynamicBinding, &gpu]() {
if (auto view{std::get_if<BufferView>(&dynamicBinding)})
return view->GetBinding(gpu);
else if (auto binding{std::get_if<BufferBinding>(&dynamicBinding)})
return *binding;
else
return BufferBinding{};
}()};
updateInfo->bufferDescs[i] = vk::DescriptorBufferInfo{
.buffer = binding.buffer,
.offset = binding.offset,
.range = binding.size
};
}
if constexpr (PushDescriptor) {

View File

@ -28,13 +28,14 @@ namespace skyline::gpu::interconnect {
dstBuf.GetBuffer()->BlockAllCpuBackingWrites();
auto srcGpuAllocation{gpu.megaBufferAllocator.Push(executor.cycle, src)};
executor.AddOutsideRpCommand([srcGpuAllocation, dstBuf, src](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &) {
executor.AddOutsideRpCommand([srcGpuAllocation, dstBuf, src](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &pGpu) {
auto dstBufBinding{dstBuf.GetBinding(pGpu)};
vk::BufferCopy copyRegion{
.size = src.size_bytes(),
.srcOffset = srcGpuAllocation.offset,
.dstOffset = dstBuf.GetOffset()
.dstOffset = dstBufBinding.offset,
};
commandBuffer.copyBuffer(srcGpuAllocation.buffer, dstBuf.GetBuffer()->GetBacking(), copyRegion);
commandBuffer.copyBuffer(srcGpuAllocation.buffer, dstBufBinding.buffer, copyRegion);
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, {}, vk::MemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite

View File

@ -62,13 +62,14 @@ namespace skyline::gpu::interconnect::maxwell3d {
callbackData.view.GetBuffer()->BlockAllCpuBackingWrites();
auto srcGpuAllocation{callbackData.ctx.gpu.megaBufferAllocator.Push(callbackData.ctx.executor.cycle, callbackData.srcCpuBuf)};
callbackData.ctx.executor.AddOutsideRpCommand([=, srcCpuBuf = callbackData.srcCpuBuf, view = callbackData.view, offset = callbackData.offset](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &) {
callbackData.ctx.executor.AddOutsideRpCommand([=, srcCpuBuf = callbackData.srcCpuBuf, view = callbackData.view, offset = callbackData.offset](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &gpu) {
auto binding{view.GetBinding(gpu)};
vk::BufferCopy copyRegion{
.size = srcCpuBuf.size_bytes(),
.srcOffset = srcGpuAllocation.offset,
.dstOffset = view.GetOffset() + offset
.dstOffset = binding.offset + offset
};
commandBuffer.copyBuffer(srcGpuAllocation.buffer, view.GetBuffer()->GetBacking(), copyRegion);
commandBuffer.copyBuffer(srcGpuAllocation.buffer, binding.buffer, copyRegion);
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, {}, vk::MemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite

View File

@ -40,17 +40,19 @@ namespace skyline::gpu::interconnect {
srcBuf.GetBuffer()->BlockAllCpuBackingWrites();
dstBuf.GetBuffer()->BlockAllCpuBackingWrites();
executor.AddOutsideRpCommand([srcBuf, dstBuf](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &) {
executor.AddOutsideRpCommand([srcBuf, dstBuf](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &gpu) {
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, {}, vk::MemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eMemoryRead,
.dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite
}, {}, {});
auto srcBufBinding{srcBuf.GetBinding(gpu)};
auto dstBufBinding{dstBuf.GetBinding(gpu)};
vk::BufferCopy copyRegion{
.size = srcBuf.size,
.srcOffset = srcBuf.GetOffset(),
.dstOffset = dstBuf.GetOffset()
.srcOffset = srcBufBinding.offset,
.dstOffset = dstBufBinding.offset
};
commandBuffer.copyBuffer(srcBuf.GetBuffer()->GetBacking(), dstBuf.GetBuffer()->GetBacking(), copyRegion);
commandBuffer.copyBuffer(srcBufBinding.buffer, dstBufBinding.buffer, copyRegion);
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, {}, vk::MemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,