Improve Vulkan Texture Synchronization

The Vulkan pipeline barriers were previously suboptimal and, to some degree, incorrect, as we only synchronized images and not staging buffers. This has now been fixed, and synchronization has been improved in general to be more relevant.
This commit is contained in:
PixelyIon 2021-11-09 21:08:03 +05:30
parent bf71804089
commit 79ceb2cf23
2 changed files with 42 additions and 68 deletions

View File

@ -54,13 +54,13 @@ namespace skyline::gpu {
void Texture::CopyFromStagingBuffer(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<memory::StagingBuffer> &stagingBuffer) { void Texture::CopyFromStagingBuffer(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<memory::StagingBuffer> &stagingBuffer) {
auto image{GetBacking()}; auto image{GetBacking()};
if (layout != vk::ImageLayout::eTransferDstOptimal) { if (layout == vk::ImageLayout::eUndefined)
commandBuffer.pipelineBarrier(layout != vk::ImageLayout::eUndefined ? vk::PipelineStageFlagBits::eTopOfPipe : vk::PipelineStageFlagBits::eBottomOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{
.image = image, .image = image,
.srcAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, .srcAccessMask = vk::AccessFlagBits::eMemoryRead,
.dstAccessMask = vk::AccessFlagBits::eTransferWrite, .dstAccessMask = vk::AccessFlagBits::eTransferWrite,
.oldLayout = layout, .oldLayout = std::exchange(layout, vk::ImageLayout::eGeneral),
.newLayout = vk::ImageLayout::eTransferDstOptimal, .newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.subresourceRange = { .subresourceRange = {
@ -70,71 +70,22 @@ namespace skyline::gpu {
}, },
}); });
if (layout == vk::ImageLayout::eUndefined) commandBuffer.copyBufferToImage(stagingBuffer->vkBuffer, image, layout, vk::BufferImageCopy{
layout = vk::ImageLayout::eTransferDstOptimal;
}
commandBuffer.copyBufferToImage(stagingBuffer->vkBuffer, image, vk::ImageLayout::eTransferDstOptimal, vk::BufferImageCopy{
.imageExtent = dimensions, .imageExtent = dimensions,
.imageSubresource = { .imageSubresource = {
.aspectMask = format->vkAspect, .aspectMask = format->vkAspect,
.layerCount = layerCount, .layerCount = layerCount,
}, },
}); });
if (layout != vk::ImageLayout::eTransferDstOptimal)
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{
.image = image,
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead,
.oldLayout = vk::ImageLayout::eTransferDstOptimal,
.newLayout = layout,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.subresourceRange = {
.aspectMask = format->vkAspect,
.levelCount = mipLevels,
.layerCount = layerCount,
},
});
} }
void Texture::CopyIntoStagingBuffer(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<memory::StagingBuffer> &stagingBuffer) { void Texture::CopyIntoStagingBuffer(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<memory::StagingBuffer> &stagingBuffer) {
auto image{GetBacking()}; auto image{GetBacking()};
if (layout != vk::ImageLayout::eTransferSrcOptimal) { commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{
commandBuffer.pipelineBarrier(layout != vk::ImageLayout::eUndefined ? vk::PipelineStageFlagBits::eTopOfPipe : vk::PipelineStageFlagBits::eBottomOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{
.image = image, .image = image,
.srcAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, .srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
.dstAccessMask = vk::AccessFlagBits::eTransferRead, .dstAccessMask = vk::AccessFlagBits::eTransferRead,
.oldLayout = layout, .oldLayout = layout,
.newLayout = vk::ImageLayout::eTransferSrcOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.subresourceRange = {
.aspectMask = format->vkAspect,
.levelCount = mipLevels,
.layerCount = layerCount,
},
});
if (layout == vk::ImageLayout::eUndefined)
layout = vk::ImageLayout::eTransferSrcOptimal;
}
commandBuffer.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal, stagingBuffer->vkBuffer, vk::BufferImageCopy{
.imageExtent = dimensions,
.imageSubresource = {
.aspectMask = format->vkAspect,
.layerCount = layerCount,
},
});
if (layout != vk::ImageLayout::eTransferSrcOptimal)
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{
.image = image,
.srcAccessMask = vk::AccessFlagBits::eTransferRead,
.dstAccessMask = vk::AccessFlagBits::eMemoryWrite,
.oldLayout = vk::ImageLayout::eTransferSrcOptimal,
.newLayout = layout, .newLayout = layout,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@ -144,6 +95,24 @@ namespace skyline::gpu {
.layerCount = layerCount, .layerCount = layerCount,
}, },
}); });
commandBuffer.copyImageToBuffer(image, layout, stagingBuffer->vkBuffer, vk::BufferImageCopy{
.imageExtent = dimensions,
.imageSubresource = {
.aspectMask = format->vkAspect,
.layerCount = layerCount,
},
});
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eHost, {}, {}, vk::BufferMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eHostRead,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = stagingBuffer->vkBuffer,
.offset = 0,
.size = stagingBuffer->size(),
}, {});
} }
void Texture::CopyToGuest(u8 *hostBuffer) { void Texture::CopyToGuest(u8 *hostBuffer) {
@ -284,13 +253,13 @@ namespace skyline::gpu {
TRACE_EVENT("gpu", "Texture::TransitionLayout"); TRACE_EVENT("gpu", "Texture::TransitionLayout");
if (layout != pLayout) { if (layout != pLayout)
cycle = gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) { cycle = gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) {
commandBuffer.pipelineBarrier(layout != vk::ImageLayout::eUndefined ? vk::PipelineStageFlagBits::eTopOfPipe : vk::PipelineStageFlagBits::eBottomOfPipe, vk::PipelineStageFlagBits::eBottomOfPipe, {}, {}, {}, vk::ImageMemoryBarrier{ commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, vk::PipelineStageFlagBits::eBottomOfPipe, {}, {}, {}, vk::ImageMemoryBarrier{
.image = GetBacking(), .image = GetBacking(),
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite, .srcAccessMask = vk::AccessFlagBits::eNoneKHR,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead, .dstAccessMask = vk::AccessFlagBits::eNoneKHR,
.oldLayout = layout, .oldLayout = std::exchange(layout, pLayout),
.newLayout = pLayout, .newLayout = pLayout,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@ -301,8 +270,6 @@ namespace skyline::gpu {
}, },
}); });
}); });
layout = pLayout;
}
} }
void Texture::SynchronizeHost() { void Texture::SynchronizeHost() {
@ -339,6 +306,9 @@ namespace skyline::gpu {
TRACE_EVENT("gpu", "Texture::SynchronizeGuest"); TRACE_EVENT("gpu", "Texture::SynchronizeGuest");
if (layout == vk::ImageLayout::eUndefined)
return; // We don't need to synchronize the image if it is in an undefined state on the host
WaitOnBacking(); WaitOnBacking();
WaitOnFence(); WaitOnFence();
@ -369,6 +339,9 @@ namespace skyline::gpu {
TRACE_EVENT("gpu", "Texture::SynchronizeGuestWithBuffer"); TRACE_EVENT("gpu", "Texture::SynchronizeGuestWithBuffer");
if (layout == vk::ImageLayout::eUndefined)
return;
WaitOnBacking(); WaitOnBacking();
if (cycle.lock() != pCycle) if (cycle.lock() != pCycle)
WaitOnFence(); WaitOnFence();

View File

@ -347,6 +347,7 @@ namespace skyline::gpu {
/** /**
* @brief Records commands for copying data from the texture's backing to a staging buffer into the supplied command buffer * @brief Records commands for copying data from the texture's backing to a staging buffer into the supplied command buffer
* @note Any caller **must** ensure that the layout is not `eUndefined`
*/ */
void CopyIntoStagingBuffer(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<memory::StagingBuffer> &stagingBuffer); void CopyIntoStagingBuffer(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<memory::StagingBuffer> &stagingBuffer);