Use semaphores for presentation engine frame synchronisation

Avoids waits on the CPU, which can be costly and confuse the scheduler; it also reduces latency significantly.
This commit is contained in:
Billy Laws 2022-10-16 20:47:17 +01:00
parent 0670e0e0dc
commit 1a0819fb76
4 changed files with 106 additions and 87 deletions

View File

@ -24,7 +24,8 @@ namespace skyline::gpu {
PresentationEngine::PresentationEngine(const DeviceState &state, GPU &gpu) PresentationEngine::PresentationEngine(const DeviceState &state, GPU &gpu)
: state{state}, : state{state},
gpu{gpu}, gpu{gpu},
acquireFence{gpu.vkDevice, vk::FenceCreateInfo{}}, presentSemaphores{util::MakeFilledArray<vk::raii::Semaphore, MaxSwapchainImageCount>(gpu.vkDevice, vk::SemaphoreCreateInfo{})},
acquireSemaphores{util::MakeFilledArray<vk::raii::Semaphore, MaxSwapchainImageCount>(gpu.vkDevice, vk::SemaphoreCreateInfo{})},
presentationTrack{static_cast<u64>(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()}, presentationTrack{static_cast<u64>(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()},
vsyncEvent{std::make_shared<kernel::type::KEvent>(state, true)}, vsyncEvent{std::make_shared<kernel::type::KEvent>(state, true)},
choreographerThread{&PresentationEngine::ChoreographerThread, this}, choreographerThread{&PresentationEngine::ChoreographerThread, this},
@ -116,35 +117,31 @@ namespace skyline::gpu {
windowScalingMode = frame.scalingMode; windowScalingMode = frame.scalingMode;
} }
if (frame.transform != windowTransform) {
if ((result = window->perform(window, NATIVE_WINDOW_SET_BUFFERS_TRANSFORM, static_cast<i32>(frame.transform)))) if ((result = window->perform(window, NATIVE_WINDOW_SET_BUFFERS_TRANSFORM, static_cast<i32>(frame.transform))))
throw exception("Setting the buffer transform to '{}' failed with {}", ToString(frame.transform), result); throw exception("Setting the buffer transform to '{}' failed with {}", ToString(frame.transform), result);
windowTransform = frame.transform; windowTransform = frame.transform;
}
gpu.vkDevice.resetFences(*acquireFence); auto &acquireSemaphore{acquireSemaphores[acquireSemaphoreIndex]};
acquireSemaphoreIndex = (acquireSemaphoreIndex + 1) % swapchainImageCount;
std::pair<vk::Result, u32> nextImage; std::pair<vk::Result, u32> nextImage;
while (nextImage = vkSwapchain->acquireNextImage(std::numeric_limits<u64>::max(), {}, *acquireFence), nextImage.first != vk::Result::eSuccess) [[unlikely]] { while (nextImage = vkSwapchain->acquireNextImage(std::numeric_limits<u64>::max(), *acquireSemaphore, {}), nextImage.first != vk::Result::eSuccess) [[unlikely]] {
if (nextImage.first == vk::Result::eSuboptimalKHR) if (nextImage.first == vk::Result::eSuboptimalKHR)
surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); }); surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); });
else else
throw exception("vkAcquireNextImageKHR returned an unhandled result '{}'", vk::to_string(nextImage.first)); throw exception("vkAcquireNextImageKHR returned an unhandled result '{}'", vk::to_string(nextImage.first));
} }
auto &nextImageTexture{images.at(nextImage.second)};
std::ignore = gpu.vkDevice.waitForFences(*acquireFence, true, std::numeric_limits<u64>::max()); auto &nextImageTexture{images.at(nextImage.second)};
auto &presentSemaphore{presentSemaphores[nextImage.second]};
texture->SynchronizeHost(); texture->SynchronizeHost();
nextImageTexture->CopyFrom(texture, vk::ImageSubresourceRange{ nextImageTexture->CopyFrom(texture, *acquireSemaphore, *presentSemaphore, vk::ImageSubresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor, .aspectMask = vk::ImageAspectFlagBits::eColor,
.levelCount = 1, .levelCount = 1,
.layerCount = 1, .layerCount = 1,
}); });
// Wait on the copy to the swapchain image to complete before submitting for presentation
nextImageTexture->WaitOnFence();
auto getMonotonicNsNow{[]() -> i64 { auto getMonotonicNsNow{[]() -> i64 {
timespec time; timespec time;
if (clock_gettime(CLOCK_MONOTONIC, &time)) if (clock_gettime(CLOCK_MONOTONIC, &time))
@ -194,6 +191,8 @@ namespace skyline::gpu {
.swapchainCount = 1, .swapchainCount = 1,
.pSwapchains = &**vkSwapchain, .pSwapchains = &**vkSwapchain,
.pImageIndices = &nextImage.second, .pImageIndices = &nextImage.second,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &*presentSemaphore,
}); // We don't care about suboptimal images as they are caused by not respecting the transform hint, we handle transformations externally }); // We don't care about suboptimal images as they are caused by not respecting the transform hint, we handle transformations externally
} }
@ -328,6 +327,7 @@ namespace skyline::gpu {
swapchainFormat = format; swapchainFormat = format;
swapchainExtent = extent; swapchainExtent = extent;
swapchainImageCount = vkImages.size();
} }
void PresentationEngine::UpdateSurface(jobject newSurface) { void PresentationEngine::UpdateSurface(jobject newSurface) {

View File

@ -35,12 +35,15 @@ namespace skyline::gpu {
vk::SurfaceCapabilitiesKHR vkSurfaceCapabilities{}; //!< The capabilities of the current Vulkan Surface vk::SurfaceCapabilitiesKHR vkSurfaceCapabilities{}; //!< The capabilities of the current Vulkan Surface
std::optional<vk::raii::SwapchainKHR> vkSwapchain; //!< The Vulkan swapchain and the properties associated with it std::optional<vk::raii::SwapchainKHR> vkSwapchain; //!< The Vulkan swapchain and the properties associated with it
vk::raii::Fence acquireFence; //!< A fence for acquiring an image from the swapchain
texture::Format swapchainFormat{}; //!< The image format of the textures in the current swapchain texture::Format swapchainFormat{}; //!< The image format of the textures in the current swapchain
texture::Dimensions swapchainExtent{}; //!< The extent of images in the current swapchain texture::Dimensions swapchainExtent{}; //!< The extent of images in the current swapchain
static constexpr size_t MaxSwapchainImageCount{6}; //!< The maximum amount of swapchain textures, this affects the amount of images that can be in the swapchain static constexpr size_t MaxSwapchainImageCount{6}; //!< The maximum amount of swapchain textures, this affects the amount of images that can be in the swapchain
std::array<std::shared_ptr<Texture>, MaxSwapchainImageCount> images; //!< All the swapchain textures in the same order as supplied by the host swapchain std::array<std::shared_ptr<Texture>, MaxSwapchainImageCount> images; //!< All the swapchain textures in the same order as supplied by the host swapchain
std::array<vk::raii::Semaphore, MaxSwapchainImageCount> presentSemaphores; //!< Array of semaphores used to signal that swapchain images are ready to be presented, indexed by Vulkan swapchain index
std::array<vk::raii::Semaphore, MaxSwapchainImageCount> acquireSemaphores; //!< Array of semaphores used to wait on the GPU for swapchain images to be acquired, indexed by `acquireSemaphoreIndex`
size_t acquireSemaphoreIndex{}; //!< The index of the next semaphore to be used for acquiring swapchain images
size_t swapchainImageCount{}; //!< The number of images in the current swapchain
i64 frameTimestamp{}; //!< The timestamp of the last frame being shown in nanoseconds i64 frameTimestamp{}; //!< The timestamp of the last frame being shown in nanoseconds
i64 averageFrametimeNs{}; //!< The average time between frames in nanoseconds i64 averageFrametimeNs{}; //!< The average time between frames in nanoseconds

View File

@ -839,14 +839,16 @@ namespace skyline::gpu {
return std::make_shared<TextureView>(shared_from_this(), type, range, pFormat, mapping); return std::make_shared<TextureView>(shared_from_this(), type, range, pFormat, mapping);
} }
void Texture::CopyFrom(std::shared_ptr<Texture> source, const vk::ImageSubresourceRange &subresource) { void Texture::CopyFrom(std::shared_ptr<Texture> source, vk::Semaphore waitSemaphore, vk::Semaphore signalSemaphore, const vk::ImageSubresourceRange &subresource) {
WaitOnBacking();
source->WaitOnBacking();
if (cycle) if (cycle)
cycle->WaitSubmit(); cycle->WaitSubmit();
if (source->cycle) if (source->cycle)
source->cycle->WaitSubmit(); source->cycle->WaitSubmit();
WaitOnBacking();
source->WaitOnBacking();
WaitOnFence();
if (source->layout == vk::ImageLayout::eUndefined) if (source->layout == vk::ImageLayout::eUndefined)
throw exception("Cannot copy from image with undefined layout"); throw exception("Cannot copy from image with undefined layout");
else if (source->dimensions != dimensions) else if (source->dimensions != dimensions)
@ -854,7 +856,15 @@ namespace skyline::gpu {
TRACE_EVENT("gpu", "Texture::CopyFrom"); TRACE_EVENT("gpu", "Texture::CopyFrom");
auto lCycle{gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) { auto submitFunc{[&](vk::Semaphore extraWaitSemaphore){
boost::container::small_vector<vk::Semaphore, 2> waitSemaphores;
if (waitSemaphore)
waitSemaphores.push_back(waitSemaphore);
if (extraWaitSemaphore)
waitSemaphores.push_back(extraWaitSemaphore);
return gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) {
auto sourceBacking{source->GetBacking()}; auto sourceBacking{source->GetBacking()};
if (source->layout != vk::ImageLayout::eTransferSrcOptimal) { if (source->layout != vk::ImageLayout::eTransferSrcOptimal) {
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{
@ -871,7 +881,7 @@ namespace skyline::gpu {
auto destinationBacking{GetBacking()}; auto destinationBacking{GetBacking()};
if (layout != vk::ImageLayout::eTransferDstOptimal) { if (layout != vk::ImageLayout::eTransferDstOptimal) {
commandBuffer.pipelineBarrier(layout != vk::ImageLayout::eUndefined ? vk::PipelineStageFlagBits::eTopOfPipe : vk::PipelineStageFlagBits::eBottomOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eAllCommands, {}, {}, {}, vk::ImageMemoryBarrier{
.image = destinationBacking, .image = destinationBacking,
.srcAccessMask = vk::AccessFlagBits::eMemoryRead, .srcAccessMask = vk::AccessFlagBits::eMemoryRead,
.dstAccessMask = vk::AccessFlagBits::eTransferWrite, .dstAccessMask = vk::AccessFlagBits::eTransferWrite,
@ -900,7 +910,7 @@ namespace skyline::gpu {
}); });
if (layout != vk::ImageLayout::eTransferDstOptimal) if (layout != vk::ImageLayout::eTransferDstOptimal)
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eAllCommands, {}, {}, {}, vk::ImageMemoryBarrier{
.image = destinationBacking, .image = destinationBacking,
.srcAccessMask = vk::AccessFlagBits::eTransferWrite, .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead, .dstAccessMask = vk::AccessFlagBits::eMemoryRead,
@ -912,7 +922,7 @@ namespace skyline::gpu {
}); });
if (source->layout != vk::ImageLayout::eTransferSrcOptimal) if (source->layout != vk::ImageLayout::eTransferSrcOptimal)
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eAllCommands, {}, {}, {}, vk::ImageMemoryBarrier{
.image = sourceBacking, .image = sourceBacking,
.srcAccessMask = vk::AccessFlagBits::eTransferRead, .srcAccessMask = vk::AccessFlagBits::eTransferRead,
.dstAccessMask = vk::AccessFlagBits::eMemoryWrite, .dstAccessMask = vk::AccessFlagBits::eMemoryWrite,
@ -922,10 +932,16 @@ namespace skyline::gpu {
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.subresourceRange = subresource, .subresourceRange = subresource,
}); });
})}; }, waitSemaphores, span<vk::Semaphore>{signalSemaphore});
lCycle->AttachObjects(std::move(source), shared_from_this()); }};
lCycle->ChainCycle(cycle);
lCycle->ChainCycle(source->cycle); auto newCycle{[&]{
cycle = lCycle; if (source->cycle)
return source->cycle->RecordSemaphoreWaitUsage(std::move(submitFunc));
else
return submitFunc({});
}()};
newCycle->AttachObjects(std::move(source), shared_from_this());
cycle = newCycle;
} }
} }

View File

@ -559,7 +559,7 @@ namespace skyline::gpu {
/** /**
* @brief Copies the contents of the supplied source texture into the current texture * @brief Copies the contents of the supplied source texture into the current texture
*/ */
void CopyFrom(std::shared_ptr<Texture> source, const vk::ImageSubresourceRange &subresource = vk::ImageSubresourceRange{ void CopyFrom(std::shared_ptr<Texture> source, vk::Semaphore waitSemaphore, vk::Semaphore signalSemaphore, const vk::ImageSubresourceRange &subresource = vk::ImageSubresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor, .aspectMask = vk::ImageAspectFlagBits::eColor,
.levelCount = VK_REMAINING_MIP_LEVELS, .levelCount = VK_REMAINING_MIP_LEVELS,
.layerCount = VK_REMAINING_ARRAY_LAYERS, .layerCount = VK_REMAINING_ARRAY_LAYERS,