mirror of
https://github.com/skyline-emu/skyline.git
synced 2024-12-26 18:41:49 +01:00
Use semaphores for presentation engine frame synchronisation
Avoids waits on the CPU, which can be costly and confuse the scheduler; this also reduces latency significantly.
This commit is contained in:
parent
0670e0e0dc
commit
1a0819fb76
@ -24,7 +24,8 @@ namespace skyline::gpu {
|
||||
PresentationEngine::PresentationEngine(const DeviceState &state, GPU &gpu)
|
||||
: state{state},
|
||||
gpu{gpu},
|
||||
acquireFence{gpu.vkDevice, vk::FenceCreateInfo{}},
|
||||
presentSemaphores{util::MakeFilledArray<vk::raii::Semaphore, MaxSwapchainImageCount>(gpu.vkDevice, vk::SemaphoreCreateInfo{})},
|
||||
acquireSemaphores{util::MakeFilledArray<vk::raii::Semaphore, MaxSwapchainImageCount>(gpu.vkDevice, vk::SemaphoreCreateInfo{})},
|
||||
presentationTrack{static_cast<u64>(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()},
|
||||
vsyncEvent{std::make_shared<kernel::type::KEvent>(state, true)},
|
||||
choreographerThread{&PresentationEngine::ChoreographerThread, this},
|
||||
@ -116,35 +117,31 @@ namespace skyline::gpu {
|
||||
windowScalingMode = frame.scalingMode;
|
||||
}
|
||||
|
||||
if (frame.transform != windowTransform) {
|
||||
if ((result = window->perform(window, NATIVE_WINDOW_SET_BUFFERS_TRANSFORM, static_cast<i32>(frame.transform))))
|
||||
throw exception("Setting the buffer transform to '{}' failed with {}", ToString(frame.transform), result);
|
||||
windowTransform = frame.transform;
|
||||
}
|
||||
if ((result = window->perform(window, NATIVE_WINDOW_SET_BUFFERS_TRANSFORM, static_cast<i32>(frame.transform))))
|
||||
throw exception("Setting the buffer transform to '{}' failed with {}", ToString(frame.transform), result);
|
||||
windowTransform = frame.transform;
|
||||
|
||||
gpu.vkDevice.resetFences(*acquireFence);
|
||||
auto &acquireSemaphore{acquireSemaphores[acquireSemaphoreIndex]};
|
||||
acquireSemaphoreIndex = (acquireSemaphoreIndex + 1) % swapchainImageCount;
|
||||
|
||||
std::pair<vk::Result, u32> nextImage;
|
||||
while (nextImage = vkSwapchain->acquireNextImage(std::numeric_limits<u64>::max(), {}, *acquireFence), nextImage.first != vk::Result::eSuccess) [[unlikely]] {
|
||||
while (nextImage = vkSwapchain->acquireNextImage(std::numeric_limits<u64>::max(), *acquireSemaphore, {}), nextImage.first != vk::Result::eSuccess) [[unlikely]] {
|
||||
if (nextImage.first == vk::Result::eSuboptimalKHR)
|
||||
surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); });
|
||||
else
|
||||
throw exception("vkAcquireNextImageKHR returned an unhandled result '{}'", vk::to_string(nextImage.first));
|
||||
}
|
||||
auto &nextImageTexture{images.at(nextImage.second)};
|
||||
|
||||
std::ignore = gpu.vkDevice.waitForFences(*acquireFence, true, std::numeric_limits<u64>::max());
|
||||
auto &nextImageTexture{images.at(nextImage.second)};
|
||||
auto &presentSemaphore{presentSemaphores[nextImage.second]};
|
||||
|
||||
texture->SynchronizeHost();
|
||||
nextImageTexture->CopyFrom(texture, vk::ImageSubresourceRange{
|
||||
nextImageTexture->CopyFrom(texture, *acquireSemaphore, *presentSemaphore, vk::ImageSubresourceRange{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.levelCount = 1,
|
||||
.layerCount = 1,
|
||||
});
|
||||
|
||||
// Wait on the copy to the swapchain image to complete before submitting for presentation
|
||||
nextImageTexture->WaitOnFence();
|
||||
|
||||
auto getMonotonicNsNow{[]() -> i64 {
|
||||
timespec time;
|
||||
if (clock_gettime(CLOCK_MONOTONIC, &time))
|
||||
@ -194,6 +191,8 @@ namespace skyline::gpu {
|
||||
.swapchainCount = 1,
|
||||
.pSwapchains = &**vkSwapchain,
|
||||
.pImageIndices = &nextImage.second,
|
||||
.waitSemaphoreCount = 1,
|
||||
.pWaitSemaphores = &*presentSemaphore,
|
||||
}); // We don't care about suboptimal images as they are caused by not respecting the transform hint, we handle transformations externally
|
||||
}
|
||||
|
||||
@ -328,6 +327,7 @@ namespace skyline::gpu {
|
||||
|
||||
swapchainFormat = format;
|
||||
swapchainExtent = extent;
|
||||
swapchainImageCount = vkImages.size();
|
||||
}
|
||||
|
||||
void PresentationEngine::UpdateSurface(jobject newSurface) {
|
||||
|
@ -35,12 +35,15 @@ namespace skyline::gpu {
|
||||
vk::SurfaceCapabilitiesKHR vkSurfaceCapabilities{}; //!< The capabilities of the current Vulkan Surface
|
||||
|
||||
std::optional<vk::raii::SwapchainKHR> vkSwapchain; //!< The Vulkan swapchain and the properties associated with it
|
||||
vk::raii::Fence acquireFence; //!< A fence for acquiring an image from the swapchain
|
||||
texture::Format swapchainFormat{}; //!< The image format of the textures in the current swapchain
|
||||
texture::Dimensions swapchainExtent{}; //!< The extent of images in the current swapchain
|
||||
|
||||
static constexpr size_t MaxSwapchainImageCount{6}; //!< The maximum amount of swapchain textures, this affects the amount of images that can be in the swapchain
|
||||
std::array<std::shared_ptr<Texture>, MaxSwapchainImageCount> images; //!< All the swapchain textures in the same order as supplied by the host swapchain
|
||||
std::array<vk::raii::Semaphore, MaxSwapchainImageCount> presentSemaphores; //!< Array of semaphores signalled by the copy into a swapchain image and waited on when presenting that image, indexed by Vulkan swapchain index
|
||||
std::array<vk::raii::Semaphore, MaxSwapchainImageCount> acquireSemaphores; //!< Array of semaphores used to wait on the GPU for swapchain images to be acquired, indexed by `acquireSemaphoreIndex`
|
||||
size_t acquireSemaphoreIndex{}; //!< The index of the next semaphore to be used for acquiring swapchain images
|
||||
size_t swapchainImageCount{}; //!< The number of images in the current swapchain
|
||||
|
||||
i64 frameTimestamp{}; //!< The timestamp of the last frame being shown in nanoseconds
|
||||
i64 averageFrametimeNs{}; //!< The average time between frames in nanoseconds
|
||||
|
@ -839,14 +839,16 @@ namespace skyline::gpu {
|
||||
return std::make_shared<TextureView>(shared_from_this(), type, range, pFormat, mapping);
|
||||
}
|
||||
|
||||
void Texture::CopyFrom(std::shared_ptr<Texture> source, const vk::ImageSubresourceRange &subresource) {
|
||||
WaitOnBacking();
|
||||
source->WaitOnBacking();
|
||||
void Texture::CopyFrom(std::shared_ptr<Texture> source, vk::Semaphore waitSemaphore, vk::Semaphore signalSemaphore, const vk::ImageSubresourceRange &subresource) {
|
||||
if (cycle)
|
||||
cycle->WaitSubmit();
|
||||
if (source->cycle)
|
||||
source->cycle->WaitSubmit();
|
||||
|
||||
WaitOnBacking();
|
||||
source->WaitOnBacking();
|
||||
WaitOnFence();
|
||||
|
||||
if (source->layout == vk::ImageLayout::eUndefined)
|
||||
throw exception("Cannot copy from image with undefined layout");
|
||||
else if (source->dimensions != dimensions)
|
||||
@ -854,78 +856,92 @@ namespace skyline::gpu {
|
||||
|
||||
TRACE_EVENT("gpu", "Texture::CopyFrom");
|
||||
|
||||
auto lCycle{gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) {
|
||||
auto sourceBacking{source->GetBacking()};
|
||||
if (source->layout != vk::ImageLayout::eTransferSrcOptimal) {
|
||||
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{
|
||||
.image = sourceBacking,
|
||||
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eTransferRead,
|
||||
.oldLayout = source->layout,
|
||||
.newLayout = vk::ImageLayout::eTransferSrcOptimal,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.subresourceRange = subresource,
|
||||
});
|
||||
}
|
||||
auto submitFunc{[&](vk::Semaphore extraWaitSemaphore){
|
||||
boost::container::small_vector<vk::Semaphore, 2> waitSemaphores;
|
||||
if (waitSemaphore)
|
||||
waitSemaphores.push_back(waitSemaphore);
|
||||
|
||||
auto destinationBacking{GetBacking()};
|
||||
if (layout != vk::ImageLayout::eTransferDstOptimal) {
|
||||
commandBuffer.pipelineBarrier(layout != vk::ImageLayout::eUndefined ? vk::PipelineStageFlagBits::eTopOfPipe : vk::PipelineStageFlagBits::eBottomOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{
|
||||
.image = destinationBacking,
|
||||
.srcAccessMask = vk::AccessFlagBits::eMemoryRead,
|
||||
.dstAccessMask = vk::AccessFlagBits::eTransferWrite,
|
||||
.oldLayout = layout,
|
||||
.newLayout = vk::ImageLayout::eTransferDstOptimal,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.subresourceRange = subresource,
|
||||
});
|
||||
if (extraWaitSemaphore)
|
||||
waitSemaphores.push_back(extraWaitSemaphore);
|
||||
|
||||
if (layout == vk::ImageLayout::eUndefined)
|
||||
layout = vk::ImageLayout::eTransferDstOptimal;
|
||||
}
|
||||
return gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) {
|
||||
auto sourceBacking{source->GetBacking()};
|
||||
if (source->layout != vk::ImageLayout::eTransferSrcOptimal) {
|
||||
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{
|
||||
.image = sourceBacking,
|
||||
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eTransferRead,
|
||||
.oldLayout = source->layout,
|
||||
.newLayout = vk::ImageLayout::eTransferSrcOptimal,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.subresourceRange = subresource,
|
||||
});
|
||||
}
|
||||
|
||||
vk::ImageSubresourceLayers subresourceLayers{
|
||||
.aspectMask = subresource.aspectMask,
|
||||
.mipLevel = subresource.baseMipLevel,
|
||||
.baseArrayLayer = subresource.baseArrayLayer,
|
||||
.layerCount = subresource.layerCount == VK_REMAINING_ARRAY_LAYERS ? layerCount - subresource.baseArrayLayer : subresource.layerCount,
|
||||
};
|
||||
for (; subresourceLayers.mipLevel < (subresource.levelCount == VK_REMAINING_MIP_LEVELS ? levelCount - subresource.baseMipLevel : subresource.levelCount); subresourceLayers.mipLevel++)
|
||||
commandBuffer.copyImage(sourceBacking, vk::ImageLayout::eTransferSrcOptimal, destinationBacking, vk::ImageLayout::eTransferDstOptimal, vk::ImageCopy{
|
||||
.srcSubresource = subresourceLayers,
|
||||
.dstSubresource = subresourceLayers,
|
||||
.extent = dimensions,
|
||||
});
|
||||
auto destinationBacking{GetBacking()};
|
||||
if (layout != vk::ImageLayout::eTransferDstOptimal) {
|
||||
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eAllCommands, {}, {}, {}, vk::ImageMemoryBarrier{
|
||||
.image = destinationBacking,
|
||||
.srcAccessMask = vk::AccessFlagBits::eMemoryRead,
|
||||
.dstAccessMask = vk::AccessFlagBits::eTransferWrite,
|
||||
.oldLayout = layout,
|
||||
.newLayout = vk::ImageLayout::eTransferDstOptimal,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.subresourceRange = subresource,
|
||||
});
|
||||
|
||||
if (layout != vk::ImageLayout::eTransferDstOptimal)
|
||||
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{
|
||||
.image = destinationBacking,
|
||||
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eMemoryRead,
|
||||
.oldLayout = vk::ImageLayout::eTransferDstOptimal,
|
||||
.newLayout = layout,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.subresourceRange = subresource,
|
||||
});
|
||||
if (layout == vk::ImageLayout::eUndefined)
|
||||
layout = vk::ImageLayout::eTransferDstOptimal;
|
||||
}
|
||||
|
||||
if (source->layout != vk::ImageLayout::eTransferSrcOptimal)
|
||||
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{
|
||||
.image = sourceBacking,
|
||||
.srcAccessMask = vk::AccessFlagBits::eTransferRead,
|
||||
.dstAccessMask = vk::AccessFlagBits::eMemoryWrite,
|
||||
.oldLayout = vk::ImageLayout::eTransferSrcOptimal,
|
||||
.newLayout = source->layout,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.subresourceRange = subresource,
|
||||
});
|
||||
})};
|
||||
lCycle->AttachObjects(std::move(source), shared_from_this());
|
||||
lCycle->ChainCycle(cycle);
|
||||
lCycle->ChainCycle(source->cycle);
|
||||
cycle = lCycle;
|
||||
vk::ImageSubresourceLayers subresourceLayers{
|
||||
.aspectMask = subresource.aspectMask,
|
||||
.mipLevel = subresource.baseMipLevel,
|
||||
.baseArrayLayer = subresource.baseArrayLayer,
|
||||
.layerCount = subresource.layerCount == VK_REMAINING_ARRAY_LAYERS ? layerCount - subresource.baseArrayLayer : subresource.layerCount,
|
||||
};
|
||||
for (; subresourceLayers.mipLevel < (subresource.levelCount == VK_REMAINING_MIP_LEVELS ? levelCount - subresource.baseMipLevel : subresource.levelCount); subresourceLayers.mipLevel++)
|
||||
commandBuffer.copyImage(sourceBacking, vk::ImageLayout::eTransferSrcOptimal, destinationBacking, vk::ImageLayout::eTransferDstOptimal, vk::ImageCopy{
|
||||
.srcSubresource = subresourceLayers,
|
||||
.dstSubresource = subresourceLayers,
|
||||
.extent = dimensions,
|
||||
});
|
||||
|
||||
if (layout != vk::ImageLayout::eTransferDstOptimal)
|
||||
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eAllCommands, {}, {}, {}, vk::ImageMemoryBarrier{
|
||||
.image = destinationBacking,
|
||||
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eMemoryRead,
|
||||
.oldLayout = vk::ImageLayout::eTransferDstOptimal,
|
||||
.newLayout = layout,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.subresourceRange = subresource,
|
||||
});
|
||||
|
||||
if (source->layout != vk::ImageLayout::eTransferSrcOptimal)
|
||||
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eAllCommands, {}, {}, {}, vk::ImageMemoryBarrier{
|
||||
.image = sourceBacking,
|
||||
.srcAccessMask = vk::AccessFlagBits::eTransferRead,
|
||||
.dstAccessMask = vk::AccessFlagBits::eMemoryWrite,
|
||||
.oldLayout = vk::ImageLayout::eTransferSrcOptimal,
|
||||
.newLayout = source->layout,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.subresourceRange = subresource,
|
||||
});
|
||||
}, waitSemaphores, span<vk::Semaphore>{signalSemaphore});
|
||||
}};
|
||||
|
||||
auto newCycle{[&]{
|
||||
if (source->cycle)
|
||||
return source->cycle->RecordSemaphoreWaitUsage(std::move(submitFunc));
|
||||
else
|
||||
return submitFunc({});
|
||||
}()};
|
||||
newCycle->AttachObjects(std::move(source), shared_from_this());
|
||||
cycle = newCycle;
|
||||
}
|
||||
}
|
||||
|
@ -559,7 +559,7 @@ namespace skyline::gpu {
|
||||
/**
|
||||
* @brief Copies the contents of the supplied source texture into the current texture
|
||||
*/
|
||||
void CopyFrom(std::shared_ptr<Texture> source, const vk::ImageSubresourceRange &subresource = vk::ImageSubresourceRange{
|
||||
void CopyFrom(std::shared_ptr<Texture> source, vk::Semaphore waitSemaphore, vk::Semaphore signalSemaphore, const vk::ImageSubresourceRange &subresource = vk::ImageSubresourceRange{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
|
Loading…
Reference in New Issue
Block a user