From 216e5cee818d8f20f5efe781c0d42f55fa861d1d Mon Sep 17 00:00:00 2001 From: PixelyIon Date: Fri, 18 Jun 2021 16:25:19 +0530 Subject: [PATCH] Separate Guest and Host Presentation + AChoreographer V-Sync Event We had issues when combining host and guest presentation since certain configurations in guest presentation such as double buffering were highly suboptimal for the host and would significantly affect the FPS. As a result of this, we've now made host presentation have its own presentation textures which the guest's textures are copied into at presentation time, allowing us to change parameters of the host presentation independently of the guest. We've implemented the infrastructure for this which includes being able to create images from host GPU memory using VMA, an optimized linear texture sync and a method to do on-GPU texture-to-texture copies. We've also moved to driving the V-Sync event using AChoreographer on its own thread in this PR, which more accurately encapsulates HOS behavior and allows games such as ARMS to boot as they depend on the V-Sync event being signalled even when the game isn't presenting. 
--- .idea/inspectionProfiles/Project_Default.xml | 13 +- .idea/misc.xml | 2 +- .idea/scopes/SkylineLibraries.xml | 2 +- app/src/main/cpp/skyline/common/settings.cpp | 4 +- app/src/main/cpp/skyline/common/settings.h | 3 +- app/src/main/cpp/skyline/gpu/fence_cycle.h | 4 +- .../main/cpp/skyline/gpu/memory_manager.cpp | 53 ++++- app/src/main/cpp/skyline/gpu/memory_manager.h | 58 ++++- .../cpp/skyline/gpu/presentation_engine.cpp | 158 ++++++------- .../cpp/skyline/gpu/presentation_engine.h | 46 ++-- app/src/main/cpp/skyline/gpu/texture/format.h | 14 ++ .../main/cpp/skyline/gpu/texture/texture.cpp | 223 +++++++++++++++--- .../main/cpp/skyline/gpu/texture/texture.h | 84 +++++-- .../hosbinder/GraphicBufferProducer.cpp | 69 ++++-- .../hosbinder/GraphicBufferProducer.h | 7 +- .../services/hosbinder/IHOSBinderDriver.cpp | 2 +- .../services/hosbinder/android_types.h | 12 +- .../visrv/IApplicationDisplayService.h | 2 +- .../main/cpp/skyline/soc/host1x/syncpoint.cpp | 9 +- .../main/cpp/skyline/soc/host1x/syncpoint.h | 1 + 20 files changed, 535 insertions(+), 231 deletions(-) diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml index 34e1abac..813a03b5 100644 --- a/.idea/inspectionProfiles/Project_Default.xml +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -171,7 +171,7 @@ - - + diff --git a/.idea/scopes/SkylineLibraries.xml b/.idea/scopes/SkylineLibraries.xml index c13db64e..2733c9e3 100644 --- a/.idea/scopes/SkylineLibraries.xml +++ b/.idea/scopes/SkylineLibraries.xml @@ -1,3 +1,3 @@ - + \ No newline at end of file diff --git a/app/src/main/cpp/skyline/common/settings.cpp b/app/src/main/cpp/skyline/common/settings.cpp index 6f40a689..5c6cbd1f 100644 --- a/app/src/main/cpp/skyline/common/settings.cpp +++ b/app/src/main/cpp/skyline/common/settings.cpp @@ -16,9 +16,9 @@ namespace skyline { #define PREF_ELEM(name, memberName, rhs) std::make_pair(std::string(name), [](Settings &settings, const pugi::xml_node &element) { 
settings.memberName = rhs; }) std::tuple preferences{ - PREF_ELEM("operation_mode", operationMode, element.attribute("value").as_bool()), - PREF_ELEM("username_value", username, element.text().as_string()), PREF_ELEM("log_level", logLevel, static_cast(element.text().as_uint(static_cast(Logger::LogLevel::Info)))), + PREF_ELEM("username_value", username, element.text().as_string()), + PREF_ELEM("operation_mode", operationMode, element.attribute("value").as_bool()), }; #undef PREF_ELEM diff --git a/app/src/main/cpp/skyline/common/settings.h b/app/src/main/cpp/skyline/common/settings.h index c4975586..682874b7 100644 --- a/app/src/main/cpp/skyline/common/settings.h +++ b/app/src/main/cpp/skyline/common/settings.h @@ -12,8 +12,9 @@ namespace skyline { class Settings { public: Logger::LogLevel logLevel; //!< The minimum level that logs need to be for them to be printed - bool operationMode; //!< If the emulated Switch should be handheld or docked std::string username; //!< The name set by the user to be supplied to the guest + bool operationMode; //!< If the emulated Switch should be handheld or docked + bool forceTripleBuffering{true}; //!< If the presentation should always triple buffer even if the game double buffers /** * @param fd An FD to the preference XML file diff --git a/app/src/main/cpp/skyline/gpu/fence_cycle.h b/app/src/main/cpp/skyline/gpu/fence_cycle.h index 099d5e00..a9f04ec6 100644 --- a/app/src/main/cpp/skyline/gpu/fence_cycle.h +++ b/app/src/main/cpp/skyline/gpu/fence_cycle.h @@ -123,8 +123,8 @@ namespace skyline::gpu { } } - const auto& first{*dependencies.begin()}; - const auto& last{*dependencies.end()}; + const auto &first{*dependencies.begin()}; + const auto &last{*dependencies.end()}; std::shared_ptr next{std::atomic_load_explicit(&list, std::memory_order_consume)}; do { last->next = next; diff --git a/app/src/main/cpp/skyline/gpu/memory_manager.cpp b/app/src/main/cpp/skyline/gpu/memory_manager.cpp index a247464c..018f465c 100644 --- 
a/app/src/main/cpp/skyline/gpu/memory_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/memory_manager.cpp @@ -5,13 +5,32 @@ #include "memory_manager.h" namespace skyline::gpu::memory { + /** + * @brief If the result isn't VK_SUCCESS then an exception is thrown + */ + void ThrowOnFail(VkResult result, const char *function = __builtin_FUNCTION()) { + if (result != VK_SUCCESS) + vk::throwResultException(vk::Result(result), function); + } + StagingBuffer::~StagingBuffer() { + if (vmaAllocator && vmaAllocation && vkBuffer) vmaDestroyBuffer(vmaAllocator, vkBuffer, vmaAllocation); } - void MemoryManager::ThrowOnFail(VkResult result, const char *function) { - if (result != VK_SUCCESS) - vk::throwResultException(vk::Result(result), function); + Image::~Image() { + if (vmaAllocator && vmaAllocation && vkImage) { + if (pointer) + vmaUnmapMemory(vmaAllocator, vmaAllocation); + vmaDestroyImage(vmaAllocator, vkImage, vmaAllocation); + } + } + + u8 *Image::data() { + if (pointer) [[likely]] + return pointer; + ThrowOnFail(vmaMapMemory(vmaAllocator, vmaAllocation, reinterpret_cast(&pointer))); + return pointer; } MemoryManager::MemoryManager(const GPU &pGpu) : gpu(pGpu) { @@ -75,4 +94,32 @@ namespace skyline::gpu::memory { return std::make_shared(reinterpret_cast(allocationInfo.pMappedData), allocationInfo.size, vmaAllocator, buffer, allocation); } + + Image MemoryManager::AllocateImage(const vk::ImageCreateInfo &createInfo) { + VmaAllocationCreateInfo allocationCreateInfo{ + .usage = VMA_MEMORY_USAGE_GPU_ONLY, + }; + + VkImage image; + VmaAllocation allocation; + VmaAllocationInfo allocationInfo; + ThrowOnFail(vmaCreateImage(vmaAllocator, &static_cast(createInfo), &allocationCreateInfo, &image, &allocation, &allocationInfo)); + + return Image(vmaAllocator, image, allocation); + } + + Image MemoryManager::AllocateMappedImage(const vk::ImageCreateInfo &createInfo) { + VmaAllocationCreateInfo allocationCreateInfo{ + .flags = VMA_ALLOCATION_CREATE_MAPPED_BIT, + .usage = 
VMA_MEMORY_USAGE_UNKNOWN, + .memoryTypeBits = static_cast(vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eDeviceLocal), + }; + + VkImage image; + VmaAllocation allocation; + VmaAllocationInfo allocationInfo; + ThrowOnFail(vmaCreateImage(vmaAllocator, &static_cast(createInfo), &allocationCreateInfo, &image, &allocation, &allocationInfo)); + + return Image(reinterpret_cast(allocationInfo.pMappedData), vmaAllocator, image, allocation); + } } diff --git a/app/src/main/cpp/skyline/gpu/memory_manager.h b/app/src/main/cpp/skyline/gpu/memory_manager.h index 7e60dd17..c4207dec 100644 --- a/app/src/main/cpp/skyline/gpu/memory_manager.h +++ b/app/src/main/cpp/skyline/gpu/memory_manager.h @@ -11,16 +11,57 @@ namespace skyline::gpu::memory { * @brief A view into a CPU mapping of a Vulkan buffer * @note The mapping **should not** be used after the lifetime of the object has ended */ - struct StagingBuffer : public span, FenceCycleDependency { + struct StagingBuffer : public span, public FenceCycleDependency { VmaAllocator vmaAllocator; VmaAllocation vmaAllocation; vk::Buffer vkBuffer; constexpr StagingBuffer(u8 *pointer, size_t size, VmaAllocator vmaAllocator, vk::Buffer vkBuffer, VmaAllocation vmaAllocation) : vmaAllocator(vmaAllocator), vkBuffer(vkBuffer), vmaAllocation(vmaAllocation), span(pointer, size) {} + StagingBuffer(const StagingBuffer &) = delete; + + constexpr StagingBuffer(StagingBuffer &&other) : vmaAllocator(std::exchange(other.vmaAllocator, nullptr)), vmaAllocation(std::exchange(other.vmaAllocation, nullptr)), vkBuffer(std::exchange(other.vkBuffer, {})) {} + + StagingBuffer &operator=(const StagingBuffer &) = delete; + + StagingBuffer &operator=(StagingBuffer &&) = default; + ~StagingBuffer(); }; + /** + * @brief A Vulkan image which VMA allocates and manages the backing memory for + */ + struct Image { + private: + u8 *pointer{}; + + public: + VmaAllocator vmaAllocator; + VmaAllocation 
vmaAllocation; + vk::Image vkImage; + + constexpr Image(VmaAllocator vmaAllocator, vk::Image vkImage, VmaAllocation vmaAllocation) : vmaAllocator(vmaAllocator), vkImage(vkImage), vmaAllocation(vmaAllocation) {} + + constexpr Image(u8 *pointer, VmaAllocator vmaAllocator, vk::Image vkImage, VmaAllocation vmaAllocation) : pointer(pointer), vmaAllocator(vmaAllocator), vkImage(vkImage), vmaAllocation(vmaAllocation) {} + + Image(const Image &) = delete; + + constexpr Image(Image &&other) : pointer(std::exchange(other.pointer, nullptr)), vmaAllocator(std::exchange(other.vmaAllocator, nullptr)), vmaAllocation(std::exchange(other.vmaAllocation, nullptr)), vkImage(std::exchange(other.vkImage, {})) {} + + Image &operator=(const Image &) = delete; + + Image &operator=(Image &&) = default; + + ~Image(); + + /** + * @return A pointer to a mapping of the image on the CPU + * @note If the image isn't already mapped on the CPU, this creates a mapping for it + */ + u8 *data(); + }; + /** * @brief An abstraction over memory operations done in Vulkan, it's used for all allocations on the host GPU */ @@ -29,11 +70,6 @@ namespace skyline::gpu::memory { const GPU &gpu; VmaAllocator vmaAllocator{VK_NULL_HANDLE}; - /** - * @brief If the result isn't VK_SUCCESS then an exception is thrown - */ - static void ThrowOnFail(VkResult result, const char *function = __builtin_FUNCTION()); - public: MemoryManager(const GPU &gpu); @@ -43,5 +79,15 @@ namespace skyline::gpu::memory { * @brief Creates a buffer which is optimized for staging (Transfer Source) */ std::shared_ptr AllocateStagingBuffer(vk::DeviceSize size); + + /** + * @brief Creates an image which is allocated and deallocated using RAII + */ + Image AllocateImage(const vk::ImageCreateInfo &createInfo); + + /** + * @brief Creates an image which is allocated and deallocated using RAII and is optimal for being mapped on the CPU + */ + Image AllocateMappedImage(const vk::ImageCreateInfo &createInfo); }; } diff --git 
a/app/src/main/cpp/skyline/gpu/presentation_engine.cpp b/app/src/main/cpp/skyline/gpu/presentation_engine.cpp index 508887c6..ff7ddfa8 100644 --- a/app/src/main/cpp/skyline/gpu/presentation_engine.cpp +++ b/app/src/main/cpp/skyline/gpu/presentation_engine.cpp @@ -2,15 +2,18 @@ // Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) #include -#include +#include +#include #include +#include #include "presentation_engine.h" +#include "texture/format.h" extern skyline::i32 Fps; extern skyline::i32 FrameTime; namespace skyline::gpu { - PresentationEngine::PresentationEngine(const DeviceState &state, GPU &gpu) : state(state), gpu(gpu), vsyncEvent(std::make_shared(state, true)), bufferEvent(std::make_shared(state, true)), presentationTrack(static_cast(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()) { + PresentationEngine::PresentationEngine(const DeviceState &state, GPU &gpu) : state(state), gpu(gpu), acquireFence(gpu.vkDevice, vk::FenceCreateInfo{}), presentationTrack(static_cast(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()), choreographerThread(&PresentationEngine::ChoreographerThread, this), vsyncEvent(std::make_shared(state, true)) { auto desc{presentationTrack.Serialize()}; desc.set_name("Presentation"); perfetto::TrackEvent::SetTrackDescriptor(presentationTrack, desc); @@ -20,6 +23,26 @@ namespace skyline::gpu { auto env{state.jvm->GetEnv()}; if (!env->IsSameObject(jSurface, nullptr)) env->DeleteGlobalRef(jSurface); + + if (choreographerThread.joinable()) { + if (choreographerLooper) + ALooper_wake(choreographerLooper); + choreographerThread.join(); + } + } + + /** + * @url https://developer.android.com/ndk/reference/group/choreographer#achoreographer_framecallback + */ + void ChoreographerCallback(long frameTimeNanos, kernel::type::KEvent* vsyncEvent) { + vsyncEvent->Signal(); + AChoreographer_postFrameCallback(AChoreographer_getInstance(), reinterpret_cast(&ChoreographerCallback), vsyncEvent); + } 
+ + void PresentationEngine::ChoreographerThread() { + choreographerLooper = ALooper_prepare(0); + AChoreographer_postFrameCallback(AChoreographer_getInstance(), reinterpret_cast(&ChoreographerCallback), vsyncEvent.get()); + ALooper_pollAll(-1, nullptr, nullptr, nullptr); } service::hosbinder::NativeWindowTransform GetAndroidTransform(vk::SurfaceTransformFlagBitsKHR transform) { @@ -45,58 +68,55 @@ namespace skyline::gpu { } } - void PresentationEngine::UpdateSwapchain(u16 imageCount, vk::Format imageFormat, vk::Extent2D imageExtent, bool newSurface) { - if (!imageCount) - return; - else if (imageCount > service::hosbinder::GraphicBufferProducer::MaxSlotCount) - throw exception("Requesting swapchain with higher image count ({}) than maximum slot count ({})", imageCount, service::hosbinder::GraphicBufferProducer::MaxSlotCount); + void PresentationEngine::UpdateSwapchain(texture::Format format, texture::Dimensions extent) { + auto minImageCount{std::max(vkSurfaceCapabilities.minImageCount, state.settings->forceTripleBuffering ? 
3U : 0U)}; + if (minImageCount > MaxSlotCount) + throw exception("Requesting swapchain with higher image count ({}) than maximum slot count ({})", minImageCount, MaxSlotCount); const auto &capabilities{vkSurfaceCapabilities}; - if (imageCount < capabilities.minImageCount || (capabilities.maxImageCount && imageCount > capabilities.maxImageCount)) - throw exception("Cannot update swapchain to accomodate image count: {} ({}-{})", imageCount, capabilities.minImageCount, capabilities.maxImageCount); - if (capabilities.minImageExtent.height > imageExtent.height || capabilities.minImageExtent.width > imageExtent.width || capabilities.maxImageExtent.height < imageExtent.height || capabilities.maxImageExtent.width < imageExtent.width) - throw exception("Cannot update swapchain to accomodate image extent: {}x{} ({}x{}-{}x{})", imageExtent.width, imageExtent.height, capabilities.minImageExtent.width, capabilities.minImageExtent.height, capabilities.maxImageExtent.width, capabilities.maxImageExtent.height); + if (minImageCount < capabilities.minImageCount || (capabilities.maxImageCount && minImageCount > capabilities.maxImageCount)) + throw exception("Cannot update swapchain to accomodate image count: {} ({}-{})", minImageCount, capabilities.minImageCount, capabilities.maxImageCount); + else if (capabilities.minImageExtent.height > extent.height || capabilities.minImageExtent.width > extent.width || capabilities.maxImageExtent.height < extent.height || capabilities.maxImageExtent.width < extent.width) + throw exception("Cannot update swapchain to accomodate image extent: {}x{} ({}x{}-{}x{})", extent.width, extent.height, capabilities.minImageExtent.width, capabilities.minImageExtent.height, capabilities.maxImageExtent.width, capabilities.maxImageExtent.height); - if (swapchain.imageFormat != imageFormat || newSurface) { + if (swapchainFormat != format) { auto formats{gpu.vkPhysicalDevice.getSurfaceFormatsKHR(**vkSurface)}; - if (std::find(formats.begin(), formats.end(), 
vk::SurfaceFormatKHR{imageFormat, vk::ColorSpaceKHR::eSrgbNonlinear}) == formats.end()) - throw exception("Surface doesn't support requested image format '{}' with colorspace '{}'", vk::to_string(imageFormat), vk::to_string(vk::ColorSpaceKHR::eSrgbNonlinear)); + if (std::find(formats.begin(), formats.end(), vk::SurfaceFormatKHR{format, vk::ColorSpaceKHR::eSrgbNonlinear}) == formats.end()) + throw exception("Surface doesn't support requested image format '{}' with colorspace '{}'", vk::to_string(format), vk::to_string(vk::ColorSpaceKHR::eSrgbNonlinear)); } constexpr vk::ImageUsageFlags presentUsage{vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst}; if ((capabilities.supportedUsageFlags & presentUsage) != presentUsage) throw exception("Swapchain doesn't support image usage '{}': {}", vk::to_string(presentUsage), vk::to_string(capabilities.supportedUsageFlags)); - vkSwapchain = vk::raii::SwapchainKHR(gpu.vkDevice, vk::SwapchainCreateInfoKHR{ + vkSwapchain.emplace(gpu.vkDevice, vk::SwapchainCreateInfoKHR{ .surface = **vkSurface, - .minImageCount = imageCount, - .imageFormat = imageFormat, + .minImageCount = minImageCount, + .imageFormat = format, .imageColorSpace = vk::ColorSpaceKHR::eSrgbNonlinear, - .imageExtent = imageExtent, + .imageExtent = extent, .imageArrayLayers = 1, .imageUsage = presentUsage, .imageSharingMode = vk::SharingMode::eExclusive, .compositeAlpha = vk::CompositeAlphaFlagBitsKHR::eInherit, - .presentMode = vk::PresentModeKHR::eFifo, - .clipped = false, - .oldSwapchain = vkSwapchain ? 
**vkSwapchain : vk::SwapchainKHR{}, + .presentMode = vk::PresentModeKHR::eMailbox, + .clipped = true, }); auto vkImages{vkSwapchain->getImages()}; - for (u16 slot{}; slot < imageCount; slot++) { - auto &vkImage{vkImages[slot]}; - swapchain.vkImages[slot] = vkImage; - auto &image{swapchain.textures[slot]}; - if (image) { - std::scoped_lock lock(*image); - image->SwapBacking(vkImage); - image->TransitionLayout(vk::ImageLayout::ePresentSrcKHR); - image->SynchronizeHost(); // Synchronize the new host backing with guest memory - } + if (vkImages.size() > MaxSlotCount) + throw exception("Swapchain has higher image count ({}) than maximum slot count ({})", minImageCount, MaxSlotCount); + + for (size_t index{}; index < vkImages.size(); index++) { + auto &slot{slots[index]}; + slot = std::make_shared(*state.gpu, vkImages[index], extent, format::GetFormat(format), vk::ImageLayout::eUndefined, vk::ImageTiling::eOptimal); + slot->TransitionLayout(vk::ImageLayout::ePresentSrcKHR); } - swapchain.imageCount = imageCount; - swapchain.imageFormat = imageFormat; - swapchain.imageExtent = imageExtent; + for (size_t index{vkImages.size()}; index < MaxSlotCount; index++) + slots[index] = {}; + + swapchainFormat = format; + swapchainExtent = extent; } void PresentationEngine::UpdateSurface(jobject newSurface) { @@ -110,18 +130,7 @@ namespace skyline::gpu { if (!env->IsSameObject(newSurface, nullptr)) jSurface = env->NewGlobalRef(newSurface); - if (vkSwapchain) { - for (u16 slot{}; slot < swapchain.imageCount; slot++) { - auto &image{swapchain.textures[slot]}; - if (image) { - std::scoped_lock lock(*image); - image->SynchronizeGuest(); // Synchronize host backing to guest memory prior to being destroyed - image->SwapBacking(nullptr); - } - } - swapchain.vkImages = {}; - vkSwapchain.reset(); - } + vkSwapchain.reset(); if (jSurface) { vkSurface.emplace(gpu.vkInstance, vk::AndroidSurfaceCreateInfoKHR{ @@ -131,7 +140,8 @@ namespace skyline::gpu { throw exception("Vulkan Queue doesn't support 
presentation with surface"); vkSurfaceCapabilities = gpu.vkPhysicalDevice.getSurfaceCapabilitiesKHR(**vkSurface); - UpdateSwapchain(swapchain.imageCount, swapchain.imageFormat, swapchain.imageExtent, true); + if (swapchainExtent && swapchainFormat) + UpdateSwapchain(swapchainFormat, swapchainExtent); surfaceCondition.notify_all(); } else { @@ -139,60 +149,32 @@ namespace skyline::gpu { } } - std::shared_ptr PresentationEngine::CreatePresentationTexture(const std::shared_ptr &texture, u8 slot) { - std::lock_guard guard(mutex); - if (swapchain.imageCount <= slot && slot + 1 >= vkSurfaceCapabilities.minImageCount) - UpdateSwapchain(slot + 1, texture->format.vkFormat, texture->dimensions); - auto host{texture->InitializeTexture(swapchain.vkImages.at(slot), vk::ImageTiling::eOptimal)}; - swapchain.textures[slot] = host; - return host; - } - - service::hosbinder::AndroidStatus PresentationEngine::GetFreeTexture(bool async, i32 &slot) { - using AndroidStatus = service::hosbinder::AndroidStatus; - + void PresentationEngine::Present(const std::shared_ptr &texture, u64 presentId) { std::unique_lock lock(mutex); surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); }); - if (swapchain.dequeuedCount < swapchain.imageCount) { - static vk::raii::Fence fence(gpu.vkDevice, vk::FenceCreateInfo{}); - auto timeout{async ? 
0ULL : std::numeric_limits::max()}; // We cannot block for a buffer to be retrieved in async mode - auto nextImage{vkSwapchain->acquireNextImage(timeout, {}, *fence)}; - if (nextImage.first == vk::Result::eSuccess) { - swapchain.dequeuedCount++; - while (gpu.vkDevice.waitForFences(*fence, true, std::numeric_limits::max()) == vk::Result::eTimeout); - slot = nextImage.second; - return AndroidStatus::Ok; - } else if (nextImage.first == vk::Result::eNotReady || nextImage.first == vk::Result::eTimeout) { - return AndroidStatus::WouldBlock; - } else if (nextImage.first == vk::Result::eSuboptimalKHR) { + + if (texture->format != swapchainFormat || texture->dimensions != swapchainExtent) + UpdateSwapchain(texture->format, texture->dimensions); + + std::pair nextImage; + while ((nextImage = vkSwapchain->acquireNextImage(std::numeric_limits::max(), {}, *acquireFence)).first != vk::Result::eSuccess) [[unlikely]] + if (nextImage.first == vk::Result::eSuboptimalKHR) surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); }); - return GetFreeTexture(async, slot); - } else { - throw exception("VkAcquireNextImageKHR returned an unhandled result '{}'", vk::to_string(nextImage.first)); - } - } - return AndroidStatus::Busy; - } + else + throw exception("vkAcquireNextImageKHR returned an unhandled result '{}'", vk::to_string(nextImage.first)); + while (gpu.vkDevice.waitForFences(*acquireFence, true, std::numeric_limits::max()) == vk::Result::eTimeout); - void PresentationEngine::Present(u32 slot) { - std::unique_lock lock(mutex); - surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); }); - - if (--swapchain.dequeuedCount < 0) [[unlikely]] { - throw exception("Swapchain has been presented more times than images from it have been acquired: {} (Image Count: {})", swapchain.dequeuedCount, swapchain.imageCount); - } + slots.at(nextImage.second)->CopyFrom(texture); { std::lock_guard queueLock(gpu.queueMutex); 
static_cast(gpu.vkQueue.presentKHR(vk::PresentInfoKHR{ .swapchainCount = 1, .pSwapchains = &**vkSwapchain, - .pImageIndices = &slot, + .pImageIndices = &nextImage.second, })); // We explicitly discard the result here as suboptimal images are expected when the game doesn't respect the transform hint } - vsyncEvent->Signal(); - if (frameTimestamp) { auto now{util::GetTimeNs()}; FrameTime = static_cast((now - frameTimestamp) / 10000); // frametime / 100 is the real ms value, this is to retain the first two decimals diff --git a/app/src/main/cpp/skyline/gpu/presentation_engine.h b/app/src/main/cpp/skyline/gpu/presentation_engine.h index 65a6ee81..b0696537 100644 --- a/app/src/main/cpp/skyline/gpu/presentation_engine.h +++ b/app/src/main/cpp/skyline/gpu/presentation_engine.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include #include @@ -27,31 +28,31 @@ namespace skyline::gpu { vk::SurfaceCapabilitiesKHR vkSurfaceCapabilities; //!< The capabilities of the current Vulkan Surface std::optional vkSwapchain; //!< The Vulkan swapchain and the properties associated with it - struct SwapchainContext { - std::array, service::hosbinder::GraphicBufferProducer::MaxSlotCount> textures{}; - std::array vkImages{VK_NULL_HANDLE}; - u8 imageCount{}; - i8 dequeuedCount{}; - vk::Format imageFormat{}; - vk::Extent2D imageExtent{}; + vk::raii::Fence acquireFence; //!< A fence for acquiring an image from the swapchain + texture::Format swapchainFormat{}; //!< The image format of the textures in the current swapchain + texture::Dimensions swapchainExtent{}; //!< The extent of images in the current swapchain - static_assert(std::numeric_limits::max() >= service::hosbinder::GraphicBufferProducer::MaxSlotCount); - static_assert(std::numeric_limits::max() >= service::hosbinder::GraphicBufferProducer::MaxSlotCount); - } swapchain; //!< The properties of the currently created swapchain + static constexpr size_t MaxSlotCount{6}; //!< The maximum amount of queue slots, this affects the amount 
of images that can be in the swapchain + std::array, MaxSlotCount> slots; //!< The backing for storing all slots and sorted in the same order as supplied by the Vulkan swapchain u64 frameTimestamp{}; //!< The timestamp of the last frame being shown perfetto::Track presentationTrack; //!< Perfetto track used for presentation events + std::thread choreographerThread; //!< A thread for signalling the V-Sync event using AChoreographer + ALooper* choreographerLooper{}; //!< The looper object associated with the Choreographer thread + + /** + * @brief The entry point for the the Choreographer thread, the function runs ALooper on the thread + */ + void ChoreographerThread(); + /** * @note 'PresentationEngine::mutex' **must** be locked prior to calling this */ - void UpdateSwapchain(u16 imageCount, vk::Format imageFormat, vk::Extent2D imageExtent, bool newSurface = false); + void UpdateSwapchain(texture::Format format, texture::Dimensions extent); public: - texture::Dimensions resolution{}; - i32 format{}; std::shared_ptr vsyncEvent; //!< Signalled every time a frame is drawn - std::shared_ptr bufferEvent; //!< Signalled every time a buffer is freed PresentationEngine(const DeviceState &state, GPU &gpu); @@ -63,20 +64,11 @@ namespace skyline::gpu { void UpdateSurface(jobject newSurface); /** - * @brief Creates a Texture object from a GuestTexture as a part of the Vulkan swapchain + * @brief Queue the supplied texture to be presented to the screen + * @param presentId A UUID used to tag this frame for presentation timing readouts + * @note The texture **must** be locked prior to calling this */ - std::shared_ptr CreatePresentationTexture(const std::shared_ptr &texture, u8 slot); - - /** - * @param async If to return immediately when a texture is not available - * @param slot The slot the freed texture is in is written into this, it is untouched if there's an error - */ - service::hosbinder::AndroidStatus GetFreeTexture(bool async, i32 &slot); - - /** - * @brief Send a 
texture from a slot to the presentation queue to be displayed - */ - void Present(u32 slot); + void Present(const std::shared_ptr &texture, u64 presentId); /** * @return A transform that the application should render with to elide costly transforms later diff --git a/app/src/main/cpp/skyline/gpu/texture/format.h b/app/src/main/cpp/skyline/gpu/texture/format.h index 666b64f0..cbbf3321 100644 --- a/app/src/main/cpp/skyline/gpu/texture/format.h +++ b/app/src/main/cpp/skyline/gpu/texture/format.h @@ -10,4 +10,18 @@ namespace skyline::gpu::format { constexpr Format RGBA8888Unorm{sizeof(u8) * 4, 1, 1, vk::Format::eR8G8B8A8Unorm}; //!< 8-bits per channel 4-channel pixels constexpr Format RGB565Unorm{sizeof(u8) * 2, 1, 1, vk::Format::eR5G6B5UnormPack16}; //!< Red channel: 5-bit, Green channel: 6-bit, Blue channel: 5-bit + + /** + * @brief Converts a format from Vulkan to a Skyline format + */ + constexpr const Format &GetFormat(vk::Format format) { + switch (format) { + case vk::Format::eR8G8B8A8Unorm: + return RGBA8888Unorm; + case vk::Format::eR5G6B5UnormPack16: + return RGB565Unorm; + default: + throw exception("Vulkan format not supported: '{}'", vk::to_string(format)); + } + } } diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.cpp b/app/src/main/cpp/skyline/gpu/texture/texture.cpp index c693f762..700330f5 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.cpp +++ b/app/src/main/cpp/skyline/gpu/texture/texture.cpp @@ -7,29 +7,76 @@ #include "texture.h" namespace skyline::gpu { - GuestTexture::GuestTexture(const DeviceState &state, u8 *pointer, texture::Dimensions dimensions, texture::Format format, texture::TileMode tiling, texture::TileConfig layout) : state(state), pointer(pointer), dimensions(dimensions), format(format), tileMode(tiling), tileConfig(layout) {} + GuestTexture::GuestTexture(const DeviceState &state, u8 *pointer, texture::Dimensions dimensions, const texture::Format &format, texture::TileMode tiling, texture::TileConfig layout) : 
state(state), pointer(pointer), dimensions(dimensions), format(format), tileMode(tiling), tileConfig(layout) {} - std::shared_ptr GuestTexture::InitializeTexture(vk::Image backing, std::optional tiling, vk::ImageLayout pLayout, std::optional pFormat, std::optional pDimensions, texture::Swizzle swizzle) { + std::shared_ptr GuestTexture::InitializeTexture(vk::Image backing, texture::Dimensions pDimensions, const texture::Format &pFormat, std::optional tiling, vk::ImageLayout layout, texture::Swizzle swizzle) { if (!host.expired()) throw exception("Trying to create multiple Texture objects from a single GuestTexture"); - auto sharedHost{std::make_shared(*state.gpu, backing, pLayout, shared_from_this(), pDimensions ? *pDimensions : dimensions, pFormat ? *pFormat : format, tiling ? *tiling : (tileMode == texture::TileMode::Block) ? vk::ImageTiling::eOptimal : vk::ImageTiling::eLinear, swizzle)}; + auto sharedHost{std::make_shared(*state.gpu, backing, shared_from_this(), pDimensions ? pDimensions : dimensions, pFormat ? pFormat : format, layout, tiling ? *tiling : (tileMode == texture::TileMode::Block) ? vk::ImageTiling::eOptimal : vk::ImageTiling::eLinear, swizzle)}; host = sharedHost; return sharedHost; } - std::shared_ptr GuestTexture::InitializeTexture(vk::raii::Image &&backing, std::optional tiling, vk::ImageLayout pLayout, std::optional pFormat, std::optional pDimensions, texture::Swizzle swizzle) { + std::shared_ptr GuestTexture::InitializeTexture(vk::raii::Image &&backing, std::optional tiling, vk::ImageLayout layout, const texture::Format &pFormat, texture::Dimensions pDimensions, texture::Swizzle swizzle) { if (!host.expired()) throw exception("Trying to create multiple Texture objects from a single GuestTexture"); - auto sharedHost{std::make_shared(*state.gpu, std::move(backing), pLayout, shared_from_this(), pDimensions ? *pDimensions : dimensions, pFormat ? *pFormat : format, tiling ? *tiling : (tileMode == texture::TileMode::Block) ? 
vk::ImageTiling::eOptimal : vk::ImageTiling::eLinear, swizzle)}; + auto sharedHost{std::make_shared(*state.gpu, std::move(backing), shared_from_this(), pDimensions ? pDimensions : dimensions, pFormat ? pFormat : format, layout, tiling ? *tiling : (tileMode == texture::TileMode::Block) ? vk::ImageTiling::eOptimal : vk::ImageTiling::eLinear, swizzle)}; host = sharedHost; return sharedHost; } - Texture::Texture(GPU &gpu, BackingType &&backing, vk::ImageLayout layout, std::shared_ptr guest, texture::Dimensions dimensions, texture::Format format, vk::ImageTiling tiling, vk::ComponentMapping mapping) : gpu(gpu), backing(std::move(backing)), layout(layout), guest(std::move(guest)), dimensions(dimensions), format(format), tiling(tiling), mapping(mapping) { + std::shared_ptr GuestTexture::CreateTexture(vk::ImageUsageFlags usage, std::optional pTiling, vk::ImageLayout initialLayout, const texture::Format &pFormat, texture::Dimensions pDimensions, texture::Swizzle swizzle) { + if (!host.expired()) + throw exception("Trying to create multiple Texture objects from a single GuestTexture"); + + pDimensions = pDimensions ? pDimensions : dimensions; + const auto &lFormat{pFormat ? pFormat : format}; + auto tiling{pTiling ? *pTiling : (tileMode == texture::TileMode::Block) ? vk::ImageTiling::eOptimal : vk::ImageTiling::eLinear}; + vk::ImageCreateInfo imageCreateInfo{ + .imageType = pDimensions.GetType(), + .format = lFormat, + .extent = pDimensions, + .mipLevels = 1, + .arrayLayers = 1, + .samples = vk::SampleCountFlagBits::e1, + .tiling = tiling, + .usage = usage | vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst, + .sharingMode = vk::SharingMode::eExclusive, + .queueFamilyIndexCount = 1, + .pQueueFamilyIndices = &state.gpu->vkQueueFamilyIndex, + .initialLayout = initialLayout, + }; + + auto sharedHost{std::make_shared(*state.gpu, tiling != vk::ImageTiling::eLinear ? 
state.gpu->memory.AllocateImage(imageCreateInfo) : state.gpu->memory.AllocateMappedImage(imageCreateInfo), shared_from_this(), pDimensions, lFormat, initialLayout, tiling, swizzle)}; + host = sharedHost; + return sharedHost; + } + + Texture::Texture(GPU &gpu, BackingType &&backing, std::shared_ptr guest, texture::Dimensions dimensions, const texture::Format &format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ComponentMapping mapping) : gpu(gpu), backing(std::move(backing)), layout(layout), guest(std::move(guest)), dimensions(dimensions), format(format), tiling(tiling), mapping(mapping) { if (GetBacking()) SynchronizeHost(); } + Texture::Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, const texture::Format &format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ComponentMapping mapping) : gpu(gpu), backing(std::move(backing)), guest(nullptr), dimensions(dimensions), format(format), layout(layout), tiling(tiling), mapping(mapping) {} + + Texture::Texture(GPU &gpu, texture::Dimensions dimensions, const texture::Format &format, vk::ImageLayout initialLayout, vk::ImageUsageFlags usage, vk::ImageTiling tiling, vk::ComponentMapping mapping) : gpu(gpu), guest(nullptr), dimensions(dimensions), format(format), layout(initialLayout), tiling(tiling), mapping(mapping) { + vk::ImageCreateInfo imageCreateInfo{ + .imageType = dimensions.GetType(), + .format = format, + .extent = dimensions, + .mipLevels = 1, + .arrayLayers = 1, + .samples = vk::SampleCountFlagBits::e1, + .tiling = tiling, + .usage = usage | vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst, + .sharingMode = vk::SharingMode::eExclusive, + .queueFamilyIndexCount = 1, + .pQueueFamilyIndices = &gpu.vkQueueFamilyIndex, + .initialLayout = initialLayout, + }; + backing = tiling != vk::ImageTiling::eLinear ? 
gpu.memory.AllocateImage(imageCreateInfo) : gpu.memory.AllocateMappedImage(imageCreateInfo); + } + bool Texture::WaitOnBacking() { if (GetBacking()) [[likely]] { return false; @@ -83,13 +130,23 @@ namespace skyline::gpu { } void Texture::SynchronizeHost() { + if (!guest) + throw exception("Synchronization of host textures requires a valid guest texture to synchronize from"); + TRACE_EVENT("gpu", "Texture::SynchronizeHost"); auto pointer{guest->pointer}; auto size{format.GetSize(dimensions)}; - auto stagingBuffer{[&]() { - if (tiling == vk::ImageTiling::eOptimal) { - return gpu.memory.AllocateStagingBuffer(size); + u8 *bufferData; + auto stagingBuffer{[&]() -> std::shared_ptr { + if (tiling == vk::ImageTiling::eOptimal || !std::holds_alternative(backing)) { + auto stagingBuffer{gpu.memory.AllocateStagingBuffer(size)}; + bufferData = stagingBuffer->data(); + return stagingBuffer; + } else if (tiling == vk::ImageTiling::eLinear) { + bufferData = std::get(backing).data(); + WaitOnFence(); + return nullptr; } else { throw exception("Guest -> Host synchronization of images tiled as '{}' isn't implemented", vk::to_string(tiling)); } @@ -112,7 +169,7 @@ namespace skyline::gpu { auto gobYOffset{robWidthBytes * gobHeight}; // The offset of the next Y-axis GOB from the current one in linear space auto inputSector{pointer}; // The address of the input sector - auto outputRob{stagingBuffer->data()}; // The address of the output block + auto outputRob{bufferData}; // The address of the output block for (u32 rob{}, y{}, paddingY{}; rob < surfaceHeightRobs; rob++) { // Every Surface contains `surfaceHeightRobs` ROBs auto outputBlock{outputRob}; // We iterate through a block independently of the ROB @@ -141,7 +198,7 @@ namespace skyline::gpu { auto sizeStride{guest->format.GetSize(guest->tileConfig.pitch, 1)}; // The size of a single stride of pixel data auto inputLine{pointer}; // The address of the input line - auto outputLine{stagingBuffer->data()}; // The address of the output 
line + auto outputLine{bufferData}; // The address of the output line for (u32 line{}; line < dimensions.height; line++) { std::memcpy(outputLine, inputLine, sizeLine); @@ -149,18 +206,113 @@ namespace skyline::gpu { outputLine += sizeLine; } } else if (guest->tileMode == texture::TileMode::Linear) { - std::memcpy(stagingBuffer->data(), pointer, size); + std::memcpy(bufferData, pointer, size); } - if (WaitOnBacking() && size != format.GetSize(dimensions)) - throw exception("Backing properties changing during sync is not supported"); + if (stagingBuffer) { + if (WaitOnBacking() && size != format.GetSize(dimensions)) + throw exception("Backing properties changing during sync is not supported"); + WaitOnFence(); + + cycle = gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) { + auto image{GetBacking()}; + if (layout != vk::ImageLayout::eTransferDstOptimal) { + commandBuffer.pipelineBarrier(layout != vk::ImageLayout::eUndefined ? vk::PipelineStageFlagBits::eTopOfPipe : vk::PipelineStageFlagBits::eBottomOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ + .image = image, + .srcAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = layout, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .subresourceRange = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .levelCount = 1, + .layerCount = 1, + }, + }); + + if (layout == vk::ImageLayout::eUndefined) + layout = vk::ImageLayout::eTransferDstOptimal; + } + + commandBuffer.copyBufferToImage(stagingBuffer->vkBuffer, image, vk::ImageLayout::eTransferDstOptimal, vk::BufferImageCopy{ + .imageExtent = dimensions, + .imageSubresource = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .layerCount = 1, + }, + }); + + if (layout != vk::ImageLayout::eTransferDstOptimal) + 
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ + .image = image, + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .subresourceRange = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .levelCount = 1, + .layerCount = 1, + }, + }); + }); + + cycle->AttachObject(stagingBuffer); + } + } + + void Texture::SynchronizeGuest() { + if (!guest) + throw exception("Synchronization of guest textures requires a valid guest texture to synchronize to"); + + WaitOnBacking(); WaitOnFence(); + TRACE_EVENT("gpu", "Texture::SynchronizeGuest"); + // TODO: Write Host -> Guest Synchronization + } + + void Texture::CopyFrom(std::shared_ptr source) { + WaitOnBacking(); + WaitOnFence(); + + source->WaitOnBacking(); + source->WaitOnFence(); + + if (source->layout == vk::ImageLayout::eUndefined) + throw exception("Cannot copy from image with undefined layout"); + else if (source->dimensions != dimensions) + throw exception("Cannot copy from image with different dimensions"); + else if (source->format != format) + throw exception("Cannot copy from image with different format"); + cycle = gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) { - auto image{GetBacking()}; + auto sourceBacking{source->GetBacking()}; + if (source->layout != vk::ImageLayout::eTransferSrcOptimal) { + commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ + .image = sourceBacking, + .srcAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = source->layout, + .newLayout = 
vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .subresourceRange = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .levelCount = 1, + .layerCount = 1, + }, + }); + } + + auto destinationBacking{GetBacking()}; if (layout != vk::ImageLayout::eTransferDstOptimal) { commandBuffer.pipelineBarrier(layout != vk::ImageLayout::eUndefined ? vk::PipelineStageFlagBits::eTopOfPipe : vk::PipelineStageFlagBits::eBottomOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ - .image = image, + .image = destinationBacking, .srcAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, .dstAccessMask = vk::AccessFlagBits::eTransferWrite, .oldLayout = layout, @@ -178,17 +330,21 @@ namespace skyline::gpu { layout = vk::ImageLayout::eTransferDstOptimal; } - commandBuffer.copyBufferToImage(stagingBuffer->vkBuffer, image, vk::ImageLayout::eTransferDstOptimal, vk::BufferImageCopy{ - .imageExtent = dimensions, - .imageSubresource = { + commandBuffer.copyImage(sourceBacking, vk::ImageLayout::eTransferSrcOptimal, destinationBacking, vk::ImageLayout::eTransferDstOptimal, vk::ImageCopy{ + .srcSubresource = { .aspectMask = vk::ImageAspectFlagBits::eColor, .layerCount = 1, }, + .dstSubresource = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .layerCount = 1, + }, + .extent = dimensions, }); if (layout != vk::ImageLayout::eTransferDstOptimal) commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ - .image = image, + .image = destinationBacking, .srcAccessMask = vk::AccessFlagBits::eTransferWrite, .dstAccessMask = vk::AccessFlagBits::eMemoryRead, .oldLayout = vk::ImageLayout::eTransferDstOptimal, @@ -201,16 +357,23 @@ namespace skyline::gpu { .layerCount = 1, }, }); + + if (layout != vk::ImageLayout::eTransferSrcOptimal) + 
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ + .image = sourceBacking, + .srcAccessMask = vk::AccessFlagBits::eTransferRead, + .dstAccessMask = vk::AccessFlagBits::eMemoryWrite, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = source->layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .subresourceRange = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .levelCount = 1, + .layerCount = 1, + }, + }); }); - - cycle->AttachObject(stagingBuffer); - } - - void Texture::SynchronizeGuest() { - WaitOnBacking(); - WaitOnFence(); - - TRACE_EVENT("gpu", "Texture::SynchronizeGuest"); - // TODO: Write Host -> Guest Synchronization + cycle->AttachObject(source); } } diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.h b/app/src/main/cpp/skyline/gpu/texture/texture.h index 61807363..4a4bc686 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.h +++ b/app/src/main/cpp/skyline/gpu/texture/texture.h @@ -14,13 +14,19 @@ namespace skyline::gpu { constexpr Dimensions() : width(0), height(0), depth(0) {} + constexpr Dimensions(u32 width) : width(width), height(1), depth(1) {} + constexpr Dimensions(u32 width, u32 height) : width(width), height(height), depth(1) {} constexpr Dimensions(u32 width, u32 height, u32 depth) : width(width), height(height), depth(depth) {} + constexpr Dimensions(vk::Extent2D extent) : Dimensions(extent.width, extent.height) {} + + constexpr Dimensions(vk::Extent3D extent) : Dimensions(extent.width, extent.height, extent.depth) {} + auto operator<=>(const Dimensions &) const = default; - vk::ImageType GetType() { + constexpr vk::ImageType GetType() const { if (depth) return vk::ImageType::e3D; else if (width) @@ -29,32 +35,39 @@ namespace skyline::gpu { return vk::ImageType::e1D; } - operator vk::Extent2D() { + constexpr operator vk::Extent2D() const { return vk::Extent2D{ 
.width = width, .height = height, }; } - operator vk::Extent3D() { + constexpr operator vk::Extent3D() const { return vk::Extent3D{ .width = width, .height = height, .depth = depth, }; } + + /** + * @return If the dimensions are valid and don't equate to zero + */ + constexpr operator bool() const { + return width && height && depth; + } }; /** * @note Blocks refers to the atomic unit of a compressed format (IE: The minimum amount of data that can be decompressed) */ struct Format { - u8 bpb; //!< Bytes Per Block, this is used instead of bytes per pixel as that might not be a whole number for compressed formats - u16 blockHeight; //!< The height of a block in pixels - u16 blockWidth; //!< The width of a block in pixels - vk::Format vkFormat; + u8 bpb{}; //!< Bytes Per Block, this is used instead of bytes per pixel as that might not be a whole number for compressed formats + u16 blockHeight{}; //!< The height of a block in pixels + u16 blockWidth{}; //!< The width of a block in pixels + vk::Format vkFormat{vk::Format::eUndefined}; - constexpr bool IsCompressed() { + constexpr bool IsCompressed() const { return (blockHeight != 1) || (blockWidth != 1); } @@ -64,26 +77,30 @@ namespace skyline::gpu { * @param depth The depth of the texture in layers * @return The size of the texture in bytes */ - constexpr size_t GetSize(u32 width, u32 height, u32 depth = 1) { + constexpr size_t GetSize(u32 width, u32 height, u32 depth = 1) const { return (((width / blockWidth) * (height / blockHeight)) * bpb) * depth; } - constexpr size_t GetSize(Dimensions dimensions) { + constexpr size_t GetSize(Dimensions dimensions) const { return GetSize(dimensions.width, dimensions.height, dimensions.depth); } - constexpr bool operator==(const Format &format) { + constexpr bool operator==(const Format &format) const { return vkFormat == format.vkFormat; } - constexpr bool operator!=(const Format &format) { + constexpr bool operator!=(const Format &format) const { return vkFormat != 
format.vkFormat; } + constexpr operator vk::Format() const { + return vkFormat; + } + /** * @return If this format is actually valid or not */ - constexpr operator bool() { + constexpr operator bool() const { return bpb; } }; @@ -171,7 +188,7 @@ namespace skyline::gpu { texture::TileMode tileMode; texture::TileConfig tileConfig; - GuestTexture(const DeviceState &state, u8 *pointer, texture::Dimensions dimensions, texture::Format format, texture::TileMode tileMode = texture::TileMode::Linear, texture::TileConfig tileConfig = {}); + GuestTexture(const DeviceState &state, u8 *pointer, texture::Dimensions dimensions, const texture::Format& format, texture::TileMode tileMode = texture::TileMode::Linear, texture::TileConfig tileConfig = {}); constexpr size_t Size() { return format.GetSize(dimensions); @@ -180,32 +197,39 @@ namespace skyline::gpu { /** * @brief Creates a corresponding host texture object for this guest texture * @param backing The Vulkan Image that is used as the backing on the host, its lifetime is not managed by the host texture object + * @param dimensions The dimensions of the host texture (Defaults to the dimensions of the guest texture) + * @param format The format of the host texture (Defaults to the format of the guest texture) * @param tiling The tiling used by the image on host, this is the same as guest by default * @param layout The initial layout of the Vulkan Image, this is used for efficient layout management - * @param format The format of the host texture (Defaults to the format of the guest texture) - * @param dimensions The dimensions of the host texture (Defaults to the dimensions of the host texture) * @param swizzle The channel swizzle of the host texture (Defaults to no channel swizzling) * @return A shared pointer to the host texture object * @note There can only be one host texture for a corresponding guest texture + * @note If any of the supplied parameters do not match up with the backing then it's undefined behavior */ - 
std::shared_ptr InitializeTexture(vk::Image backing, std::optional tiling = std::nullopt, vk::ImageLayout layout = vk::ImageLayout::eUndefined, std::optional format = std::nullopt, std::optional dimensions = std::nullopt, texture::Swizzle swizzle = {}); + std::shared_ptr InitializeTexture(vk::Image backing, texture::Dimensions dimensions = {}, const texture::Format& format = {}, std::optional tiling = std::nullopt, vk::ImageLayout layout = vk::ImageLayout::eUndefined, texture::Swizzle swizzle = {}); /** * @note As a RAII object is used here, the lifetime of the backing is handled by the host texture */ - std::shared_ptr InitializeTexture(vk::raii::Image &&backing, std::optional tiling = std::nullopt, vk::ImageLayout layout = vk::ImageLayout::eUndefined, std::optional format = std::nullopt, std::optional dimensions = std::nullopt, texture::Swizzle swizzle = {}); + std::shared_ptr InitializeTexture(vk::raii::Image &&backing, std::optional tiling = std::nullopt, vk::ImageLayout layout = vk::ImageLayout::eUndefined, const texture::Format& format = {}, texture::Dimensions dimensions = {}, texture::Swizzle swizzle = {}); + + /** + * @brief Similar to InitializeTexture but creation of the backing and allocation of memory for the backing is automatically performed by the function + * @param usage Usage flags that will be applied aside from VK_IMAGE_USAGE_TRANSFER_SRC_BIT/VK_IMAGE_USAGE_TRANSFER_DST_BIT which are mandatory + */ + std::shared_ptr CreateTexture(vk::ImageUsageFlags usage = {}, std::optional tiling = std::nullopt, vk::ImageLayout initialLayout = vk::ImageLayout::eGeneral, const texture::Format& format = {}, texture::Dimensions dimensions = {}, texture::Swizzle swizzle = {}); }; /** * @brief A texture which is backed by host constructs while being synchronized with the underlying guest texture * @note This class conforms to the Lockable and BasicLockable C++ named requirements */ - class Texture { + class Texture : public FenceCycleDependency { private: GPU &gpu; 
std::mutex mutex; //!< Synchronizes any mutations to the texture or its backing std::condition_variable backingCondition; //!< Signalled when a valid backing has been swapped in - using BackingType = std::variant; + using BackingType = std::variant; BackingType backing; //!< The Vulkan image that backs this texture, it is nullable std::shared_ptr cycle; //!< A fence cycle for when any host operation mutating the texture has completed, it must be waited on prior to any mutations to the backing vk::ImageLayout layout; @@ -217,17 +241,26 @@ namespace skyline::gpu { return std::visit(VariantVisitor{ [](vk::Image image) { return image; }, [](const vk::raii::Image &image) { return *image; }, + [](const memory::Image &image) { return image.vkImage; }, }, backing); } public: - std::shared_ptr guest; //!< The guest texture from which this was created, it's required for syncing and not nullable + std::shared_ptr guest; //!< The guest texture from which this was created, it's required for syncing texture::Dimensions dimensions; texture::Format format; vk::ImageTiling tiling; vk::ComponentMapping mapping; - Texture(GPU &gpu, BackingType &&backing, vk::ImageLayout layout, std::shared_ptr guest, texture::Dimensions dimensions, texture::Format format, vk::ImageTiling tiling, vk::ComponentMapping mapping); + Texture(GPU &gpu, BackingType &&backing, std::shared_ptr guest, texture::Dimensions dimensions, const texture::Format& format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ComponentMapping mapping); + + Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, const texture::Format& format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ComponentMapping mapping = {}); + + /** + * @brief Creates and allocates memory for the backing to create a texture object wrapping it + * @param usage Usage flags that will be applied aside from VK_IMAGE_USAGE_TRANSFER_SRC_BIT/VK_IMAGE_USAGE_TRANSFER_DST_BIT which are mandatory + */ + Texture(GPU &gpu, 
texture::Dimensions dimensions, const texture::Format& format, vk::ImageLayout initialLayout = vk::ImageLayout::eGeneral, vk::ImageUsageFlags usage = {}, vk::ImageTiling tiling = vk::ImageTiling::eOptimal, vk::ComponentMapping mapping = {}); /** * @brief Acquires an exclusive lock on the texture for the calling thread @@ -300,13 +333,20 @@ namespace skyline::gpu { /** * @brief Synchronizes the host texture with the guest after it has been modified * @note The texture **must** be locked prior to calling this + * @note The guest texture should not be null prior to calling this */ void SynchronizeHost(); /** * @brief Synchronizes the guest texture with the host texture after it has been modified * @note The texture **must** be locked prior to calling this + * @note The guest texture should not be null prior to calling this */ void SynchronizeGuest(); + + /** + * @brief Copies the contents of the supplied source texture into the current texture + */ + void CopyFrom(std::shared_ptr source); }; } diff --git a/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.cpp b/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.cpp index 18686b2a..f84c9a7e 100644 --- a/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.cpp +++ b/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.cpp @@ -14,7 +14,7 @@ #include "GraphicBufferProducer.h" namespace skyline::service::hosbinder { - GraphicBufferProducer::GraphicBufferProducer(const DeviceState &state) : state(state) {} + GraphicBufferProducer::GraphicBufferProducer(const DeviceState &state) : state(state), bufferEvent(std::make_shared(state, true)) {} u8 GraphicBufferProducer::GetPendingBufferCount() { u8 count{}; @@ -45,36 +45,53 @@ namespace skyline::service::hosbinder { return AndroidStatus::BadValue; } - constexpr i32 invalidGraphicBufferSlot{-1}; //!< https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/include/gui/BufferQueueCore.h;l=61 
- slot = invalidGraphicBufferSlot; + constexpr i32 InvalidGraphicBufferSlot{-1}; //!< https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/include/gui/BufferQueueCore.h;l=61 + slot = InvalidGraphicBufferSlot; std::lock_guard guard(mutex); - auto result{state.gpu->presentation.GetFreeTexture(async, slot)}; - if (result != AndroidStatus::Ok) [[unlikely]] { - if (result == AndroidStatus::Busy) - state.logger->Warn("No free buffers to dequeue"); - return result; + auto buffer{queue.end()}; + while (true) { + size_t dequeuedSlotCount{}; + for (auto it{queue.begin()}; it != queue.end(); it++) { + // We want to select the oldest slot that's free to use as we'd want all slots to be used + // If we go linearly then we have a higher preference for selecting the former slots and being out of order + if (it->state == BufferState::Free && it->texture) { + if (buffer == queue.end() || it->frameNumber < buffer->frameNumber) + buffer = it; + else if (it->state == BufferState::Dequeued) + dequeuedSlotCount++; + } + } + + if (buffer != queue.end()) { + slot = std::distance(queue.begin(), buffer); + break; + } else if (async) { + return AndroidStatus::WouldBlock; + } else if (dequeuedSlotCount == queue.size()) { + state.logger->Warn("Client attempting to dequeue more buffers when all buffers are dequeued by the client: {}", dequeuedSlotCount); + return AndroidStatus::InvalidOperation; + } } width = width ? width : defaultWidth; height = height ? height : defaultHeight; format = (format != AndroidPixelFormat::None) ? 
format : defaultFormat; - auto &buffer{queue.at(slot)}; - if (!buffer.graphicBuffer) { + if (!buffer->graphicBuffer) { // Horizon OS doesn't ever allocate memory for the buffers on the GraphicBufferProducer end // All buffers must be preallocated on the client application and attached to an Android buffer using SetPreallocatedBuffer return AndroidStatus::NoMemory; } - auto &surface{buffer.graphicBuffer->graphicHandle.surfaces.front()}; - if (buffer.graphicBuffer->format != format || surface.width != width || surface.height != height || (buffer.graphicBuffer->usage & usage) != usage) { - state.logger->Warn("Buffer which has been dequeued isn't compatible with the supplied parameters: Dimensions: {}x{}={}x{}, Format: {}={}, Usage: 0x{:X}=0x{:X}", width, height, surface.width, surface.height, ToString(format), ToString(buffer.graphicBuffer->format), usage, buffer.graphicBuffer->usage); + auto &surface{buffer->graphicBuffer->graphicHandle.surfaces.front()}; + if (buffer->graphicBuffer->format != format || surface.width != width || surface.height != height || (buffer->graphicBuffer->usage & usage) != usage) { + state.logger->Warn("Buffer which has been dequeued isn't compatible with the supplied parameters: Dimensions: {}x{}={}x{}, Format: {}={}, Usage: 0x{:X}=0x{:X}", width, height, surface.width, surface.height, ToString(format), ToString(buffer->graphicBuffer->format), usage, buffer->graphicBuffer->usage); // Nintendo doesn't deallocate the slot which was picked in here and reallocate it as a compatible buffer // This is related to the comment above, Nintendo only allocates buffers on the client side return AndroidStatus::NoInit; } - buffer.state = BufferState::Dequeued; + buffer->state = BufferState::Dequeued; fence = AndroidFence{}; // We just let the presentation engine return a buffer which is ready to be written into, there is no need for further synchronization state.logger->Debug("#{} - Dimensions: {}x{}, Format: {}, Usage: 0x{:X}, Is Async: {}", slot, width, 
height, ToString(format), usage, async); @@ -106,7 +123,7 @@ namespace skyline::service::hosbinder { return AndroidStatus::BadValue; } else if (!buffer.wasBufferRequested) [[unlikely]] { state.logger->Warn("#{} was queued prior to being requested", slot); - return AndroidStatus::BadValue; + buffer.wasBufferRequested = true; // Switch ignores this and doesn't return an error, certain homebrew ends up depending on this behavior } auto graphicBuffer{*buffer.graphicBuffer}; @@ -139,13 +156,16 @@ namespace skyline::service::hosbinder { fence.Wait(state.soc->host1x); { - std::scoped_lock textureLock(*buffer.texture); - buffer.texture->SynchronizeHost(); - buffer.texture->WaitOnFence(); - state.gpu->presentation.Present(slot); - state.gpu->presentation.bufferEvent->Signal(); + auto &texture{buffer.texture}; + std::scoped_lock textureLock(*texture); + texture->SynchronizeHost(); + state.gpu->presentation.Present(texture, ++frameNumber); } + buffer.frameNumber = frameNumber; + buffer.state = BufferState::Free; + bufferEvent->Signal(); + width = defaultWidth; height = defaultHeight; transformHint = state.gpu->presentation.GetTransformHint(); @@ -169,11 +189,10 @@ namespace skyline::service::hosbinder { } fence.Wait(state.soc->host1x); - state.gpu->presentation.Present(slot); // We use a present as a way to free the buffer so that it can be acquired in dequeueBuffer again buffer.state = BufferState::Free; buffer.frameNumber = 0; - state.gpu->presentation.bufferEvent->Signal(); + bufferEvent->Signal(); state.logger->Debug("#{}", slot); } @@ -349,7 +368,7 @@ namespace skyline::service::hosbinder { throw exception("Surface doesn't fit into NvMap mapping of size 0x{:X} when mapped at 0x{:X} -> 0x{:X}", nvBuffer->size, surface.offset, surface.offset + surface.size); gpu::texture::TileMode tileMode; - gpu::texture::TileConfig tileConfig; + gpu::texture::TileConfig tileConfig{}; if (surface.layout == NvSurfaceLayout::Blocklinear) { tileMode = gpu::texture::TileMode::Block; 
tileConfig = { @@ -373,11 +392,11 @@ namespace skyline::service::hosbinder { buffer.frameNumber = 0; buffer.wasBufferRequested = false; buffer.graphicBuffer = std::make_unique(graphicBuffer); - buffer.texture = state.gpu->presentation.CreatePresentationTexture(texture, slot); + buffer.texture = texture->CreateTexture({}, vk::ImageTiling::eLinear, vk::ImageLayout::eGeneral); activeSlotCount = hasBufferCount = std::count_if(queue.begin(), queue.end(), [](const BufferSlot &slot) { return static_cast(slot.graphicBuffer); }); - state.gpu->presentation.bufferEvent->Signal(); + bufferEvent->Signal(); state.logger->Debug("#{} - Dimensions: {}x{} [Stride: {}], Format: {}, Layout: {}, {}: {}, Usage: 0x{:X}, NvMap {}: {}, Buffer Start/End: 0x{:X} -> 0x{:X}", slot, surface.width, surface.height, handle.stride, ToString(graphicBuffer.format), ToString(surface.layout), surface.layout == NvSurfaceLayout::Blocklinear ? "Block Height" : "Pitch", surface.layout == NvSurfaceLayout::Blocklinear ? 1U << surface.blockHeightLog2 : surface.pitch, graphicBuffer.usage, surface.nvmapHandle ? "Handle" : "ID", surface.nvmapHandle ? 
surface.nvmapHandle : handle.nvmapId, surface.offset, surface.offset + surface.size); return AndroidStatus::Ok; diff --git a/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.h b/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.h index 508bc1e0..d2eba053 100644 --- a/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.h +++ b/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.h @@ -5,6 +5,7 @@ #pragma once +#include #include #include "android_types.h" #include "native_window.h" @@ -80,12 +81,10 @@ namespace skyline::service::hosbinder { * @url https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/libs/gui/BufferQueueCore.cpp */ class GraphicBufferProducer { - public: - constexpr static u8 MaxSlotCount{16}; //!< The maximum amount of buffer slots that a buffer queue can hold, Android supports 64 but they go unused for applications like games so we've lowered this to 16 (https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/include/gui/BufferQueueDefs.h;l=29) - private: const DeviceState &state; std::mutex mutex; //!< Synchronizes access to the buffer queue + constexpr static u8 MaxSlotCount{16}; //!< The maximum amount of buffer slots that a buffer queue can hold, Android supports 64 but they go unused for applications like games so we've lowered this to 16 (https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/include/gui/BufferQueueDefs.h;l=29) std::array queue; u8 activeSlotCount{2}; //!< The amount of slots in the queue that can be used u8 hasBufferCount{}; //!< The amount of slots with buffers attached in the queue @@ -93,6 +92,7 @@ namespace skyline::service::hosbinder { u32 defaultHeight{1}; //!< The assumed height of a buffer if none is supplied in DequeueBuffer AndroidPixelFormat defaultFormat{AndroidPixelFormat::RGBA8888}; //!< The assumed format of a buffer if none is supplied in 
DequeueBuffer NativeWindowApi connectedApi{NativeWindowApi::None}; //!< The API that the producer is currently connected to + u64 frameNumber{}; //!< The amount of frames that have been presented so far /** * @return The amount of buffers which have been queued onto the consumer @@ -156,6 +156,7 @@ namespace skyline::service::hosbinder { AndroidStatus SetPreallocatedBuffer(i32 slot, const GraphicBuffer &graphicBuffer); public: + std::shared_ptr bufferEvent; //!< Signalled every time a buffer in the queue is freed DisplayId displayId{DisplayId::Null}; //!< The ID of this display LayerStatus layerStatus{LayerStatus::Uninitialized}; //!< The status of the single layer the display has diff --git a/app/src/main/cpp/skyline/services/hosbinder/IHOSBinderDriver.cpp b/app/src/main/cpp/skyline/services/hosbinder/IHOSBinderDriver.cpp index 5713c06f..9868eb33 100644 --- a/app/src/main/cpp/skyline/services/hosbinder/IHOSBinderDriver.cpp +++ b/app/src/main/cpp/skyline/services/hosbinder/IHOSBinderDriver.cpp @@ -40,7 +40,7 @@ namespace skyline::service::hosbinder { } Result IHOSBinderDriver::GetNativeHandle(type::KSession &session, ipc::IpcRequest &request, ipc::IpcResponse &response) { - KHandle handle{state.process->InsertItem(state.gpu->presentation.bufferEvent)}; + KHandle handle{state.process->InsertItem(producer->bufferEvent)}; state.logger->Debug("Display Buffer Event Handle: 0x{:X}", handle); response.copyHandles.push_back(handle); diff --git a/app/src/main/cpp/skyline/services/hosbinder/android_types.h b/app/src/main/cpp/skyline/services/hosbinder/android_types.h index f146e8e3..91f29efe 100644 --- a/app/src/main/cpp/skyline/services/hosbinder/android_types.h +++ b/app/src/main/cpp/skyline/services/hosbinder/android_types.h @@ -45,12 +45,18 @@ namespace skyline::service::hosbinder { /** * @brief Nvidia and Nintendo's Android fence implementation, this significantly differs from the Android implementation (All FDs are inlined as integers rather than explicitly passed as 
FDs) but is a direct replacement * @url https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/include/ui/Fence.h + * @url https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/libs/ui/Fence.cpp */ struct AndroidFence { u32 fenceCount{}; //!< The amount of active fences in the array std::array fences{}; //!< Nvidia's Android fence can hold a maximum of 4 fence FDs - AndroidFence() : fenceCount(0) {} + static constexpr u32 InvalidFenceId{0xFFFFFFFF}; //!< A magic value for the syncpoint ID of invalid fences (https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/include/ui/Fence.h;l=61) + + /** + * @url https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/libs/ui/Fence.cpp;l=34-36 + */ + AndroidFence() : fenceCount(0), fences({InvalidFenceId}) {} /** * @brief Wait on all native fences in this Android fence till they're signalled @@ -59,8 +65,8 @@ namespace skyline::service::hosbinder { if (fenceCount > fences.size()) throw exception("Wait has larger fence count ({}) than storage size ({})", fenceCount, fences.size()); for (auto it{fences.begin()}, end{fences.begin() + fenceCount}; it < end; it++) - if (!host1x.syncpoints.at(it->id).Wait(it->value, std::chrono::steady_clock::duration::max())) - throw exception("Waiting on native fence #{} (Host1X Syncpoint: {}) has timed out", std::distance(fences.begin(), it), it->id); + if (it->id != InvalidFenceId) + host1x.syncpoints.at(it->id).Wait(it->value, std::chrono::steady_clock::duration::max()); } }; diff --git a/app/src/main/cpp/skyline/services/visrv/IApplicationDisplayService.h b/app/src/main/cpp/skyline/services/visrv/IApplicationDisplayService.h index 0ad39eb9..a6313a41 100644 --- a/app/src/main/cpp/skyline/services/visrv/IApplicationDisplayService.h +++ b/app/src/main/cpp/skyline/services/visrv/IApplicationDisplayService.h @@ -7,7 +7,7 @@ namespace 
skyline::service::visrv { /** - * @brief This is used to access the display + * @brief This is used by applications to access the display * @url https://switchbrew.org/wiki/Display_services#IApplicationDisplayService */ class IApplicationDisplayService : public IDisplayService { diff --git a/app/src/main/cpp/skyline/soc/host1x/syncpoint.cpp b/app/src/main/cpp/skyline/soc/host1x/syncpoint.cpp index 9f08bd83..fb6ffab7 100644 --- a/app/src/main/cpp/skyline/soc/host1x/syncpoint.cpp +++ b/app/src/main/cpp/skyline/soc/host1x/syncpoint.cpp @@ -46,8 +46,6 @@ namespace skyline::soc::host1x { std::condition_variable cv; bool flag{}; - if (timeout == std::chrono::steady_clock::duration::max()) - timeout = std::chrono::seconds(1); if (!RegisterWaiter(threshold, [&cv, &mtx, &flag] { std::unique_lock lock(mtx); flag = true; @@ -58,7 +56,12 @@ namespace skyline::soc::host1x { } std::unique_lock lock(mtx); - return cv.wait_for(lock, timeout, [&flag] { return flag; }); + if (timeout == std::chrono::steady_clock::duration::max()) { + cv.wait(lock, [&flag] { return flag; }); + return true; + } else { + return cv.wait_for(lock, timeout, [&flag] { return flag; }); + } } } diff --git a/app/src/main/cpp/skyline/soc/host1x/syncpoint.h b/app/src/main/cpp/skyline/soc/host1x/syncpoint.h index 716b8aff..1d7b5b81 100644 --- a/app/src/main/cpp/skyline/soc/host1x/syncpoint.h +++ b/app/src/main/cpp/skyline/soc/host1x/syncpoint.h @@ -47,6 +47,7 @@ namespace skyline::soc::host1x { /** * @brief Waits for the syncpoint to reach given threshold * @return If the wait was successful (true) or timed out (false) + * @note Guaranteed to succeed when 'steady_clock::duration::max()' is used */ bool Wait(u32 threshold, std::chrono::steady_clock::duration timeout); };