diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index e96c81cf..4f5c449d 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -90,8 +90,8 @@ add_library(skyline SHARED ${source_DIR}/skyline/gpu.cpp ${source_DIR}/skyline/gpu/memory_manager.cpp ${source_DIR}/skyline/gpu/command_scheduler.cpp + ${source_DIR}/skyline/gpu/texture/texture.cpp ${source_DIR}/skyline/gpu/presentation_engine.cpp - ${source_DIR}/skyline/gpu/texture.cpp ${source_DIR}/skyline/soc/gmmu.cpp ${source_DIR}/skyline/soc/host1x/syncpoint.cpp ${source_DIR}/skyline/soc/gm20b/gpfifo.cpp diff --git a/app/src/main/cpp/skyline/common.h b/app/src/main/cpp/skyline/common.h index 8f00538b..2615ca6b 100644 --- a/app/src/main/cpp/skyline/common.h +++ b/app/src/main/cpp/skyline/common.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -444,6 +445,13 @@ namespace skyline { template span(const Container &) -> span; + /** + * @brief A deduction guide for overloads required for std::visit with std::variant + */ + template + struct VariantVisitor : Ts ... { using Ts::operator()...; }; + template VariantVisitor(Ts...) -> VariantVisitor; + /** * @brief A wrapper around writing logs into a log file and logcat using Android Log APIs */ diff --git a/app/src/main/cpp/skyline/gpu/presentation_engine.cpp b/app/src/main/cpp/skyline/gpu/presentation_engine.cpp index f326648a..508887c6 100644 --- a/app/src/main/cpp/skyline/gpu/presentation_engine.cpp +++ b/app/src/main/cpp/skyline/gpu/presentation_engine.cpp @@ -10,7 +10,7 @@ extern skyline::i32 Fps; extern skyline::i32 FrameTime; namespace skyline::gpu { - PresentationEngine::PresentationEngine(const DeviceState &state, const GPU &gpu) : state(state), gpu(gpu), vsyncEvent(std::make_shared(state, true)), bufferEvent(std::make_shared(state, true)), presentationTrack(static_cast(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()) { + PresentationEngine::PresentationEngine(const DeviceState &state, GPU &gpu) : state(state), gpu(gpu), vsyncEvent(std::make_shared(state, true)), bufferEvent(std::make_shared(state, true)), presentationTrack(static_cast(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()) { auto desc{presentationTrack.Serialize()}; desc.set_name("Presentation"); perfetto::TrackEvent::SetTrackDescriptor(presentationTrack, desc); @@ -18,8 +18,8 @@ namespace skyline::gpu { PresentationEngine::~PresentationEngine() { auto env{state.jvm->GetEnv()}; - if (!env->IsSameObject(surface, nullptr)) - env->DeleteGlobalRef(surface); + if (!env->IsSameObject(jSurface, nullptr)) + env->DeleteGlobalRef(jSurface); } service::hosbinder::NativeWindowTransform GetAndroidTransform(vk::SurfaceTransformFlagBitsKHR transform) { @@ -45,22 +45,28 @@ namespace skyline::gpu { } } - void PresentationEngine::UpdateSwapchain(u16 imageCount, vk::Format imageFormat, vk::Extent2D imageExtent) { + void PresentationEngine::UpdateSwapchain(u16 imageCount, vk::Format imageFormat, vk::Extent2D imageExtent, bool newSurface) { if (!imageCount) return; + else if (imageCount > service::hosbinder::GraphicBufferProducer::MaxSlotCount) + throw exception("Requesting swapchain with higher image count ({}) than maximum slot count ({})", imageCount, service::hosbinder::GraphicBufferProducer::MaxSlotCount); - auto capabilities{gpu.vkPhysicalDevice.getSurfaceCapabilitiesKHR(**vkSurface)}; + const auto &capabilities{vkSurfaceCapabilities}; if (imageCount < capabilities.minImageCount || (capabilities.maxImageCount && imageCount > capabilities.maxImageCount)) throw exception("Cannot update swapchain to accomodate image count: {} ({}-{})", imageCount, capabilities.minImageCount, capabilities.maxImageCount); if (capabilities.minImageExtent.height > imageExtent.height || capabilities.minImageExtent.width > imageExtent.width || capabilities.maxImageExtent.height < imageExtent.height || capabilities.maxImageExtent.width < imageExtent.width) throw exception("Cannot update swapchain to accomodate image extent: {}x{} ({}x{}-{}x{})", imageExtent.width, imageExtent.height, capabilities.minImageExtent.width, capabilities.minImageExtent.height, capabilities.maxImageExtent.width, capabilities.maxImageExtent.height); + if (swapchain.imageFormat != imageFormat || newSurface) { + auto formats{gpu.vkPhysicalDevice.getSurfaceFormatsKHR(**vkSurface)}; + if (std::find(formats.begin(), formats.end(), vk::SurfaceFormatKHR{imageFormat, vk::ColorSpaceKHR::eSrgbNonlinear}) == formats.end()) + throw exception("Surface doesn't support requested image format '{}' with colorspace '{}'", vk::to_string(imageFormat), vk::to_string(vk::ColorSpaceKHR::eSrgbNonlinear)); + } + constexpr vk::ImageUsageFlags presentUsage{vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst}; if ((capabilities.supportedUsageFlags & presentUsage) != presentUsage) throw exception("Swapchain doesn't support image usage '{}': {}", vk::to_string(presentUsage), vk::to_string(capabilities.supportedUsageFlags)); - transformHint = GetAndroidTransform(capabilities.currentTransform); - vkSwapchain = vk::raii::SwapchainKHR(gpu.vkDevice, vk::SwapchainCreateInfoKHR{ .surface = **vkSurface, .minImageCount = imageCount, @@ -76,32 +82,56 @@ namespace skyline::gpu { .oldSwapchain = vkSwapchain ? **vkSwapchain : vk::SwapchainKHR{}, }); - swapchain = SwapchainContext{ - .imageCount = imageCount, - .imageFormat = imageFormat, - .imageExtent = imageExtent, - }; + auto vkImages{vkSwapchain->getImages()}; + for (u16 slot{}; slot < imageCount; slot++) { + auto &vkImage{vkImages[slot]}; + swapchain.vkImages[slot] = vkImage; + auto &image{swapchain.textures[slot]}; + if (image) { + std::scoped_lock lock(*image); + image->SwapBacking(vkImage); + image->TransitionLayout(vk::ImageLayout::ePresentSrcKHR); + image->SynchronizeHost(); // Synchronize the new host backing with guest memory + } + } + swapchain.imageCount = imageCount; + swapchain.imageFormat = imageFormat; + swapchain.imageExtent = imageExtent; } void PresentationEngine::UpdateSurface(jobject newSurface) { std::lock_guard guard(mutex); auto env{state.jvm->GetEnv()}; - if (!env->IsSameObject(surface, nullptr)) { - env->DeleteGlobalRef(surface); - surface = nullptr; + if (!env->IsSameObject(jSurface, nullptr)) { + env->DeleteGlobalRef(jSurface); + jSurface = nullptr; } if (!env->IsSameObject(newSurface, nullptr)) - surface = env->NewGlobalRef(newSurface); + jSurface = env->NewGlobalRef(newSurface); - if (surface) { + if (vkSwapchain) { + for (u16 slot{}; slot < swapchain.imageCount; slot++) { + auto &image{swapchain.textures[slot]}; + if (image) { + std::scoped_lock lock(*image); + image->SynchronizeGuest(); // Synchronize host backing to guest memory prior to being destroyed + image->SwapBacking(nullptr); + } + } + swapchain.vkImages = {}; + vkSwapchain.reset(); + } + + if (jSurface) { vkSurface.emplace(gpu.vkInstance, vk::AndroidSurfaceCreateInfoKHR{ - .window = ANativeWindow_fromSurface(env, surface), + .window = ANativeWindow_fromSurface(env, jSurface), }); if (!gpu.vkPhysicalDevice.getSurfaceSupportKHR(gpu.vkQueueFamilyIndex, **vkSurface)) throw exception("Vulkan Queue doesn't support presentation with surface"); + vkSurfaceCapabilities = gpu.vkPhysicalDevice.getSurfaceCapabilitiesKHR(**vkSurface); - UpdateSwapchain(swapchain.imageCount, swapchain.imageFormat, swapchain.imageExtent); + UpdateSwapchain(swapchain.imageCount, swapchain.imageFormat, swapchain.imageExtent, true); surfaceCondition.notify_all(); } else { @@ -109,39 +139,42 @@ namespace skyline::gpu { } } - std::shared_ptr PresentationEngine::CreatePresentationTexture(const std::shared_ptr &texture, u32 slot) { + std::shared_ptr PresentationEngine::CreatePresentationTexture(const std::shared_ptr &texture, u8 slot) { std::lock_guard guard(mutex); - if (swapchain.imageCount <= slot) - UpdateSwapchain(std::max(slot + 1, 2U), texture->format.vkFormat, texture->dimensions); - return texture->InitializeTexture(vk::raii::Image(gpu.vkDevice, vkSwapchain->getImages().at(slot))); + if (swapchain.imageCount <= slot && slot + 1 >= vkSurfaceCapabilities.minImageCount) + UpdateSwapchain(slot + 1, texture->format.vkFormat, texture->dimensions); + auto host{texture->InitializeTexture(swapchain.vkImages.at(slot), vk::ImageTiling::eOptimal)}; + swapchain.textures[slot] = host; + return host; } service::hosbinder::AndroidStatus PresentationEngine::GetFreeTexture(bool async, i32 &slot) { using AndroidStatus = service::hosbinder::AndroidStatus; std::unique_lock lock(mutex); + surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); }); if (swapchain.dequeuedCount < swapchain.imageCount) { - swapchain.dequeuedCount++; - - vk::raii::Fence fence(state.gpu->vkDevice, vk::FenceCreateInfo{}); + static vk::raii::Fence fence(gpu.vkDevice, vk::FenceCreateInfo{}); auto timeout{async ? 0ULL : std::numeric_limits::max()}; // We cannot block for a buffer to be retrieved in async mode auto nextImage{vkSwapchain->acquireNextImage(timeout, {}, *fence)}; - if (nextImage.first == vk::Result::eTimeout) { + if (nextImage.first == vk::Result::eSuccess) { + swapchain.dequeuedCount++; + while (gpu.vkDevice.waitForFences(*fence, true, std::numeric_limits::max()) == vk::Result::eTimeout); + slot = nextImage.second; + return AndroidStatus::Ok; + } else if (nextImage.first == vk::Result::eNotReady || nextImage.first == vk::Result::eTimeout) { return AndroidStatus::WouldBlock; - } else if (nextImage.first == vk::Result::eErrorSurfaceLostKHR || nextImage.first == vk::Result::eSuboptimalKHR) { + } else if (nextImage.first == vk::Result::eSuboptimalKHR) { surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); }); return GetFreeTexture(async, slot); + } else { + throw exception("VkAcquireNextImageKHR returned an unhandled result '{}'", vk::to_string(nextImage.first)); } - - gpu.vkDevice.waitForFences(*fence, true, std::numeric_limits::max()); - - slot = nextImage.second; - return AndroidStatus::Ok; } return AndroidStatus::Busy; } - void PresentationEngine::Present(i32 slot) { + void PresentationEngine::Present(u32 slot) { std::unique_lock lock(mutex); surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); }); @@ -149,6 +182,15 @@ namespace skyline::gpu { throw exception("Swapchain has been presented more times than images from it have been acquired: {} (Image Count: {})", swapchain.dequeuedCount, swapchain.imageCount); } + { + std::lock_guard queueLock(gpu.queueMutex); + static_cast(gpu.vkQueue.presentKHR(vk::PresentInfoKHR{ + .swapchainCount = 1, + .pSwapchains = &**vkSwapchain, + .pImageIndices = &slot, + })); // We explicitly discard the result here as suboptimal images are expected when the game doesn't respect the transform hint + } + vsyncEvent->Signal(); if (frameTimestamp) { @@ -167,8 +209,6 @@ namespace skyline::gpu { service::hosbinder::NativeWindowTransform PresentationEngine::GetTransformHint() { std::unique_lock lock(mutex); surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); }); - if (!transformHint) - transformHint = GetAndroidTransform(gpu.vkPhysicalDevice.getSurfaceCapabilitiesKHR(**vkSurface).currentTransform); - return *transformHint; + return GetAndroidTransform(vkSurfaceCapabilities.currentTransform); } } diff --git a/app/src/main/cpp/skyline/gpu/presentation_engine.h b/app/src/main/cpp/skyline/gpu/presentation_engine.h index 3aae3700..65a6ee81 100644 --- a/app/src/main/cpp/skyline/gpu/presentation_engine.h +++ b/app/src/main/cpp/skyline/gpu/presentation_engine.h @@ -5,9 +5,8 @@ #include #include -#include -#include -#include "texture.h" +#include +#include "texture/texture.h" struct ANativeWindow; @@ -18,25 +17,35 @@ namespace skyline::gpu { class PresentationEngine { private: const DeviceState &state; - const GPU &gpu; + GPU &gpu; + std::mutex mutex; //!< Synchronizes access to the surface objects std::condition_variable surfaceCondition; //!< Allows us to efficiently wait for Vulkan surface to be initialized - jobject surface{}; //!< The Surface object backing the ANativeWindow + jobject jSurface{}; //!< The Java Surface object backing the ANativeWindow std::optional vkSurface; //!< The Vulkan Surface object that is backed by ANativeWindow - std::optional transformHint; //!< The optimal transform for the application to render as + vk::SurfaceCapabilitiesKHR vkSurfaceCapabilities; //!< The capabilities of the current Vulkan Surface + std::optional vkSwapchain; //!< The Vulkan swapchain and the properties associated with it struct SwapchainContext { - u16 imageCount{}; - i32 dequeuedCount{}; + std::array, service::hosbinder::GraphicBufferProducer::MaxSlotCount> textures{}; + std::array vkImages{VK_NULL_HANDLE}; + u8 imageCount{}; + i8 dequeuedCount{}; vk::Format imageFormat{}; vk::Extent2D imageExtent{}; + + static_assert(std::numeric_limits::max() >= service::hosbinder::GraphicBufferProducer::MaxSlotCount); + static_assert(std::numeric_limits::max() >= service::hosbinder::GraphicBufferProducer::MaxSlotCount); } swapchain; //!< The properties of the currently created swapchain u64 frameTimestamp{}; //!< The timestamp of the last frame being shown perfetto::Track presentationTrack; //!< Perfetto track used for presentation events - void UpdateSwapchain(u16 imageCount, vk::Format imageFormat, vk::Extent2D imageExtent); + /** + * @note 'PresentationEngine::mutex' **must** be locked prior to calling this + */ + void UpdateSwapchain(u16 imageCount, vk::Format imageFormat, vk::Extent2D imageExtent, bool newSurface = false); public: texture::Dimensions resolution{}; @@ -44,7 +53,7 @@ namespace skyline::gpu { std::shared_ptr vsyncEvent; //!< Signalled every time a frame is drawn std::shared_ptr bufferEvent; //!< Signalled every time a buffer is freed - PresentationEngine(const DeviceState &state, const GPU& gpu); + PresentationEngine(const DeviceState &state, GPU &gpu); ~PresentationEngine(); @@ -56,18 +65,18 @@ namespace skyline::gpu { /** * @brief Creates a Texture object from a GuestTexture as a part of the Vulkan swapchain */ - std::shared_ptr CreatePresentationTexture(const std::shared_ptr &texture, u32 slot); + std::shared_ptr CreatePresentationTexture(const std::shared_ptr &texture, u8 slot); /** * @param async If to return immediately when a texture is not available * @param slot The slot the freed texture is in is written into this, it is untouched if there's an error */ - service::hosbinder::AndroidStatus GetFreeTexture(bool async, i32& slot); + service::hosbinder::AndroidStatus GetFreeTexture(bool async, i32 &slot); /** * @brief Send a texture from a slot to the presentation queue to be displayed */ - void Present(i32 slot); + void Present(u32 slot); /** * @return A transform that the application should render with to elide costly transforms later diff --git a/app/src/main/cpp/skyline/gpu/texture.cpp b/app/src/main/cpp/skyline/gpu/texture.cpp deleted file mode 100644 index e70179ea..00000000 --- a/app/src/main/cpp/skyline/gpu/texture.cpp +++ /dev/null @@ -1,88 +0,0 @@ -// SPDX-License-Identifier: MPL-2.0 -// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) - -#include -#include -#include -#include "texture.h" - -namespace skyline::gpu { - GuestTexture::GuestTexture(const DeviceState &state, u8 *pointer, texture::Dimensions dimensions, texture::Format format, texture::TileMode tiling, texture::TileConfig layout) : state(state), pointer(pointer), dimensions(dimensions), format(format), tileMode(tiling), tileConfig(layout) {} - - std::shared_ptr GuestTexture::InitializeTexture(vk::raii::Image &&backing, std::optional pFormat, std::optional pDimensions, texture::Swizzle swizzle) { - if (!host.expired()) - throw exception("Trying to create multiple Texture objects from a single GuestTexture"); - auto sharedHost{std::make_shared(std::move(backing), shared_from_this(), pDimensions ? *pDimensions : dimensions, pFormat ? *pFormat : format, swizzle)}; - host = sharedHost; - return sharedHost; - } - - Texture::Texture(vk::raii::Image&& backing, std::shared_ptr guest, texture::Dimensions dimensions, texture::Format format, texture::Swizzle swizzle) : backing(std::move(backing)), guest(std::move(guest)), dimensions(dimensions), format(format), swizzle(swizzle) { - SynchronizeHost(); - } - - void Texture::SynchronizeHost() { - TRACE_EVENT("gpu", "Texture::SynchronizeHost"); - auto pointer{guest->pointer}; - auto size{format.GetSize(dimensions)}; - u8* output{nullptr}; - return; - - if (guest->tileMode == texture::TileMode::Block) { - // Reference on Block-linear tiling: https://gist.github.com/PixelyIon/d9c35050af0ef5690566ca9f0965bc32 - constexpr u8 sectorWidth{16}; // The width of a sector in bytes - constexpr u8 sectorHeight{2}; // The height of a sector in lines - constexpr u8 gobWidth{64}; // The width of a GOB in bytes - constexpr u8 gobHeight{8}; // The height of a GOB in lines - - auto blockHeight{guest->tileConfig.blockHeight}; // The height of the blocks in GOBs - auto robHeight{gobHeight * blockHeight}; // The height of a single ROB (Row of Blocks) in lines - auto surfaceHeight{dimensions.height / format.blockHeight}; // The height of the surface in lines - auto surfaceHeightRobs{util::AlignUp(surfaceHeight, robHeight) / robHeight}; // The height of the surface in ROBs (Row Of Blocks) - auto robWidthBytes{util::AlignUp((guest->tileConfig.surfaceWidth / format.blockWidth) * format.bpb, gobWidth)}; // The width of a ROB in bytes - auto robWidthBlocks{robWidthBytes / gobWidth}; // The width of a ROB in blocks (and GOBs because block width == 1 on the Tegra X1) - auto robBytes{robWidthBytes * robHeight}; // The size of a ROB in bytes - auto gobYOffset{robWidthBytes * gobHeight}; // The offset of the next Y-axis GOB from the current one in linear space - - auto inputSector{pointer}; // The address of the input sector - auto outputRob{output}; // The address of the output block - - for (u32 rob{}, y{}, paddingY{}; rob < surfaceHeightRobs; rob++) { // Every Surface contains `surfaceHeightRobs` ROBs - auto outputBlock{outputRob}; // We iterate through a block independently of the ROB - for (u32 block{}; block < robWidthBlocks; block++) { // Every ROB contains `surfaceWidthBlocks` Blocks - auto outputGob{outputBlock}; // We iterate through a GOB independently of the block - for (u32 gobY{}; gobY < blockHeight; gobY++) { // Every Block contains `blockHeight` Y-axis GOBs - for (u32 index{}; index < sectorWidth * sectorHeight; index++) { // Every Y-axis GOB contains `sectorWidth * sectorHeight` sectors - u32 xT{((index << 3) & 0b10000) | ((index << 1) & 0b100000)}; // Morton-Swizzle on the X-axis - u32 yT{((index >> 1) & 0b110) | (index & 0b1)}; // Morton-Swizzle on the Y-axis - std::memcpy(outputGob + (yT * robWidthBytes) + xT, inputSector, sectorWidth); - inputSector += sectorWidth; // `sectorWidth` bytes are of sequential image data - } - outputGob += gobYOffset; // Increment the output GOB to the next Y-axis GOB - } - inputSector += paddingY; // Increment the input sector to the next sector - outputBlock += gobWidth; // Increment the output block to the next block (As Block Width = 1 GOB Width) - } - outputRob += robBytes; // Increment the output block to the next ROB - - y += robHeight; // Increment the Y position to the next ROB - blockHeight = static_cast(std::min(static_cast(blockHeight), (surfaceHeight - y) / gobHeight)); // Calculate the amount of Y GOBs which aren't padding - paddingY = (guest->tileConfig.blockHeight - blockHeight) * (sectorWidth * sectorWidth * sectorHeight); // Calculate the amount of padding between contiguous sectors - } - } else if (guest->tileMode == texture::TileMode::Pitch) { - auto sizeLine{guest->format.GetSize(dimensions.width, 1)}; // The size of a single line of pixel data - auto sizeStride{guest->format.GetSize(guest->tileConfig.pitch, 1)}; // The size of a single stride of pixel data - - auto inputLine{pointer}; // The address of the input line - auto outputLine{output}; // The address of the output line - - for (u32 line{}; line < dimensions.height; line++) { - std::memcpy(outputLine, inputLine, sizeLine); - inputLine += sizeStride; - outputLine += sizeLine; - } - } else if (guest->tileMode == texture::TileMode::Linear) { - std::memcpy(output, pointer, size); - } - } -} diff --git a/app/src/main/cpp/skyline/gpu/format.h b/app/src/main/cpp/skyline/gpu/texture/format.h similarity index 100% rename from app/src/main/cpp/skyline/gpu/format.h rename to app/src/main/cpp/skyline/gpu/texture/format.h diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.cpp b/app/src/main/cpp/skyline/gpu/texture/texture.cpp new file mode 100644 index 00000000..c693f762 --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/texture/texture.cpp @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/) + +#include +#include +#include +#include "texture.h" + +namespace skyline::gpu { + GuestTexture::GuestTexture(const DeviceState &state, u8 *pointer, texture::Dimensions dimensions, texture::Format format, texture::TileMode tiling, texture::TileConfig layout) : state(state), pointer(pointer), dimensions(dimensions), format(format), tileMode(tiling), tileConfig(layout) {} + + std::shared_ptr GuestTexture::InitializeTexture(vk::Image backing, std::optional tiling, vk::ImageLayout pLayout, std::optional pFormat, std::optional pDimensions, texture::Swizzle swizzle) { + if (!host.expired()) + throw exception("Trying to create multiple Texture objects from a single GuestTexture"); + auto sharedHost{std::make_shared(*state.gpu, backing, pLayout, shared_from_this(), pDimensions ? *pDimensions : dimensions, pFormat ? *pFormat : format, tiling ? *tiling : (tileMode == texture::TileMode::Block) ? vk::ImageTiling::eOptimal : vk::ImageTiling::eLinear, swizzle)}; + host = sharedHost; + return sharedHost; + } + + std::shared_ptr GuestTexture::InitializeTexture(vk::raii::Image &&backing, std::optional tiling, vk::ImageLayout pLayout, std::optional pFormat, std::optional pDimensions, texture::Swizzle swizzle) { + if (!host.expired()) + throw exception("Trying to create multiple Texture objects from a single GuestTexture"); + auto sharedHost{std::make_shared(*state.gpu, std::move(backing), pLayout, shared_from_this(), pDimensions ? *pDimensions : dimensions, pFormat ? *pFormat : format, tiling ? *tiling : (tileMode == texture::TileMode::Block) ? vk::ImageTiling::eOptimal : vk::ImageTiling::eLinear, swizzle)}; + host = sharedHost; + return sharedHost; + } + + Texture::Texture(GPU &gpu, BackingType &&backing, vk::ImageLayout layout, std::shared_ptr guest, texture::Dimensions dimensions, texture::Format format, vk::ImageTiling tiling, vk::ComponentMapping mapping) : gpu(gpu), backing(std::move(backing)), layout(layout), guest(std::move(guest)), dimensions(dimensions), format(format), tiling(tiling), mapping(mapping) { + if (GetBacking()) + SynchronizeHost(); + } + + bool Texture::WaitOnBacking() { + if (GetBacking()) [[likely]] { + return false; + } else { + std::unique_lock lock(mutex, std::adopt_lock); + backingCondition.wait(lock, [&]() -> bool { return GetBacking(); }); + lock.release(); + return true; + } + } + + void Texture::WaitOnFence() { + if (cycle) { + cycle->Wait(); + cycle.reset(); + } + } + + void Texture::SwapBacking(BackingType &&pBacking, vk::ImageLayout pLayout) { + WaitOnFence(); + + backing = std::move(pBacking); + layout = pLayout; + if (GetBacking()) + backingCondition.notify_all(); + } + + void Texture::TransitionLayout(vk::ImageLayout pLayout) { + WaitOnBacking(); + WaitOnFence(); + + if (layout != pLayout) { + cycle = gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) { + commandBuffer.pipelineBarrier(layout != vk::ImageLayout::eUndefined ? vk::PipelineStageFlagBits::eTopOfPipe : vk::PipelineStageFlagBits::eBottomOfPipe, vk::PipelineStageFlagBits::eBottomOfPipe, {}, {}, {}, vk::ImageMemoryBarrier{ + .image = GetBacking(), + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead, + .oldLayout = layout, + .newLayout = pLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .subresourceRange = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .levelCount = 1, + .layerCount = 1, + }, + }); + }); + layout = pLayout; + } + } + + void Texture::SynchronizeHost() { + TRACE_EVENT("gpu", "Texture::SynchronizeHost"); + auto pointer{guest->pointer}; + auto size{format.GetSize(dimensions)}; + + auto stagingBuffer{[&]() { + if (tiling == vk::ImageTiling::eOptimal) { + return gpu.memory.AllocateStagingBuffer(size); + } else { + throw exception("Guest -> Host synchronization of images tiled as '{}' isn't implemented", vk::to_string(tiling)); + } + }()}; + + if (guest->tileMode == texture::TileMode::Block) { + // Reference on Block-linear tiling: https://gist.github.com/PixelyIon/d9c35050af0ef5690566ca9f0965bc32 + constexpr u8 sectorWidth{16}; // The width of a sector in bytes + constexpr u8 sectorHeight{2}; // The height of a sector in lines + constexpr u8 gobWidth{64}; // The width of a GOB in bytes + constexpr u8 gobHeight{8}; // The height of a GOB in lines + + auto blockHeight{guest->tileConfig.blockHeight}; // The height of the blocks in GOBs + auto robHeight{gobHeight * blockHeight}; // The height of a single ROB (Row of Blocks) in lines + auto surfaceHeight{dimensions.height / guest->format.blockHeight}; // The height of the surface in lines + auto surfaceHeightRobs{util::AlignUp(surfaceHeight, robHeight) / robHeight}; // The height of the surface in ROBs (Row Of Blocks) + auto robWidthBytes{util::AlignUp((guest->tileConfig.surfaceWidth / guest->format.blockWidth) * guest->format.bpb, gobWidth)}; // The width of a ROB in bytes + auto robWidthBlocks{robWidthBytes / gobWidth}; // The width of a ROB in blocks (and GOBs because block width == 1 on the Tegra X1) + auto robBytes{robWidthBytes * robHeight}; // The size of a ROB in bytes + auto gobYOffset{robWidthBytes * gobHeight}; // The offset of the next Y-axis GOB from the current one in linear space + + auto inputSector{pointer}; // The address of the input sector + auto outputRob{stagingBuffer->data()}; // The address of the output block + + for (u32 rob{}, y{}, paddingY{}; rob < surfaceHeightRobs; rob++) { // Every Surface contains `surfaceHeightRobs` ROBs + auto outputBlock{outputRob}; // We iterate through a block independently of the ROB + for (u32 block{}; block < robWidthBlocks; block++) { // Every ROB contains `surfaceWidthBlocks` Blocks + auto outputGob{outputBlock}; // We iterate through a GOB independently of the block + for (u32 gobY{}; gobY < blockHeight; gobY++) { // Every Block contains `blockHeight` Y-axis GOBs + for (u32 index{}; index < sectorWidth * sectorHeight; index++) { // Every Y-axis GOB contains `sectorWidth * sectorHeight` sectors + u32 xT{((index << 3) & 0b10000) | ((index << 1) & 0b100000)}; // Morton-Swizzle on the X-axis + u32 yT{((index >> 1) & 0b110) | (index & 0b1)}; // Morton-Swizzle on the Y-axis + std::memcpy(outputGob + (yT * robWidthBytes) + xT, inputSector, sectorWidth); + inputSector += sectorWidth; // `sectorWidth` bytes are of sequential image data + } + outputGob += gobYOffset; // Increment the output GOB to the next Y-axis GOB + } + inputSector += paddingY; // Increment the input sector to the next sector + outputBlock += gobWidth; // Increment the output block to the next block (As Block Width = 1 GOB Width) + } + outputRob += robBytes; // Increment the output block to the next ROB + + y += robHeight; // Increment the Y position to the next ROB + blockHeight = static_cast(std::min(static_cast(blockHeight), (surfaceHeight - y) / gobHeight)); // Calculate the amount of Y GOBs which aren't padding + paddingY = (guest->tileConfig.blockHeight - blockHeight) * (sectorWidth * sectorWidth * sectorHeight); // Calculate the amount of padding between contiguous sectors + } + } else if (guest->tileMode == texture::TileMode::Pitch) { + auto sizeLine{guest->format.GetSize(dimensions.width, 1)}; // The size of a single line of pixel data + auto sizeStride{guest->format.GetSize(guest->tileConfig.pitch, 1)}; // The size of a single stride of pixel data + + auto inputLine{pointer}; // The address of the input line + auto outputLine{stagingBuffer->data()}; // The address of the output line + + for (u32 line{}; line < dimensions.height; line++) { + std::memcpy(outputLine, inputLine, sizeLine); + inputLine += sizeStride; + outputLine += sizeLine; + } + } else if (guest->tileMode == texture::TileMode::Linear) { + std::memcpy(stagingBuffer->data(), pointer, size); + } + + if (WaitOnBacking() && size != format.GetSize(dimensions)) + throw exception("Backing properties changing during sync is not supported"); + WaitOnFence(); + + cycle = gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) { + auto image{GetBacking()}; + if (layout != vk::ImageLayout::eTransferDstOptimal) { + commandBuffer.pipelineBarrier(layout != vk::ImageLayout::eUndefined ? vk::PipelineStageFlagBits::eTopOfPipe : vk::PipelineStageFlagBits::eBottomOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ + .image = image, + .srcAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = layout, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .subresourceRange = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .levelCount = 1, + .layerCount = 1, + }, + }); + + if (layout == vk::ImageLayout::eUndefined) + layout = vk::ImageLayout::eTransferDstOptimal; + } + + commandBuffer.copyBufferToImage(stagingBuffer->vkBuffer, image, vk::ImageLayout::eTransferDstOptimal, vk::BufferImageCopy{ + .imageExtent = dimensions, + .imageSubresource = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .layerCount = 1, + }, + }); + + if (layout != vk::ImageLayout::eTransferDstOptimal) + commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{ + .image = image, + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = layout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .subresourceRange = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .levelCount = 1, + .layerCount = 1, + }, + }); + }); + + cycle->AttachObject(stagingBuffer); + } + + void Texture::SynchronizeGuest() { + WaitOnBacking(); + WaitOnFence(); + + TRACE_EVENT("gpu", "Texture::SynchronizeGuest"); + // TODO: Write Host -> Guest Synchronization + } +} diff --git a/app/src/main/cpp/skyline/gpu/texture.h b/app/src/main/cpp/skyline/gpu/texture/texture.h similarity index 59% rename from app/src/main/cpp/skyline/gpu/texture.h rename to app/src/main/cpp/skyline/gpu/texture/texture.h index cae7ae1d..61807363 100644 --- a/app/src/main/cpp/skyline/gpu/texture.h +++ b/app/src/main/cpp/skyline/gpu/texture/texture.h @@ -3,8 +3,7 @@ #pragma once -#include -#include +#include namespace skyline::gpu { namespace texture { @@ -111,7 +110,7 @@ namespace skyline::gpu { u32 pitch; //!< The pitch of the texture if it's pitch linear }; - enum class SwizzleChannel { + enum class SwizzleChannel : u8 { Zero, //!< Write 0 to the channel One, //!< Write 1 to the channel Red, //!< Red color channel @@ -125,6 +124,32 @@ namespace skyline::gpu { SwizzleChannel green{SwizzleChannel::Green}; //!< Swizzle for the green channel SwizzleChannel blue{SwizzleChannel::Blue}; //!< Swizzle for the blue channel SwizzleChannel alpha{SwizzleChannel::Alpha}; //!< Swizzle for the alpha channel + + constexpr operator vk::ComponentMapping() { + auto swizzleConvert{[](SwizzleChannel channel) { + switch (channel) { + case SwizzleChannel::Zero: + return vk::ComponentSwizzle::eZero; + case SwizzleChannel::One: + return vk::ComponentSwizzle::eOne; + case SwizzleChannel::Red: + return vk::ComponentSwizzle::eR; + case SwizzleChannel::Green: + return vk::ComponentSwizzle::eG; + case SwizzleChannel::Blue: + return vk::ComponentSwizzle::eB; + case SwizzleChannel::Alpha: + return vk::ComponentSwizzle::eA; + } + }}; + + return vk::ComponentMapping{ + .r = swizzleConvert(red), + .g = swizzleConvert(green), + .b = swizzleConvert(blue), + .a = swizzleConvert(alpha), + }; + } }; } @@ -154,29 +179,101 @@ namespace skyline::gpu { /** * @brief Creates a corresponding host texture object for this guest texture - * @param backing The Vulkan Image that is used as the backing on the host + * @param backing The Vulkan Image that is used as the backing on the host, its lifetime is not managed by the host texture object + * @param tiling The tiling used by the image on host, this is the same as guest by default + * @param layout The initial layout of the Vulkan Image, this is used for efficient layout management * @param format The format of the host texture (Defaults to the format of the guest texture) * @param dimensions The dimensions of the host texture (Defaults to the dimensions of the host texture) * @param swizzle The channel swizzle of the host texture (Defaults to no channel swizzling) * @return A shared pointer to the host texture object * @note There can only be one host texture for a corresponding guest texture */ - std::shared_ptr InitializeTexture(vk::raii::Image &&backing, std::optional format = std::nullopt, std::optional dimensions = std::nullopt, texture::Swizzle swizzle = {}); + std::shared_ptr InitializeTexture(vk::Image backing, std::optional tiling = std::nullopt, vk::ImageLayout layout = vk::ImageLayout::eUndefined, std::optional format = std::nullopt, std::optional dimensions = std::nullopt, texture::Swizzle swizzle = {}); + + /** + * @note As a RAII object is used here, the lifetime of the backing is handled by the host texture + */ + std::shared_ptr InitializeTexture(vk::raii::Image &&backing, std::optional tiling = std::nullopt, vk::ImageLayout layout = vk::ImageLayout::eUndefined, std::optional format = std::nullopt, std::optional dimensions = std::nullopt, texture::Swizzle swizzle = {}); }; /** * @brief A texture which is backed by host constructs while being synchronized with the underlying guest texture + * @note This class conforms to the Lockable and BasicLockable C++ named requirements */ class Texture { - public: - vk::raii::Image backing; //!< The object that holds a host copy of the guest texture - std::shared_ptr guest; //!< The guest texture from which this was created, it's required for syncing - texture::Dimensions dimensions; - texture::Format format; - texture::Swizzle swizzle; + private: + GPU &gpu; + std::mutex mutex; //!< Synchronizes any mutations to the texture or its backing + std::condition_variable backingCondition; //!< Signalled when a valid backing has been swapped in + using BackingType = std::variant; + BackingType backing; //!< The Vulkan image that backs this texture, it is nullable + std::shared_ptr cycle; //!< A fence cycle for when any host operation mutating the texture has completed, it must be waited on prior to any mutations to the backing + vk::ImageLayout layout; + + /** + * @note The handle returned is nullable and the appropriate precautions should be taken + */ + constexpr vk::Image GetBacking() { + return std::visit(VariantVisitor{ + [](vk::Image image) { return image; }, + [](const vk::raii::Image &image) { return *image; }, + }, backing); + } public: - Texture(vk::raii::Image &&backing, std::shared_ptr guest, texture::Dimensions dimensions, texture::Format format, texture::Swizzle swizzle); + std::shared_ptr guest; //!< The guest texture from which this was created, it's required for syncing and not nullable + texture::Dimensions dimensions; + texture::Format format; + vk::ImageTiling tiling; + vk::ComponentMapping mapping; + + Texture(GPU &gpu, BackingType &&backing, vk::ImageLayout layout, std::shared_ptr guest, texture::Dimensions dimensions, texture::Format format, vk::ImageTiling tiling, vk::ComponentMapping mapping); + + /** + * @brief Acquires an exclusive lock on the texture for the calling thread + */ + void lock() { + mutex.lock(); + } + + /** + * @brief Relinquishes an existing lock on the texture by the calling thread + */ + void unlock() { + mutex.unlock(); + } + + /** + * @brief Attempts to acquire an exclusive lock but returns immediately if it's captured by another thread + */ + bool try_lock() { + return mutex.try_lock(); + } + + /** + * @brief Waits on the texture backing to be a valid non-null Vulkan image + * @return If the mutex could be unlocked during the function + * @note The texture **must** be locked prior to calling this + */ + bool WaitOnBacking(); + + /** + * @brief Waits on a fence cycle if it exists till it's signalled and resets it after + * @note The texture **must** be locked prior to calling this + */ + void WaitOnFence(); + + /** + * @note All memory residing in the current backing is not copied to the new backing, it must be handled externally + * @note The texture **must** be locked prior to calling this + */ + void SwapBacking(BackingType &&backing, vk::ImageLayout layout = vk::ImageLayout::eUndefined); + + /** + * @brief Transitions the backing to the supplied layout, if the backing already is in this layout then this does nothing + * @note The texture **must** be locked prior to calling this + */ + void TransitionLayout(vk::ImageLayout layout); /** * @brief Convert this texture to the specified tiling mode @@ -202,11 +299,13 @@ namespace skyline::gpu { /** * @brief Synchronizes the host texture with the guest after it has been modified + * @note The texture **must** be locked prior to calling this */ void SynchronizeHost(); /** * @brief Synchronizes the guest texture with the host texture after it has been modified + * @note The texture **must** be locked prior to calling this */ void SynchronizeGuest(); }; diff --git a/app/src/main/cpp/skyline/kernel/ipc.cpp b/app/src/main/cpp/skyline/kernel/ipc.cpp index 06d00930..832e2b3b 100644 --- a/app/src/main/cpp/skyline/kernel/ipc.cpp +++ b/app/src/main/cpp/skyline/kernel/ipc.cpp @@ -183,6 +183,6 @@ namespace skyline::kernel::ipc { } } - state.logger->Verbose("Output: Raw Size: {}, Command ID: 0x{:X}, Copy Handles: {}, Move Handles: {}", static_cast(header->rawSize), static_cast(payloadHeader->value), copyHandles.size(), moveHandles.size()); + state.logger->Verbose("Output: Raw Size: {}, Result: 0x{:X}, Copy Handles: {}, Move Handles: {}", static_cast(header->rawSize), static_cast(payloadHeader->value), copyHandles.size(), moveHandles.size()); } } diff --git a/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.cpp b/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.cpp index a28b9624..18686b2a 100644 --- a/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.cpp +++ b/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include #include @@ -137,9 +137,14 @@ namespace skyline::service::hosbinder { throw exception("Any non-identity sticky transform is not supported: '{}' ({:#b})", ToString(stickyTransform), static_cast(stickyTransform)); fence.Wait(state.soc->host1x); - buffer.texture->SynchronizeHost(); - state.gpu->presentation.Present(slot); - state.gpu->presentation.bufferEvent->Signal(); + + { + std::scoped_lock textureLock(*buffer.texture); + buffer.texture->SynchronizeHost(); + buffer.texture->WaitOnFence(); + state.gpu->presentation.Present(slot); + state.gpu->presentation.bufferEvent->Signal(); + } width = defaultWidth; height = defaultHeight; @@ -345,14 +350,14 @@ namespace skyline::service::hosbinder { gpu::texture::TileMode tileMode; gpu::texture::TileConfig tileConfig; - if (surface.layout != NvSurfaceLayout::Blocklinear) { + if (surface.layout == NvSurfaceLayout::Blocklinear) { tileMode = gpu::texture::TileMode::Block; tileConfig = { .surfaceWidth = static_cast(surface.width), .blockHeight = static_cast(1U << surface.blockHeightLog2), .blockDepth = 1, }; - } else if (surface.layout != NvSurfaceLayout::Pitch) { + } else if (surface.layout == NvSurfaceLayout::Pitch) { tileMode = gpu::texture::TileMode::Pitch; tileConfig = { .pitch = surface.pitch, @@ -408,12 +413,13 @@ namespace skyline::service::hosbinder { auto queueBufferInputSize{in.Pop()}; if (queueBufferInputSize != QueueBufferInputSize) throw exception("The size of QueueBufferInput in the Parcel (0x{:X}) doesn't match the expected size (0x{:X})", queueBufferInputSize, QueueBufferInputSize); - QueueBuffer(slot, in.Pop(), in.Pop(), in.Pop(), in.Pop(), in.Pop(), in.Pop(), in.Pop(), in.Pop(), in.Pop(), width, height, transformHint, pendingBufferCount); + auto result{QueueBuffer(slot, in.Pop(), in.Pop(), in.Pop(), in.Pop(), in.Pop(), in.Pop(), in.Pop(), in.Pop(), in.Pop(), width, height, transformHint, pendingBufferCount)}; out.Push(width); out.Push(height); out.Push(transformHint); out.Push(pendingBufferCount); + out.Push(result); break; } diff --git a/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.h b/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.h index 9d27295d..508bc1e0 100644 --- a/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.h +++ b/app/src/main/cpp/skyline/services/hosbinder/GraphicBufferProducer.h @@ -80,10 +80,12 @@ namespace skyline::service::hosbinder { * @url https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/libs/gui/BufferQueueCore.cpp */ class GraphicBufferProducer { + public: + constexpr static u8 MaxSlotCount{16}; //!< The maximum amount of buffer slots that a buffer queue can hold, Android supports 64 but they go unused for applications like games so we've lowered this to 16 (https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/include/gui/BufferQueueDefs.h;l=29) + private: const DeviceState &state; std::mutex mutex; //!< Synchronizes access to the buffer queue - constexpr static u8 MaxSlotCount{16}; //!< The maximum amount of buffer slots that a buffer queue can hold, Android supports 64 but they go unused for applications like games so we've lowered this to 16 std::array queue; u8 activeSlotCount{2}; //!< The amount of slots in the queue that can be used u8 hasBufferCount{}; //!< The amount of slots with buffers attached in the queue diff --git a/app/src/main/java/emu/skyline/EmulationActivity.kt b/app/src/main/java/emu/skyline/EmulationActivity.kt index 62efc493..22c6d645 100644 --- a/app/src/main/java/emu/skyline/EmulationActivity.kt +++ b/app/src/main/java/emu/skyline/EmulationActivity.kt @@ -212,7 +212,7 @@ class EmulationActivity : AppCompatActivity(), SurfaceHolder.Callback, View.OnTo } @Suppress("DEPRECATION") val display = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.R) display!! else windowManager.defaultDisplay - display?.supportedModes?.maxByOrNull { it.refreshRate + (it.physicalHeight * it.physicalWidth) }?.let { window.attributes.preferredDisplayModeId = it.modeId } + display?.supportedModes?.maxByOrNull { it.refreshRate * it.physicalHeight * it.physicalWidth }?.let { window.attributes.preferredDisplayModeId = it.modeId } binding.gameView.setOnTouchListener(this)