Redesign Texture Class + Improve Presentation Engine

This commit reworks the `Texture` class to include a Vulkan image backing that can be either owning or non-owning and that can be swapped out with consideration for the Vulkan image layout; it also adds CPU-side synchronization for texture objects using `FenceCycle`. It makes the corresponding changes to `PresentationEngine` and `GraphicBufferProducer` so that they work with the new `Texture` class, while laying the groundwork for supporting swapchain recreation. Additionally, it fixes a log in `IpcResponse` and improves the display mode selection algorithm by weighing the refresh rate more heavily.
This commit is contained in:
PixelyIon 2021-05-22 23:16:28 +05:30
parent 0bfddc1b0d
commit b4799f612c
12 changed files with 453 additions and 161 deletions

View File

@ -90,8 +90,8 @@ add_library(skyline SHARED
${source_DIR}/skyline/gpu.cpp
${source_DIR}/skyline/gpu/memory_manager.cpp
${source_DIR}/skyline/gpu/command_scheduler.cpp
${source_DIR}/skyline/gpu/texture/texture.cpp
${source_DIR}/skyline/gpu/presentation_engine.cpp
${source_DIR}/skyline/gpu/texture.cpp
${source_DIR}/skyline/soc/gmmu.cpp
${source_DIR}/skyline/soc/host1x/syncpoint.cpp
${source_DIR}/skyline/soc/gm20b/gpfifo.cpp

View File

@ -21,6 +21,7 @@
#include <sstream>
#include <memory>
#include <compare>
#include <variant>
#include <sys/mman.h>
#include <fmt/format.h>
#include <frozen/unordered_map.h>
@ -444,6 +445,13 @@ namespace skyline {
template<typename Container>
span(const Container &) -> span<const typename Container::value_type>;
/**
 * @brief Merges several callables into one functor that exposes every one of their call operators, it is meant to be passed as the visitor to std::visit over a std::variant
 */
template<typename... Callables>
struct VariantVisitor : Callables ... {
    using Callables::operator()...;
};

/**
 * @brief A deduction guide which lets `VariantVisitor{callableA, callableB}` deduce its template arguments from the supplied callables
 */
template<typename... Callables>
VariantVisitor(Callables...) -> VariantVisitor<Callables...>;
/**
* @brief A wrapper around writing logs into a log file and logcat using Android Log APIs
*/

View File

@ -10,7 +10,7 @@ extern skyline::i32 Fps;
extern skyline::i32 FrameTime;
namespace skyline::gpu {
PresentationEngine::PresentationEngine(const DeviceState &state, const GPU &gpu) : state(state), gpu(gpu), vsyncEvent(std::make_shared<kernel::type::KEvent>(state, true)), bufferEvent(std::make_shared<kernel::type::KEvent>(state, true)), presentationTrack(static_cast<u64>(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()) {
PresentationEngine::PresentationEngine(const DeviceState &state, GPU &gpu) : state(state), gpu(gpu), vsyncEvent(std::make_shared<kernel::type::KEvent>(state, true)), bufferEvent(std::make_shared<kernel::type::KEvent>(state, true)), presentationTrack(static_cast<u64>(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()) {
auto desc{presentationTrack.Serialize()};
desc.set_name("Presentation");
perfetto::TrackEvent::SetTrackDescriptor(presentationTrack, desc);
@ -18,8 +18,8 @@ namespace skyline::gpu {
PresentationEngine::~PresentationEngine() {
auto env{state.jvm->GetEnv()};
if (!env->IsSameObject(surface, nullptr))
env->DeleteGlobalRef(surface);
if (!env->IsSameObject(jSurface, nullptr))
env->DeleteGlobalRef(jSurface);
}
service::hosbinder::NativeWindowTransform GetAndroidTransform(vk::SurfaceTransformFlagBitsKHR transform) {
@ -45,22 +45,28 @@ namespace skyline::gpu {
}
}
void PresentationEngine::UpdateSwapchain(u16 imageCount, vk::Format imageFormat, vk::Extent2D imageExtent) {
void PresentationEngine::UpdateSwapchain(u16 imageCount, vk::Format imageFormat, vk::Extent2D imageExtent, bool newSurface) {
if (!imageCount)
return;
else if (imageCount > service::hosbinder::GraphicBufferProducer::MaxSlotCount)
throw exception("Requesting swapchain with higher image count ({}) than maximum slot count ({})", imageCount, service::hosbinder::GraphicBufferProducer::MaxSlotCount);
auto capabilities{gpu.vkPhysicalDevice.getSurfaceCapabilitiesKHR(**vkSurface)};
const auto &capabilities{vkSurfaceCapabilities};
if (imageCount < capabilities.minImageCount || (capabilities.maxImageCount && imageCount > capabilities.maxImageCount))
throw exception("Cannot update swapchain to accomodate image count: {} ({}-{})", imageCount, capabilities.minImageCount, capabilities.maxImageCount);
if (capabilities.minImageExtent.height > imageExtent.height || capabilities.minImageExtent.width > imageExtent.width || capabilities.maxImageExtent.height < imageExtent.height || capabilities.maxImageExtent.width < imageExtent.width)
throw exception("Cannot update swapchain to accomodate image extent: {}x{} ({}x{}-{}x{})", imageExtent.width, imageExtent.height, capabilities.minImageExtent.width, capabilities.minImageExtent.height, capabilities.maxImageExtent.width, capabilities.maxImageExtent.height);
if (swapchain.imageFormat != imageFormat || newSurface) {
auto formats{gpu.vkPhysicalDevice.getSurfaceFormatsKHR(**vkSurface)};
if (std::find(formats.begin(), formats.end(), vk::SurfaceFormatKHR{imageFormat, vk::ColorSpaceKHR::eSrgbNonlinear}) == formats.end())
throw exception("Surface doesn't support requested image format '{}' with colorspace '{}'", vk::to_string(imageFormat), vk::to_string(vk::ColorSpaceKHR::eSrgbNonlinear));
}
constexpr vk::ImageUsageFlags presentUsage{vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst};
if ((capabilities.supportedUsageFlags & presentUsage) != presentUsage)
throw exception("Swapchain doesn't support image usage '{}': {}", vk::to_string(presentUsage), vk::to_string(capabilities.supportedUsageFlags));
transformHint = GetAndroidTransform(capabilities.currentTransform);
vkSwapchain = vk::raii::SwapchainKHR(gpu.vkDevice, vk::SwapchainCreateInfoKHR{
.surface = **vkSurface,
.minImageCount = imageCount,
@ -76,32 +82,56 @@ namespace skyline::gpu {
.oldSwapchain = vkSwapchain ? **vkSwapchain : vk::SwapchainKHR{},
});
swapchain = SwapchainContext{
.imageCount = imageCount,
.imageFormat = imageFormat,
.imageExtent = imageExtent,
};
auto vkImages{vkSwapchain->getImages()};
for (u16 slot{}; slot < imageCount; slot++) {
auto &vkImage{vkImages[slot]};
swapchain.vkImages[slot] = vkImage;
auto &image{swapchain.textures[slot]};
if (image) {
std::scoped_lock lock(*image);
image->SwapBacking(vkImage);
image->TransitionLayout(vk::ImageLayout::ePresentSrcKHR);
image->SynchronizeHost(); // Synchronize the new host backing with guest memory
}
}
swapchain.imageCount = imageCount;
swapchain.imageFormat = imageFormat;
swapchain.imageExtent = imageExtent;
}
void PresentationEngine::UpdateSurface(jobject newSurface) {
std::lock_guard guard(mutex);
auto env{state.jvm->GetEnv()};
if (!env->IsSameObject(surface, nullptr)) {
env->DeleteGlobalRef(surface);
surface = nullptr;
if (!env->IsSameObject(jSurface, nullptr)) {
env->DeleteGlobalRef(jSurface);
jSurface = nullptr;
}
if (!env->IsSameObject(newSurface, nullptr))
surface = env->NewGlobalRef(newSurface);
jSurface = env->NewGlobalRef(newSurface);
if (surface) {
if (vkSwapchain) {
for (u16 slot{}; slot < swapchain.imageCount; slot++) {
auto &image{swapchain.textures[slot]};
if (image) {
std::scoped_lock lock(*image);
image->SynchronizeGuest(); // Synchronize host backing to guest memory prior to being destroyed
image->SwapBacking(nullptr);
}
}
swapchain.vkImages = {};
vkSwapchain.reset();
}
if (jSurface) {
vkSurface.emplace(gpu.vkInstance, vk::AndroidSurfaceCreateInfoKHR{
.window = ANativeWindow_fromSurface(env, surface),
.window = ANativeWindow_fromSurface(env, jSurface),
});
if (!gpu.vkPhysicalDevice.getSurfaceSupportKHR(gpu.vkQueueFamilyIndex, **vkSurface))
throw exception("Vulkan Queue doesn't support presentation with surface");
vkSurfaceCapabilities = gpu.vkPhysicalDevice.getSurfaceCapabilitiesKHR(**vkSurface);
UpdateSwapchain(swapchain.imageCount, swapchain.imageFormat, swapchain.imageExtent);
UpdateSwapchain(swapchain.imageCount, swapchain.imageFormat, swapchain.imageExtent, true);
surfaceCondition.notify_all();
} else {
@ -109,39 +139,42 @@ namespace skyline::gpu {
}
}
std::shared_ptr<Texture> PresentationEngine::CreatePresentationTexture(const std::shared_ptr<GuestTexture> &texture, u32 slot) {
std::shared_ptr<Texture> PresentationEngine::CreatePresentationTexture(const std::shared_ptr<GuestTexture> &texture, u8 slot) {
std::lock_guard guard(mutex);
if (swapchain.imageCount <= slot)
UpdateSwapchain(std::max(slot + 1, 2U), texture->format.vkFormat, texture->dimensions);
return texture->InitializeTexture(vk::raii::Image(gpu.vkDevice, vkSwapchain->getImages().at(slot)));
if (swapchain.imageCount <= slot && slot + 1 >= vkSurfaceCapabilities.minImageCount)
UpdateSwapchain(slot + 1, texture->format.vkFormat, texture->dimensions);
auto host{texture->InitializeTexture(swapchain.vkImages.at(slot), vk::ImageTiling::eOptimal)};
swapchain.textures[slot] = host;
return host;
}
service::hosbinder::AndroidStatus PresentationEngine::GetFreeTexture(bool async, i32 &slot) {
using AndroidStatus = service::hosbinder::AndroidStatus;
std::unique_lock lock(mutex);
surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); });
if (swapchain.dequeuedCount < swapchain.imageCount) {
swapchain.dequeuedCount++;
vk::raii::Fence fence(state.gpu->vkDevice, vk::FenceCreateInfo{});
static vk::raii::Fence fence(gpu.vkDevice, vk::FenceCreateInfo{});
auto timeout{async ? 0ULL : std::numeric_limits<u64>::max()}; // We cannot block for a buffer to be retrieved in async mode
auto nextImage{vkSwapchain->acquireNextImage(timeout, {}, *fence)};
if (nextImage.first == vk::Result::eTimeout) {
return AndroidStatus::WouldBlock;
} else if (nextImage.first == vk::Result::eErrorSurfaceLostKHR || nextImage.first == vk::Result::eSuboptimalKHR) {
surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); });
return GetFreeTexture(async, slot);
}
gpu.vkDevice.waitForFences(*fence, true, std::numeric_limits<u64>::max());
if (nextImage.first == vk::Result::eSuccess) {
swapchain.dequeuedCount++;
while (gpu.vkDevice.waitForFences(*fence, true, std::numeric_limits<u64>::max()) == vk::Result::eTimeout);
slot = nextImage.second;
return AndroidStatus::Ok;
} else if (nextImage.first == vk::Result::eNotReady || nextImage.first == vk::Result::eTimeout) {
return AndroidStatus::WouldBlock;
} else if (nextImage.first == vk::Result::eSuboptimalKHR) {
surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); });
return GetFreeTexture(async, slot);
} else {
throw exception("VkAcquireNextImageKHR returned an unhandled result '{}'", vk::to_string(nextImage.first));
}
}
return AndroidStatus::Busy;
}
void PresentationEngine::Present(i32 slot) {
void PresentationEngine::Present(u32 slot) {
std::unique_lock lock(mutex);
surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); });
@ -149,6 +182,15 @@ namespace skyline::gpu {
throw exception("Swapchain has been presented more times than images from it have been acquired: {} (Image Count: {})", swapchain.dequeuedCount, swapchain.imageCount);
}
{
std::lock_guard queueLock(gpu.queueMutex);
static_cast<void>(gpu.vkQueue.presentKHR(vk::PresentInfoKHR{
.swapchainCount = 1,
.pSwapchains = &**vkSwapchain,
.pImageIndices = &slot,
})); // We explicitly discard the result here as suboptimal images are expected when the game doesn't respect the transform hint
}
vsyncEvent->Signal();
if (frameTimestamp) {
@ -167,8 +209,6 @@ namespace skyline::gpu {
service::hosbinder::NativeWindowTransform PresentationEngine::GetTransformHint() {
std::unique_lock lock(mutex);
surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); });
if (!transformHint)
transformHint = GetAndroidTransform(gpu.vkPhysicalDevice.getSurfaceCapabilitiesKHR(**vkSurface).currentTransform);
return *transformHint;
return GetAndroidTransform(vkSurfaceCapabilities.currentTransform);
}
}

View File

@ -5,9 +5,8 @@
#include <common/trace.h>
#include <kernel/types/KEvent.h>
#include <services/hosbinder/native_window.h>
#include <services/hosbinder/android_types.h>
#include "texture.h"
#include <services/hosbinder/GraphicBufferProducer.h>
#include "texture/texture.h"
struct ANativeWindow;
@ -18,25 +17,35 @@ namespace skyline::gpu {
class PresentationEngine {
private:
const DeviceState &state;
const GPU &gpu;
GPU &gpu;
std::mutex mutex; //!< Synchronizes access to the surface objects
std::condition_variable surfaceCondition; //!< Allows us to efficiently wait for Vulkan surface to be initialized
jobject surface{}; //!< The Surface object backing the ANativeWindow
jobject jSurface{}; //!< The Java Surface object backing the ANativeWindow
std::optional<vk::raii::SurfaceKHR> vkSurface; //!< The Vulkan Surface object that is backed by ANativeWindow
std::optional<service::hosbinder::NativeWindowTransform> transformHint; //!< The optimal transform for the application to render as
vk::SurfaceCapabilitiesKHR vkSurfaceCapabilities; //!< The capabilities of the current Vulkan Surface
std::optional<vk::raii::SwapchainKHR> vkSwapchain; //!< The Vulkan swapchain and the properties associated with it
struct SwapchainContext {
u16 imageCount{};
i32 dequeuedCount{};
std::array<std::shared_ptr<Texture>, service::hosbinder::GraphicBufferProducer::MaxSlotCount> textures{};
std::array<VkImage, service::hosbinder::GraphicBufferProducer::MaxSlotCount> vkImages{VK_NULL_HANDLE};
u8 imageCount{};
i8 dequeuedCount{};
vk::Format imageFormat{};
vk::Extent2D imageExtent{};
static_assert(std::numeric_limits<decltype(imageCount)>::max() >= service::hosbinder::GraphicBufferProducer::MaxSlotCount);
static_assert(std::numeric_limits<decltype(dequeuedCount)>::max() >= service::hosbinder::GraphicBufferProducer::MaxSlotCount);
} swapchain; //!< The properties of the currently created swapchain
u64 frameTimestamp{}; //!< The timestamp of the last frame being shown
perfetto::Track presentationTrack; //!< Perfetto track used for presentation events
void UpdateSwapchain(u16 imageCount, vk::Format imageFormat, vk::Extent2D imageExtent);
/**
* @note 'PresentationEngine::mutex' **must** be locked prior to calling this
*/
void UpdateSwapchain(u16 imageCount, vk::Format imageFormat, vk::Extent2D imageExtent, bool newSurface = false);
public:
texture::Dimensions resolution{};
@ -44,7 +53,7 @@ namespace skyline::gpu {
std::shared_ptr<kernel::type::KEvent> vsyncEvent; //!< Signalled every time a frame is drawn
std::shared_ptr<kernel::type::KEvent> bufferEvent; //!< Signalled every time a buffer is freed
PresentationEngine(const DeviceState &state, const GPU& gpu);
PresentationEngine(const DeviceState &state, GPU &gpu);
~PresentationEngine();
@ -56,18 +65,18 @@ namespace skyline::gpu {
/**
* @brief Creates a Texture object from a GuestTexture as a part of the Vulkan swapchain
*/
std::shared_ptr<Texture> CreatePresentationTexture(const std::shared_ptr<GuestTexture> &texture, u32 slot);
std::shared_ptr<Texture> CreatePresentationTexture(const std::shared_ptr<GuestTexture> &texture, u8 slot);
/**
* @param async If to return immediately when a texture is not available
* @param slot The slot the freed texture is in is written into this, it is untouched if there's an error
*/
service::hosbinder::AndroidStatus GetFreeTexture(bool async, i32& slot);
service::hosbinder::AndroidStatus GetFreeTexture(bool async, i32 &slot);
/**
* @brief Send a texture from a slot to the presentation queue to be displayed
*/
void Present(i32 slot);
void Present(u32 slot);
/**
* @return A transform that the application should render with to elide costly transforms later

View File

@ -1,88 +0,0 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <gpu.h>
#include <common/trace.h>
#include <kernel/types/KProcess.h>
#include "texture.h"
namespace skyline::gpu {
/**
 * @brief Stores the description of a guest texture (where it lives, its dimensions, its format and how it is tiled), no work is done beyond initializing the members
 */
GuestTexture::GuestTexture(const DeviceState &state, u8 *pointer, texture::Dimensions dimensions, texture::Format format, texture::TileMode tiling, texture::TileConfig layout)
    : state(state),
      pointer(pointer),
      dimensions(dimensions),
      format(format),
      tileMode(tiling),
      tileConfig(layout) {}
/**
 * @brief Creates the host texture for this guest texture, taking ownership of the supplied Vulkan image
 * @param pFormat The format of the host texture, the guest format is used when this is empty
 * @param pDimensions The dimensions of the host texture, the guest dimensions are used when this is empty
 * @return The newly created host texture, a weak reference to it is retained in `host`
 * @throws exception If a host texture created from this guest texture is still alive
 */
std::shared_ptr<Texture> GuestTexture::InitializeTexture(vk::raii::Image &&backing, std::optional<texture::Format> pFormat, std::optional<texture::Dimensions> pDimensions, texture::Swizzle swizzle) {
    if (!host.expired())
        throw exception("Trying to create multiple Texture objects from a single GuestTexture");

    auto hostTexture{std::make_shared<Texture>(std::move(backing), shared_from_this(), pDimensions.value_or(dimensions), pFormat.value_or(format), swizzle)};
    host = hostTexture; // Only a weak reference is kept so the guest texture doesn't extend the lifetime of the host texture
    return hostTexture;
}
/**
 * @brief Takes ownership of the Vulkan image and immediately runs a guest -> host synchronization
 */
Texture::Texture(vk::raii::Image&& backing, std::shared_ptr<GuestTexture> guest, texture::Dimensions dimensions, texture::Format format, texture::Swizzle swizzle)
    : backing(std::move(backing)),
      guest(std::move(guest)),
      dimensions(dimensions),
      format(format),
      swizzle(swizzle) {
    SynchronizeHost();
}
/**
 * @brief Intended to copy the guest texture data into a linear host-side buffer, converting from the guest's tiling mode
 * @note In this revision the function emits its trace event and then returns unconditionally, everything after the `return;` is unreachable
 */
void Texture::SynchronizeHost() {
TRACE_EVENT("gpu", "Texture::SynchronizeHost");
auto pointer{guest->pointer};
auto size{format.GetSize(dimensions)};
// No destination exists yet: `output` is a null pointer, so the copies below must never run as-is
u8* output{nullptr};
// Unconditional early exit, this keeps the (currently destination-less) conversion code below from executing
return;
if (guest->tileMode == texture::TileMode::Block) {
// Reference on Block-linear tiling: https://gist.github.com/PixelyIon/d9c35050af0ef5690566ca9f0965bc32
constexpr u8 sectorWidth{16}; // The width of a sector in bytes
constexpr u8 sectorHeight{2}; // The height of a sector in lines
constexpr u8 gobWidth{64}; // The width of a GOB in bytes
constexpr u8 gobHeight{8}; // The height of a GOB in lines
auto blockHeight{guest->tileConfig.blockHeight}; // The height of the blocks in GOBs
auto robHeight{gobHeight * blockHeight}; // The height of a single ROB (Row of Blocks) in lines
auto surfaceHeight{dimensions.height / format.blockHeight}; // The height of the surface in lines
auto surfaceHeightRobs{util::AlignUp(surfaceHeight, robHeight) / robHeight}; // The height of the surface in ROBs (Row Of Blocks)
auto robWidthBytes{util::AlignUp((guest->tileConfig.surfaceWidth / format.blockWidth) * format.bpb, gobWidth)}; // The width of a ROB in bytes
auto robWidthBlocks{robWidthBytes / gobWidth}; // The width of a ROB in blocks (and GOBs because block width == 1 on the Tegra X1)
auto robBytes{robWidthBytes * robHeight}; // The size of a ROB in bytes
auto gobYOffset{robWidthBytes * gobHeight}; // The offset of the next Y-axis GOB from the current one in linear space
auto inputSector{pointer}; // The address of the input sector
auto outputRob{output}; // The address of the output block
// The input is consumed strictly sequentially while the output position is computed per sector
for (u32 rob{}, y{}, paddingY{}; rob < surfaceHeightRobs; rob++) { // Every Surface contains `surfaceHeightRobs` ROBs
auto outputBlock{outputRob}; // We iterate through a block independently of the ROB
for (u32 block{}; block < robWidthBlocks; block++) { // Every ROB contains `surfaceWidthBlocks` Blocks
auto outputGob{outputBlock}; // We iterate through a GOB independently of the block
for (u32 gobY{}; gobY < blockHeight; gobY++) { // Every Block contains `blockHeight` Y-axis GOBs
for (u32 index{}; index < sectorWidth * sectorHeight; index++) { // Every Y-axis GOB contains `sectorWidth * sectorHeight` sectors
u32 xT{((index << 3) & 0b10000) | ((index << 1) & 0b100000)}; // Morton-Swizzle on the X-axis
u32 yT{((index >> 1) & 0b110) | (index & 0b1)}; // Morton-Swizzle on the Y-axis
std::memcpy(outputGob + (yT * robWidthBytes) + xT, inputSector, sectorWidth);
inputSector += sectorWidth; // `sectorWidth` bytes are of sequential image data
}
outputGob += gobYOffset; // Increment the output GOB to the next Y-axis GOB
}
inputSector += paddingY; // Increment the input sector to the next sector
outputBlock += gobWidth; // Increment the output block to the next block (As Block Width = 1 GOB Width)
}
outputRob += robBytes; // Increment the output block to the next ROB
y += robHeight; // Increment the Y position to the next ROB
blockHeight = static_cast<u8>(std::min(static_cast<u32>(blockHeight), (surfaceHeight - y) / gobHeight)); // Calculate the amount of Y GOBs which aren't padding
paddingY = (guest->tileConfig.blockHeight - blockHeight) * (sectorWidth * sectorWidth * sectorHeight); // Calculate the amount of padding between contiguous sectors
}
} else if (guest->tileMode == texture::TileMode::Pitch) {
// Pitch-linear: copy line by line, advancing the input by the stride and the output by the tightly packed line size
auto sizeLine{guest->format.GetSize(dimensions.width, 1)}; // The size of a single line of pixel data
auto sizeStride{guest->format.GetSize(guest->tileConfig.pitch, 1)}; // The size of a single stride of pixel data
auto inputLine{pointer}; // The address of the input line
auto outputLine{output}; // The address of the output line
for (u32 line{}; line < dimensions.height; line++) {
std::memcpy(outputLine, inputLine, sizeLine);
inputLine += sizeStride;
outputLine += sizeLine;
}
} else if (guest->tileMode == texture::TileMode::Linear) {
// Linear: the guest data is copied in a single pass of `size` bytes
std::memcpy(output, pointer, size);
}
}
}

View File

@ -0,0 +1,216 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <gpu.h>
#include <common/trace.h>
#include <kernel/types/KProcess.h>
#include "texture.h"
namespace skyline::gpu {
/**
 * @brief Stores the description of a guest texture (where it lives, its dimensions, its format and how it is tiled), no work is done beyond initializing the members
 */
GuestTexture::GuestTexture(const DeviceState &state, u8 *pointer, texture::Dimensions dimensions, texture::Format format, texture::TileMode tiling, texture::TileConfig layout)
    : state(state),
      pointer(pointer),
      dimensions(dimensions),
      format(format),
      tileMode(tiling),
      tileConfig(layout) {}
/**
 * @brief Creates the host texture for this guest texture on top of a plain Vulkan image handle
 * @param tiling The tiling of the host image, when empty it is optimal for block-linear guest textures and linear otherwise
 * @param pLayout The layout the Vulkan image is initially in
 * @param pFormat The format of the host texture, the guest format is used when this is empty
 * @param pDimensions The dimensions of the host texture, the guest dimensions are used when this is empty
 * @return The newly created host texture, a weak reference to it is retained in `host`
 * @throws exception If a host texture created from this guest texture is still alive
 */
std::shared_ptr<Texture> GuestTexture::InitializeTexture(vk::Image backing, std::optional<vk::ImageTiling> tiling, vk::ImageLayout pLayout, std::optional<texture::Format> pFormat, std::optional<texture::Dimensions> pDimensions, texture::Swizzle swizzle) {
    if (!host.expired())
        throw exception("Trying to create multiple Texture objects from a single GuestTexture");

    const auto guestTiling{(tileMode == texture::TileMode::Block) ? vk::ImageTiling::eOptimal : vk::ImageTiling::eLinear}; // The host tiling which mirrors the guest tile mode
    auto hostTexture{std::make_shared<Texture>(*state.gpu, backing, pLayout, shared_from_this(), pDimensions.value_or(dimensions), pFormat.value_or(format), tiling.value_or(guestTiling), swizzle)};
    host = hostTexture; // Only a weak reference is kept so the guest texture doesn't extend the lifetime of the host texture
    return hostTexture;
}
/**
 * @brief Creates the host texture for this guest texture, moving the supplied RAII Vulkan image into it
 * @param tiling The tiling of the host image, when empty it is optimal for block-linear guest textures and linear otherwise
 * @param pLayout The layout the Vulkan image is initially in
 * @param pFormat The format of the host texture, the guest format is used when this is empty
 * @param pDimensions The dimensions of the host texture, the guest dimensions are used when this is empty
 * @return The newly created host texture, a weak reference to it is retained in `host`
 * @throws exception If a host texture created from this guest texture is still alive
 */
std::shared_ptr<Texture> GuestTexture::InitializeTexture(vk::raii::Image &&backing, std::optional<vk::ImageTiling> tiling, vk::ImageLayout pLayout, std::optional<texture::Format> pFormat, std::optional<texture::Dimensions> pDimensions, texture::Swizzle swizzle) {
    if (!host.expired())
        throw exception("Trying to create multiple Texture objects from a single GuestTexture");

    const auto guestTiling{(tileMode == texture::TileMode::Block) ? vk::ImageTiling::eOptimal : vk::ImageTiling::eLinear}; // The host tiling which mirrors the guest tile mode
    auto hostTexture{std::make_shared<Texture>(*state.gpu, std::move(backing), pLayout, shared_from_this(), pDimensions.value_or(dimensions), pFormat.value_or(format), tiling.value_or(guestTiling), swizzle)};
    host = hostTexture; // Only a weak reference is kept so the guest texture doesn't extend the lifetime of the host texture
    return hostTexture;
}
/**
 * @brief Initializes the host texture and, if it was handed a backing image, performs an initial guest -> host synchronization
 */
Texture::Texture(GPU &gpu, BackingType &&backing, vk::ImageLayout layout, std::shared_ptr<GuestTexture> guest, texture::Dimensions dimensions, texture::Format format, vk::ImageTiling tiling, vk::ComponentMapping mapping)
    : gpu(gpu),
      backing(std::move(backing)),
      layout(layout),
      guest(std::move(guest)),
      dimensions(dimensions),
      format(format),
      tiling(tiling),
      mapping(mapping) {
    if (!GetBacking())
        return; // Nothing can be uploaded without a backing image

    SynchronizeHost();
}
bool Texture::WaitOnBacking() {
if (GetBacking()) [[likely]] {
return false;
} else {
std::unique_lock lock(mutex, std::adopt_lock);
backingCondition.wait(lock, [&]() -> bool { return GetBacking(); });
lock.release();
return true;
}
}
/**
 * @brief Waits on the fence cycle attached to this texture (if there is one) and then drops the reference to it
 */
void Texture::WaitOnFence() {
    if (!cycle)
        return; // No cycle is attached, so there's nothing to wait on

    cycle->Wait();
    cycle.reset();
}
/**
 * @brief Replaces the backing image and the layout tracked for it, any attached fence cycle is waited on beforehand
 * @param pBacking The new backing, waiters in WaitOnBacking are only woken up if this actually holds an image
 * @param pLayout The layout that the new backing is in
 */
void Texture::SwapBacking(BackingType &&pBacking, vk::ImageLayout pLayout) {
    WaitOnFence();

    backing = std::move(pBacking);
    layout = pLayout;

    if (!GetBacking())
        return; // The backing was removed rather than replaced, waiters have to keep waiting

    backingCondition.notify_all();
}
/**
 * @brief Transitions the backing image into the supplied layout, this is a no-op when it is already in that layout
 * @param pLayout The layout to transition the image into
 * @note This waits for a backing to be present and for any attached fence cycle prior to recording the transition
 */
void Texture::TransitionLayout(vk::ImageLayout pLayout) {
    WaitOnBacking();
    WaitOnFence();

    if (layout == pLayout)
        return;

    cycle = gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) {
        const auto sourceStage{layout != vk::ImageLayout::eUndefined ? vk::PipelineStageFlagBits::eTopOfPipe : vk::PipelineStageFlagBits::eBottomOfPipe};
        const vk::ImageMemoryBarrier layoutBarrier{
            .image = GetBacking(),
            .srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
            .dstAccessMask = vk::AccessFlagBits::eMemoryRead,
            .oldLayout = layout,
            .newLayout = pLayout,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .subresourceRange = {
                .aspectMask = vk::ImageAspectFlagBits::eColor,
                .levelCount = 1,
                .layerCount = 1,
            },
        };
        commandBuffer.pipelineBarrier(sourceStage, vk::PipelineStageFlagBits::eBottomOfPipe, {}, {}, {}, layoutBarrier);
    });

    layout = pLayout; // Track the new layout only after the barrier referencing the old one has been recorded
}
/**
 * @brief Uploads the guest texture to the host backing image
 * @details The guest data is first converted from its tiling mode into a staging buffer, then a command buffer is submitted which copies the staging buffer into the backing image (transitioning it to and from the transfer destination layout as required)
 * @throws exception If the host image tiling isn't optimal, or if the backing had to be waited on and the size of the texture changed in the meantime
 * @note The resulting fence cycle is stored in `cycle`
 */
void Texture::SynchronizeHost() {
TRACE_EVENT("gpu", "Texture::SynchronizeHost");
auto pointer{guest->pointer};
auto size{format.GetSize(dimensions)};
// Only optimally tiled host images are supported, as those are written to via a staging buffer copy
auto stagingBuffer{[&]() {
if (tiling == vk::ImageTiling::eOptimal) {
return gpu.memory.AllocateStagingBuffer(size);
} else {
throw exception("Guest -> Host synchronization of images tiled as '{}' isn't implemented", vk::to_string(tiling));
}
}()};
if (guest->tileMode == texture::TileMode::Block) {
// Reference on Block-linear tiling: https://gist.github.com/PixelyIon/d9c35050af0ef5690566ca9f0965bc32
constexpr u8 sectorWidth{16}; // The width of a sector in bytes
constexpr u8 sectorHeight{2}; // The height of a sector in lines
constexpr u8 gobWidth{64}; // The width of a GOB in bytes
constexpr u8 gobHeight{8}; // The height of a GOB in lines
auto blockHeight{guest->tileConfig.blockHeight}; // The height of the blocks in GOBs
auto robHeight{gobHeight * blockHeight}; // The height of a single ROB (Row of Blocks) in lines
auto surfaceHeight{dimensions.height / guest->format.blockHeight}; // The height of the surface in lines
auto surfaceHeightRobs{util::AlignUp(surfaceHeight, robHeight) / robHeight}; // The height of the surface in ROBs (Row Of Blocks)
auto robWidthBytes{util::AlignUp((guest->tileConfig.surfaceWidth / guest->format.blockWidth) * guest->format.bpb, gobWidth)}; // The width of a ROB in bytes
auto robWidthBlocks{robWidthBytes / gobWidth}; // The width of a ROB in blocks (and GOBs because block width == 1 on the Tegra X1)
auto robBytes{robWidthBytes * robHeight}; // The size of a ROB in bytes
auto gobYOffset{robWidthBytes * gobHeight}; // The offset of the next Y-axis GOB from the current one in linear space
auto inputSector{pointer}; // The address of the input sector
auto outputRob{stagingBuffer->data()}; // The address of the output block
// The input is consumed strictly sequentially while the output position is computed per sector
for (u32 rob{}, y{}, paddingY{}; rob < surfaceHeightRobs; rob++) { // Every Surface contains `surfaceHeightRobs` ROBs
auto outputBlock{outputRob}; // We iterate through a block independently of the ROB
for (u32 block{}; block < robWidthBlocks; block++) { // Every ROB contains `surfaceWidthBlocks` Blocks
auto outputGob{outputBlock}; // We iterate through a GOB independently of the block
for (u32 gobY{}; gobY < blockHeight; gobY++) { // Every Block contains `blockHeight` Y-axis GOBs
for (u32 index{}; index < sectorWidth * sectorHeight; index++) { // Every Y-axis GOB contains `sectorWidth * sectorHeight` sectors
u32 xT{((index << 3) & 0b10000) | ((index << 1) & 0b100000)}; // Morton-Swizzle on the X-axis
u32 yT{((index >> 1) & 0b110) | (index & 0b1)}; // Morton-Swizzle on the Y-axis
std::memcpy(outputGob + (yT * robWidthBytes) + xT, inputSector, sectorWidth);
inputSector += sectorWidth; // `sectorWidth` bytes are of sequential image data
}
outputGob += gobYOffset; // Increment the output GOB to the next Y-axis GOB
}
inputSector += paddingY; // Increment the input sector to the next sector
outputBlock += gobWidth; // Increment the output block to the next block (As Block Width = 1 GOB Width)
}
outputRob += robBytes; // Increment the output block to the next ROB
y += robHeight; // Increment the Y position to the next ROB
blockHeight = static_cast<u8>(std::min(static_cast<u32>(blockHeight), (surfaceHeight - y) / gobHeight)); // Calculate the amount of Y GOBs which aren't padding
paddingY = (guest->tileConfig.blockHeight - blockHeight) * (sectorWidth * sectorWidth * sectorHeight); // Calculate the amount of padding between contiguous sectors
}
} else if (guest->tileMode == texture::TileMode::Pitch) {
// Pitch-linear: copy line by line, advancing the input by the stride and the output by the tightly packed line size
auto sizeLine{guest->format.GetSize(dimensions.width, 1)}; // The size of a single line of pixel data
auto sizeStride{guest->format.GetSize(guest->tileConfig.pitch, 1)}; // The size of a single stride of pixel data
auto inputLine{pointer}; // The address of the input line
auto outputLine{stagingBuffer->data()}; // The address of the output line
for (u32 line{}; line < dimensions.height; line++) {
std::memcpy(outputLine, inputLine, sizeLine);
inputLine += sizeStride;
outputLine += sizeLine;
}
} else if (guest->tileMode == texture::TileMode::Linear) {
// Linear: the guest data is copied into the staging buffer in a single pass of `size` bytes
std::memcpy(stagingBuffer->data(), pointer, size);
}
// WaitOnBacking returns true when it had to wait for a backing, the staging buffer was sized before that wait so the size is verified to be unchanged
if (WaitOnBacking() && size != format.GetSize(dimensions))
throw exception("Backing properties changing during sync is not supported");
WaitOnFence();
cycle = gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) {
auto image{GetBacking()};
if (layout != vk::ImageLayout::eTransferDstOptimal) {
// The image has to be in the transfer destination layout for the copy below
// NOTE(review): the source stage is top-of-pipe for defined layouts and bottom-of-pipe for an undefined one, which looks inverted relative to common practice - confirm this is intended
commandBuffer.pipelineBarrier(layout != vk::ImageLayout::eUndefined ? vk::PipelineStageFlagBits::eTopOfPipe : vk::PipelineStageFlagBits::eBottomOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{
.image = image,
.srcAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
.dstAccessMask = vk::AccessFlagBits::eTransferWrite,
.oldLayout = layout,
.newLayout = vk::ImageLayout::eTransferDstOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.subresourceRange = {
.aspectMask = vk::ImageAspectFlagBits::eColor,
.levelCount = 1,
.layerCount = 1,
},
});
// There is no prior layout to restore for an undefined image, so the transfer destination layout is adopted as the tracked layout
if (layout == vk::ImageLayout::eUndefined)
layout = vk::ImageLayout::eTransferDstOptimal;
}
commandBuffer.copyBufferToImage(stagingBuffer->vkBuffer, image, vk::ImageLayout::eTransferDstOptimal, vk::BufferImageCopy{
.imageExtent = dimensions,
.imageSubresource = {
.aspectMask = vk::ImageAspectFlagBits::eColor,
.layerCount = 1,
},
});
// Restore the layout the image was in prior to the copy (skipped when the tracked layout is the transfer destination layout)
if (layout != vk::ImageLayout::eTransferDstOptimal)
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{
.image = image,
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead,
.oldLayout = vk::ImageLayout::eTransferDstOptimal,
.newLayout = layout,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.subresourceRange = {
.aspectMask = vk::ImageAspectFlagBits::eColor,
.levelCount = 1,
.layerCount = 1,
},
});
});
// Presumably this ties the lifetime of the staging buffer to the fence cycle so it outlives the GPU copy - verify against FenceCycle
cycle->AttachObject(stagingBuffer);
}
/**
 * @brief Intended to write the contents of the host backing back to guest memory
 * @note The copy itself isn't implemented yet, currently this only waits for a backing to be present and for any attached fence cycle before emitting its trace event
 */
void Texture::SynchronizeGuest() {
WaitOnBacking();
WaitOnFence();
TRACE_EVENT("gpu", "Texture::SynchronizeGuest");
// TODO: Write Host -> Guest Synchronization
}
}

View File

@ -3,8 +3,7 @@
#pragma once
#include <common.h>
#include <vulkan/vulkan_raii.hpp>
#include <gpu/fence_cycle.h>
namespace skyline::gpu {
namespace texture {
@ -111,7 +110,7 @@ namespace skyline::gpu {
u32 pitch; //!< The pitch of the texture if it's pitch linear
};
enum class SwizzleChannel {
enum class SwizzleChannel : u8 {
Zero, //!< Write 0 to the channel
One, //!< Write 1 to the channel
Red, //!< Red color channel
@ -125,6 +124,32 @@ namespace skyline::gpu {
SwizzleChannel green{SwizzleChannel::Green}; //!< Swizzle for the green channel
SwizzleChannel blue{SwizzleChannel::Blue}; //!< Swizzle for the blue channel
SwizzleChannel alpha{SwizzleChannel::Alpha}; //!< Swizzle for the alpha channel
/**
 * @brief Converts this swizzle into the equivalent Vulkan component mapping
 * @return A vk::ComponentMapping whose r/g/b/a members are the Vulkan counterparts of the red/green/blue/alpha channels
 * @note This is const-qualified as it doesn't modify the swizzle, which allows const Swizzle objects to be converted as well
 */
constexpr operator vk::ComponentMapping() const {
    auto swizzleConvert{[](SwizzleChannel channel) {
        switch (channel) {
            case SwizzleChannel::Zero:
                return vk::ComponentSwizzle::eZero;
            case SwizzleChannel::One:
                return vk::ComponentSwizzle::eOne;
            case SwizzleChannel::Red:
                return vk::ComponentSwizzle::eR;
            case SwizzleChannel::Green:
                return vk::ComponentSwizzle::eG;
            case SwizzleChannel::Blue:
                return vk::ComponentSwizzle::eB;
            case SwizzleChannel::Alpha:
                return vk::ComponentSwizzle::eA;
        }

        // Only reachable with a value outside the enumerators above, returning here avoids flowing off the end of the lambda (which is undefined behavior)
        return vk::ComponentSwizzle::eIdentity;
    }};

    return vk::ComponentMapping{
        .r = swizzleConvert(red),
        .g = swizzleConvert(green),
        .b = swizzleConvert(blue),
        .a = swizzleConvert(alpha),
    };
}
};
}
@ -154,29 +179,101 @@ namespace skyline::gpu {
/**
* @brief Creates a corresponding host texture object for this guest texture
* @param backing The Vulkan Image that is used as the backing on the host
* @param backing The Vulkan Image that is used as the backing on the host, its lifetime is not managed by the host texture object
* @param tiling The tiling used by the image on host, this is the same as guest by default
* @param layout The initial layout of the Vulkan Image, this is used for efficient layout management
* @param format The format of the host texture (Defaults to the format of the guest texture)
* @param dimensions The dimensions of the host texture (Defaults to the dimensions of the guest texture)
* @param swizzle The channel swizzle of the host texture (Defaults to no channel swizzling)
* @return A shared pointer to the host texture object
* @note There can only be one host texture for a corresponding guest texture
*/
std::shared_ptr<Texture> InitializeTexture(vk::raii::Image &&backing, std::optional<texture::Format> format = std::nullopt, std::optional<texture::Dimensions> dimensions = std::nullopt, texture::Swizzle swizzle = {});
std::shared_ptr<Texture> InitializeTexture(vk::Image backing, std::optional<vk::ImageTiling> tiling = std::nullopt, vk::ImageLayout layout = vk::ImageLayout::eUndefined, std::optional<texture::Format> format = std::nullopt, std::optional<texture::Dimensions> dimensions = std::nullopt, texture::Swizzle swizzle = {});
/**
* @note As a RAII object is used here, the lifetime of the backing is handled by the host texture
*/
std::shared_ptr<Texture> InitializeTexture(vk::raii::Image &&backing, std::optional<vk::ImageTiling> tiling = std::nullopt, vk::ImageLayout layout = vk::ImageLayout::eUndefined, std::optional<texture::Format> format = std::nullopt, std::optional<texture::Dimensions> dimensions = std::nullopt, texture::Swizzle swizzle = {});
};
/**
* @brief A texture which is backed by host constructs while being synchronized with the underlying guest texture
* @note This class conforms to the Lockable and BasicLockable C++ named requirements
*/
class Texture {
public:
vk::raii::Image backing; //!< The object that holds a host copy of the guest texture
std::shared_ptr<GuestTexture> guest; //!< The guest texture from which this was created, it's required for syncing
texture::Dimensions dimensions;
texture::Format format;
texture::Swizzle swizzle;
private:
GPU &gpu;
std::mutex mutex; //!< Synchronizes any mutations to the texture or its backing
std::condition_variable backingCondition; //!< Signalled when a valid backing has been swapped in
using BackingType = std::variant<vk::Image, vk::raii::Image>;
BackingType backing; //!< The Vulkan image that backs this texture, it is nullable
std::shared_ptr<FenceCycle> cycle; //!< A fence cycle for when any host operation mutating the texture has completed, it must be waited on prior to any mutations to the backing
vk::ImageLayout layout;
/**
 * @return The raw Vulkan handle of the current backing, regardless of which alternative of the variant is held
 * @note The handle can be null, callers need to take the appropriate precautions
 */
constexpr vk::Image GetBacking() {
    // One overload per alternative of BackingType, both produce a plain vk::Image handle
    VariantVisitor handleOf{
        [](vk::Image unowned) { return unowned; },
        [](const vk::raii::Image &owned) { return *owned; },
    };
    return std::visit(handleOf, backing);
}
public:
Texture(vk::raii::Image &&backing, std::shared_ptr<GuestTexture> guest, texture::Dimensions dimensions, texture::Format format, texture::Swizzle swizzle);
std::shared_ptr<GuestTexture> guest; //!< The guest texture from which this was created, it's required for syncing and not nullable
texture::Dimensions dimensions;
texture::Format format;
vk::ImageTiling tiling;
vk::ComponentMapping mapping;
Texture(GPU &gpu, BackingType &&backing, vk::ImageLayout layout, std::shared_ptr<GuestTexture> guest, texture::Dimensions dimensions, texture::Format format, vk::ImageTiling tiling, vk::ComponentMapping mapping);
/**
 * @brief Acquires an exclusive lock on the texture for the calling thread
 * @note This forwards to the texture's std::mutex, together with unlock() it lets the texture be passed directly to lock wrappers such as std::scoped_lock
 */
void lock() {
mutex.lock();
}
/**
 * @brief Relinquishes an existing lock on the texture by the calling thread
 * @note This forwards to the texture's std::mutex, it is the counterpart of lock() and try_lock()
 */
void unlock() {
mutex.unlock();
}
/**
 * @brief Attempts to acquire an exclusive lock but returns immediately if it's captured by another thread
 * @return The result of std::mutex::try_lock on the texture's mutex, true when the lock was acquired
 */
bool try_lock() {
return mutex.try_lock();
}
/**
* @brief Waits on the texture backing to be a valid non-null Vulkan image
* @return If the mutex could be unlocked during the function
* @note The texture **must** be locked prior to calling this
*/
bool WaitOnBacking();
/**
* @brief Waits on a fence cycle if it exists till it's signalled and resets it after
* @note The texture **must** be locked prior to calling this
*/
void WaitOnFence();
/**
* @note All memory residing in the current backing is not copied to the new backing, it must be handled externally
* @note The texture **must** be locked prior to calling this
*/
void SwapBacking(BackingType &&backing, vk::ImageLayout layout = vk::ImageLayout::eUndefined);
/**
* @brief Transitions the backing to the supplied layout, if the backing already is in this layout then this does nothing
* @note The texture **must** be locked prior to calling this
*/
void TransitionLayout(vk::ImageLayout layout);
/**
* @brief Convert this texture to the specified tiling mode
@ -202,11 +299,13 @@ namespace skyline::gpu {
/**
* @brief Synchronizes the host texture with the guest after it has been modified
* @note The texture **must** be locked prior to calling this
*/
void SynchronizeHost();
/**
* @brief Synchronizes the guest texture with the host texture after it has been modified
* @note The texture **must** be locked prior to calling this
*/
void SynchronizeGuest();
};

View File

@ -183,6 +183,6 @@ namespace skyline::kernel::ipc {
}
}
state.logger->Verbose("Output: Raw Size: {}, Command ID: 0x{:X}, Copy Handles: {}, Move Handles: {}", static_cast<u32>(header->rawSize), static_cast<u32>(payloadHeader->value), copyHandles.size(), moveHandles.size());
state.logger->Verbose("Output: Raw Size: {}, Result: 0x{:X}, Copy Handles: {}, Move Handles: {}", static_cast<u32>(header->rawSize), static_cast<u32>(payloadHeader->value), copyHandles.size(), moveHandles.size());
}
}

View File

@ -5,7 +5,7 @@
#include <android/hardware_buffer.h>
#include <gpu.h>
#include <gpu/format.h>
#include <gpu/texture/format.h>
#include <soc.h>
#include <common/settings.h>
#include <services/nvdrv/driver.h>
@ -137,9 +137,14 @@ namespace skyline::service::hosbinder {
throw exception("Any non-identity sticky transform is not supported: '{}' ({:#b})", ToString(stickyTransform), static_cast<u32>(stickyTransform));
fence.Wait(state.soc->host1x);
{
std::scoped_lock textureLock(*buffer.texture);
buffer.texture->SynchronizeHost();
buffer.texture->WaitOnFence();
state.gpu->presentation.Present(slot);
state.gpu->presentation.bufferEvent->Signal();
}
width = defaultWidth;
height = defaultHeight;
@ -345,14 +350,14 @@ namespace skyline::service::hosbinder {
gpu::texture::TileMode tileMode;
gpu::texture::TileConfig tileConfig;
if (surface.layout != NvSurfaceLayout::Blocklinear) {
if (surface.layout == NvSurfaceLayout::Blocklinear) {
tileMode = gpu::texture::TileMode::Block;
tileConfig = {
.surfaceWidth = static_cast<u16>(surface.width),
.blockHeight = static_cast<u8>(1U << surface.blockHeightLog2),
.blockDepth = 1,
};
} else if (surface.layout != NvSurfaceLayout::Pitch) {
} else if (surface.layout == NvSurfaceLayout::Pitch) {
tileMode = gpu::texture::TileMode::Pitch;
tileConfig = {
.pitch = surface.pitch,
@ -408,12 +413,13 @@ namespace skyline::service::hosbinder {
auto queueBufferInputSize{in.Pop<u64>()};
if (queueBufferInputSize != QueueBufferInputSize)
throw exception("The size of QueueBufferInput in the Parcel (0x{:X}) doesn't match the expected size (0x{:X})", queueBufferInputSize, QueueBufferInputSize);
QueueBuffer(slot, in.Pop<i64>(), in.Pop<u32>(), in.Pop<AndroidRect>(), in.Pop<NativeWindowScalingMode>(), in.Pop<NativeWindowTransform>(), in.Pop<NativeWindowTransform>(), in.Pop<u32>(), in.Pop<u32>(), in.Pop<AndroidFence>(), width, height, transformHint, pendingBufferCount);
auto result{QueueBuffer(slot, in.Pop<i64>(), in.Pop<u32>(), in.Pop<AndroidRect>(), in.Pop<NativeWindowScalingMode>(), in.Pop<NativeWindowTransform>(), in.Pop<NativeWindowTransform>(), in.Pop<u32>(), in.Pop<u32>(), in.Pop<AndroidFence>(), width, height, transformHint, pendingBufferCount)};
out.Push(width);
out.Push(height);
out.Push(transformHint);
out.Push(pendingBufferCount);
out.Push(result);
break;
}

View File

@ -80,10 +80,12 @@ namespace skyline::service::hosbinder {
* @url https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/libs/gui/BufferQueueCore.cpp
*/
class GraphicBufferProducer {
public:
constexpr static u8 MaxSlotCount{16}; //!< The maximum amount of buffer slots that a buffer queue can hold, Android supports 64 but they go unused for applications like games so we've lowered this to 16 (https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/include/gui/BufferQueueDefs.h;l=29)
private:
const DeviceState &state;
std::mutex mutex; //!< Synchronizes access to the buffer queue
constexpr static u8 MaxSlotCount{16}; //!< The maximum amount of buffer slots that a buffer queue can hold, Android supports 64 but they go unused for applications like games so we've lowered this to 16
std::array<BufferSlot, MaxSlotCount> queue;
u8 activeSlotCount{2}; //!< The amount of slots in the queue that can be used
u8 hasBufferCount{}; //!< The amount of slots with buffers attached in the queue

View File

@ -212,7 +212,7 @@ class EmulationActivity : AppCompatActivity(), SurfaceHolder.Callback, View.OnTo
}
@Suppress("DEPRECATION") val display = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.R) display!! else windowManager.defaultDisplay
display?.supportedModes?.maxByOrNull { it.refreshRate + (it.physicalHeight * it.physicalWidth) }?.let { window.attributes.preferredDisplayModeId = it.modeId }
display?.supportedModes?.maxByOrNull { it.refreshRate * it.physicalHeight * it.physicalWidth }?.let { window.attributes.preferredDisplayModeId = it.modeId }
binding.gameView.setOnTouchListener(this)