Separate Guest and Host Presentation + AChoreographer V-Sync Event

We had issues when combining host and guest presentation since certain configurations in guest presentation such as double buffering were highly suboptimal for the host and would significantly affect the FPS. As a result, host presentation now has its own presentation textures, which the guest's textures are copied into at presentation time, allowing us to change parameters of the host presentation independently of the guest.

We've implemented the infrastructure for this which includes being able to create images from host GPU memory using VMA, an optimized linear texture sync and a method to do on-GPU texture-to-texture copies.

We've also moved to driving the V-Sync event using AChoreographer on its own thread in this PR, which more accurately reflects HOS behavior and allows games such as ARMS to boot, as they depend on the V-Sync event being signalled even when the game isn't presenting.
This commit is contained in:
PixelyIon 2021-06-18 16:25:19 +05:30
parent b4799f612c
commit b9af701bbe
20 changed files with 535 additions and 231 deletions

View File

@ -171,7 +171,7 @@
</inspection_tool>
<inspection_tool class="CheckedExceptionClass" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="ClangTidy" enabled="true" level="WARNING" enabled_by_default="true">
<option name="clangTidyChecks" value="-*,bugprone-argument-comment,bugprone-assert-side-effect,bugprone-bad-signal-to-kill-thread,bugprone-branch-clone,bugprone-copy-constructor-init,bugprone-dangling-handle,bugprone-dynamic-static-initializers,bugprone-fold-init-type,bugprone-forward-declaration-namespace,bugprone-forwarding-reference-overload,bugprone-inaccurate-erase,bugprone-incorrect-roundings,bugprone-integer-division,bugprone-lambda-function-name,bugprone-macro-parentheses,bugprone-macro-repeated-side-effects,bugprone-misplaced-operator-in-strlen-in-alloc,bugprone-misplaced-pointer-arithmetic-in-alloc,bugprone-misplaced-widening-cast,bugprone-move-forwarding-reference,bugprone-multiple-statement-macro,bugprone-no-escape,bugprone-not-null-terminated-result,bugprone-parent-virtual-call,bugprone-posix-return,bugprone-reserved-identifier,bugprone-sizeof-container,bugprone-sizeof-expression,bugprone-spuriously-wake-up-functions,bugprone-string-constructor,bugprone-string-integer-assignment,bugprone-string-literal-with-embedded-nul,bugprone-suspicious-enum-usage,bugprone-suspicious-include,bugprone-suspicious-memset-usage,bugprone-suspicious-missing-comma,bugprone-suspicious-semicolon,bugprone-suspicious-string-compare,bugprone-swapped-arguments,bugprone-terminating-continue,bugprone-throw-keyword-missing,bugprone-too-small-loop-variable,bugprone-undefined-memory-manipulation,bugprone-undelegated-constructor,bugprone-unhandled-self-assignment,bugprone-unused-raii,bugprone-unused-return-value,bugprone-use-after-move,bugprone-virtual-near-miss,cert-dcl21-cpp,cert-dcl58-cpp,cert-err34-c,cert-err52-cpp,cert-err58-cpp,cert-err60-cpp,cert-flp30-c,cert-msc50-cpp,cert-msc51-cpp,cert-str34-c,cppcoreguidelines-interfaces-global-init,cppcoreguidelines-narrowing-conversions,cppcoreguidelines-pro-type-member-init,cppcoreguidelines-pro-type-static-cast-downcast,google-default-arguments,google-explicit-constructor,google-runtime-operator,hicpp-exception-baseclass,hicpp-multiway-p
aths-covered,misc-misplaced-const,misc-new-delete-overloads,misc-no-recursion,misc-non-copyable-objects,misc-throw-by-value-catch-by-reference,misc-unconventional-assign-operator,misc-uniqueptr-reset-release,modernize-avoid-bind,modernize-concat-nested-namespaces,modernize-deprecated-ios-base-aliases,modernize-loop-convert,modernize-make-shared,modernize-make-unique,modernize-pass-by-value,modernize-raw-string-literal,modernize-redundant-void-arg,modernize-replace-auto-ptr,modernize-replace-disallow-copy-and-assign-macro,modernize-replace-random-shuffle,modernize-return-braced-init-list,modernize-shrink-to-fit,modernize-unary-static-assert,modernize-use-auto,modernize-use-bool-literals,modernize-use-emplace,modernize-use-equals-default,modernize-use-equals-delete,modernize-use-nodiscard,modernize-use-noexcept,modernize-use-nullptr,modernize-use-override,modernize-use-transparent-functors,modernize-use-uncaught-exceptions,mpi-buffer-deref,mpi-type-mismatch,openmp-use-default-none,performance-faster-string-find,performance-for-range-copy,performance-implicit-conversion-in-loop,performance-inefficient-algorithm,performance-inefficient-string-concatenation,performance-inefficient-vector-operation,performance-move-const-arg,performance-move-constructor-init,performance-no-automatic-move,performance-noexcept-move-constructor,performance-trivially-destructible,performance-type-promotion-in-math-fn,performance-unnecessary-copy-initialization,performance-unnecessary-value-param,portability-simd-intrinsics,readability-avoid-const-params-in-decls,readability-const-return-type,readability-container-size-empty,readability-convert-member-functions-to-static,readability-delete-null-pointer,readability-deleted-default,readability-inconsistent-declaration-parameter-name,readability-make-member-function-const,readability-misleading-indentation,readability-misplaced-array-index,readability-non-const-parameter,readability-redundant-control-flow,readability-redundant-declaration,readabi
lity-redundant-function-ptr-dereference,readability-redundant-smartptr-get,readability-redundant-string-cstr,readability-redundant-string-init,readability-simplify-subscript-expr,readability-static-accessed-through-instance,readability-static-definition-in-anonymous-namespace,readability-string-compare,readability-uniqueptr-delete-release,readability-use-anyofallof" />
<option name="clangTidyChecks" value="-*,bugprone-argument-comment,bugprone-assert-side-effect,bugprone-bad-signal-to-kill-thread,bugprone-branch-clone,bugprone-copy-constructor-init,bugprone-dangling-handle,bugprone-dynamic-static-initializers,bugprone-fold-init-type,bugprone-forward-declaration-namespace,bugprone-forwarding-reference-overload,bugprone-inaccurate-erase,bugprone-incorrect-roundings,bugprone-integer-division,bugprone-lambda-function-name,bugprone-macro-parentheses,bugprone-macro-repeated-side-effects,bugprone-misplaced-operator-in-strlen-in-alloc,bugprone-misplaced-pointer-arithmetic-in-alloc,bugprone-misplaced-widening-cast,bugprone-move-forwarding-reference,bugprone-multiple-statement-macro,bugprone-no-escape,bugprone-not-null-terminated-result,bugprone-parent-virtual-call,bugprone-posix-return,bugprone-reserved-identifier,bugprone-sizeof-container,bugprone-sizeof-expression,bugprone-spuriously-wake-up-functions,bugprone-string-constructor,bugprone-string-integer-assignment,bugprone-string-literal-with-embedded-nul,bugprone-suspicious-enum-usage,bugprone-suspicious-include,bugprone-suspicious-memset-usage,bugprone-suspicious-missing-comma,bugprone-suspicious-semicolon,bugprone-suspicious-string-compare,bugprone-swapped-arguments,bugprone-terminating-continue,bugprone-throw-keyword-missing,bugprone-too-small-loop-variable,bugprone-undefined-memory-manipulation,bugprone-undelegated-constructor,bugprone-unhandled-self-assignment,bugprone-unused-raii,bugprone-unused-return-value,bugprone-use-after-move,bugprone-virtual-near-miss,cert-dcl21-cpp,cert-dcl58-cpp,cert-err34-c,cert-err52-cpp,cert-err58-cpp,cert-err60-cpp,cert-flp30-c,cert-msc50-cpp,cert-msc51-cpp,cert-str34-c,cppcoreguidelines-interfaces-global-init,cppcoreguidelines-narrowing-conversions,cppcoreguidelines-pro-type-member-init,cppcoreguidelines-pro-type-static-cast-downcast,cppcoreguidelines-slicing,google-default-arguments,google-explicit-constructor,google-runtime-operator,hicpp-exception-
baseclass,hicpp-multiway-paths-covered,misc-misplaced-const,misc-new-delete-overloads,misc-no-recursion,misc-non-copyable-objects,misc-throw-by-value-catch-by-reference,misc-unconventional-assign-operator,misc-uniqueptr-reset-release,modernize-avoid-bind,modernize-concat-nested-namespaces,modernize-deprecated-headers,modernize-deprecated-ios-base-aliases,modernize-loop-convert,modernize-make-shared,modernize-make-unique,modernize-pass-by-value,modernize-raw-string-literal,modernize-redundant-void-arg,modernize-replace-auto-ptr,modernize-replace-disallow-copy-and-assign-macro,modernize-replace-random-shuffle,modernize-return-braced-init-list,modernize-shrink-to-fit,modernize-unary-static-assert,modernize-use-auto,modernize-use-bool-literals,modernize-use-emplace,modernize-use-equals-default,modernize-use-equals-delete,modernize-use-nodiscard,modernize-use-noexcept,modernize-use-nullptr,modernize-use-override,modernize-use-transparent-functors,modernize-use-uncaught-exceptions,mpi-buffer-deref,mpi-type-mismatch,openmp-use-default-none,performance-faster-string-find,performance-for-range-copy,performance-implicit-conversion-in-loop,performance-inefficient-algorithm,performance-inefficient-string-concatenation,performance-inefficient-vector-operation,performance-move-const-arg,performance-move-constructor-init,performance-no-automatic-move,performance-noexcept-move-constructor,performance-trivially-destructible,performance-type-promotion-in-math-fn,performance-unnecessary-copy-initialization,performance-unnecessary-value-param,portability-simd-intrinsics,readability-avoid-const-params-in-decls,readability-const-return-type,readability-container-size-empty,readability-convert-member-functions-to-static,readability-delete-null-pointer,readability-deleted-default,readability-inconsistent-declaration-parameter-name,readability-misleading-indentation,readability-misplaced-array-index,readability-non-const-parameter,readability-redundant-control-flow,readability-redundant-dec
laration,readability-redundant-function-ptr-dereference,readability-redundant-smartptr-get,readability-redundant-string-cstr,readability-redundant-string-init,readability-simplify-subscript-expr,readability-static-accessed-through-instance,readability-static-definition-in-anonymous-namespace,readability-string-compare,readability-uniqueptr-delete-release,readability-use-anyofallof" />
</inspection_tool>
<inspection_tool class="ClassComplexity" enabled="true" level="WARNING" enabled_by_default="true">
<option name="m_limit" value="80" />
@ -949,17 +949,6 @@
<inspection_tool class="SimplifiableEqualsExpression" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="SimplifiableJUnitAssertion" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="SimplifiedTestNGAssertion" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="Simplify" enabled="true" level="WARNING" enabled_by_default="true">
<option name="clangTidyCheckOptions">
<list>
<ClangTidyCheckOption>
<option name="optionName" value="clion-simplify.SimplifyIfWithReturn" />
<option name="optionValue" value="1" />
</ClangTidyCheckOption>
</list>
</option>
<option name="enableSimplifyIfWithReturn" value="true" />
</inspection_tool>
<inspection_tool class="SimplifyNestedEachInScopeFunction" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
<inspection_tool class="SingleCharacterStartsWith" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="SingleClassImport" enabled="true" level="WARNING" enabled_by_default="true" />

View File

@ -50,7 +50,7 @@
</value>
</option>
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_11" default="false" project-jdk-name="JDK" project-jdk-type="JavaSDK">
<component name="ProjectRootManager" version="2" languageLevel="JDK_11" default="true" project-jdk-name="JDK" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/build/classes" />
</component>
<component name="ProjectType">

View File

@ -1,3 +1,3 @@
<component name="DependencyValidationManager">
<scope name="SkylineLibraries" pattern="(file[skyline.app]:libraries/fmt/include//*||file[skyline.app]:libraries/frozen/include//*||file[skyline.app]:libraries/lz4/lib//*||file[skyline.app]:libraries/oboe/include//*||file[skyline.app]:libraries/perfetto/include//*||file:libraries/pugixml/src/pugixml.hpp||file[skyline.app]:libraries/tzcode/include/*||file[skyline.app]:libraries/vkhpp/vulkan//*||file[skyline.app]:libraries/vkhpp/Vulkan-Headers/include//*)&amp;&amp;!file:libraries/vkhpp/Vulkan-Headers/include/vulkan/vulkan.hpp" />
<scope name="SkylineLibraries" pattern="(file[skyline.app]:libraries/fmt/include//*||file[skyline.app]:libraries/frozen/include//*||file[skyline.app]:libraries/lz4/lib//*||file[skyline.app]:libraries/oboe/include//*||file[skyline.app]:libraries/perfetto/include//*||file:libraries/pugixml/src/pugixml.hpp||file[skyline.app]:libraries/tzcode/include/*||file[skyline.app]:libraries/vkhpp/vulkan//*||file[skyline.app]:libraries/vkhpp/Vulkan-Headers/include//*)&amp;&amp;!file:libraries/vkhpp/Vulkan-Headers/include/vulkan/vulkan.hpp||file:libraries/vkma/include/vk_mem_alloc.h" />
</component>

View File

@ -16,9 +16,9 @@ namespace skyline {
#define PREF_ELEM(name, memberName, rhs) std::make_pair(std::string(name), [](Settings &settings, const pugi::xml_node &element) { settings.memberName = rhs; })
std::tuple preferences{
PREF_ELEM("operation_mode", operationMode, element.attribute("value").as_bool()),
PREF_ELEM("username_value", username, element.text().as_string()),
PREF_ELEM("log_level", logLevel, static_cast<Logger::LogLevel>(element.text().as_uint(static_cast<unsigned int>(Logger::LogLevel::Info)))),
PREF_ELEM("username_value", username, element.text().as_string()),
PREF_ELEM("operation_mode", operationMode, element.attribute("value").as_bool()),
};
#undef PREF_ELEM

View File

@ -12,8 +12,9 @@ namespace skyline {
class Settings {
public:
Logger::LogLevel logLevel; //!< The minimum level that logs need to be for them to be printed
bool operationMode; //!< If the emulated Switch should be handheld or docked
std::string username; //!< The name set by the user to be supplied to the guest
bool operationMode; //!< If the emulated Switch should be handheld or docked
bool forceTripleBuffering{true}; //!< If the presentation should always triple buffer even if the game double buffers
/**
* @param fd An FD to the preference XML file

View File

@ -123,8 +123,8 @@ namespace skyline::gpu {
}
}
const auto& first{*dependencies.begin()};
const auto& last{*dependencies.end()};
const auto &first{*dependencies.begin()};
const auto &last{*dependencies.end()};
std::shared_ptr<FenceCycleDependency> next{std::atomic_load_explicit(&list, std::memory_order_consume)};
do {
last->next = next;

View File

@ -5,13 +5,32 @@
#include "memory_manager.h"
namespace skyline::gpu::memory {
/**
 * @brief Raises a Vulkan-Hpp result exception for any result other than VK_SUCCESS
 * @param result The raw Vulkan result code to check
 * @param function The name attached to the exception, this defaults to the name of the calling function
 */
void ThrowOnFail(VkResult result, const char *function = __builtin_FUNCTION()) {
    if (result == VK_SUCCESS)
        return;

    vk::throwResultException(static_cast<vk::Result>(result), function);
}
/**
 * @brief Destroys the buffer alongside its VMA allocation, objects with any null handle (such as moved-from ones) are skipped
 */
StagingBuffer::~StagingBuffer() {
    if (!vmaAllocator || !vmaAllocation || !vkBuffer)
        return;

    vmaDestroyBuffer(vmaAllocator, vkBuffer, vmaAllocation);
}
void MemoryManager::ThrowOnFail(VkResult result, const char *function) {
if (result != VK_SUCCESS)
vk::throwResultException(vk::Result(result), function);
/**
 * @brief Unmaps the image if it has a CPU mapping and then destroys it alongside its VMA allocation
 * @note Objects with any null handle (such as moved-from ones) are left untouched
 */
Image::~Image() {
    if (!vmaAllocator || !vmaAllocation || !vkImage)
        return;

    // NOTE(review): 'pointer' can also originate from a VMA_ALLOCATION_CREATE_MAPPED_BIT allocation (see AllocateMappedImage), confirm that unmapping such a mapping is valid with the VMA version in use
    if (pointer)
        vmaUnmapMemory(vmaAllocator, vmaAllocation);

    vmaDestroyImage(vmaAllocator, vkImage, vmaAllocation);
}
/**
 * @brief Supplies the CPU mapping of the image, creating it on the first call if the image isn't mapped yet
 * @return A pointer to the start of the mapping
 */
u8 *Image::data() {
    if (!pointer) [[unlikely]]
        ThrowOnFail(vmaMapMemory(vmaAllocator, vmaAllocation, reinterpret_cast<void **>(&pointer))); // Throws if VMA cannot map the allocation

    return pointer;
}
MemoryManager::MemoryManager(const GPU &pGpu) : gpu(pGpu) {
@ -75,4 +94,32 @@ namespace skyline::gpu::memory {
return std::make_shared<memory::StagingBuffer>(reinterpret_cast<u8 *>(allocationInfo.pMappedData), allocationInfo.size, vmaAllocator, buffer, allocation);
}
/**
 * @brief Creates a Vulkan image backed by a VMA allocation requested with VMA_MEMORY_USAGE_GPU_ONLY
 * @param createInfo The Vulkan creation parameters for the image
 * @return An RAII wrapper owning both the image and its allocation
 */
Image MemoryManager::AllocateImage(const vk::ImageCreateInfo &createInfo) {
    VkImage createdImage;
    VmaAllocation imageAllocation;
    VmaAllocationInfo imageAllocationInfo;

    VmaAllocationCreateInfo vmaCreateInfo{
        .usage = VMA_MEMORY_USAGE_GPU_ONLY,
    };
    const auto &vkCreateInfo{static_cast<const VkImageCreateInfo &>(createInfo)};

    ThrowOnFail(vmaCreateImage(vmaAllocator, &vkCreateInfo, &vmaCreateInfo, &createdImage, &imageAllocation, &imageAllocationInfo));

    return Image(vmaAllocator, createdImage, imageAllocation);
}
/**
 * @brief Creates a Vulkan image backed by a persistently mapped, host-visible and host-coherent VMA allocation
 * @param createInfo The Vulkan creation parameters for the image
 * @return An RAII wrapper owning the image and its allocation, it is pre-populated with the CPU mapping reported by VMA
 */
Image MemoryManager::AllocateMappedImage(const vk::ImageCreateInfo &createInfo) {
    // Memory property requirements belong in 'requiredFlags'/'preferredFlags', 'memoryTypeBits' is a bitmask of acceptable memory type *indices* and must not be fed property flags
    VmaAllocationCreateInfo allocationCreateInfo{
        .flags = VMA_ALLOCATION_CREATE_MAPPED_BIT,
        .usage = VMA_MEMORY_USAGE_UNKNOWN,
        .requiredFlags = static_cast<VkMemoryPropertyFlags>(vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent), // The mapping is only usable if the memory is host-visible, coherency avoids manual flushes
        .preferredFlags = static_cast<VkMemoryPropertyFlags>(vk::MemoryPropertyFlagBits::eDeviceLocal), // Device-local memory is desirable but allocation shouldn't fail on devices without such a memory type
    };

    VkImage image;
    VmaAllocation allocation;
    VmaAllocationInfo allocationInfo;
    ThrowOnFail(vmaCreateImage(vmaAllocator, &static_cast<const VkImageCreateInfo &>(createInfo), &allocationCreateInfo, &image, &allocation, &allocationInfo));

    return Image(reinterpret_cast<u8 *>(allocationInfo.pMappedData), vmaAllocator, image, allocation);
}
}

View File

@ -11,16 +11,57 @@ namespace skyline::gpu::memory {
* @brief A view into a CPU mapping of a Vulkan buffer
* @note The mapping **should not** be used after the lifetime of the object has ended
*/
struct StagingBuffer : public span<u8>, FenceCycleDependency {
struct StagingBuffer : public span<u8>, public FenceCycleDependency {
    VmaAllocator vmaAllocator;
    VmaAllocation vmaAllocation;
    vk::Buffer vkBuffer;

    /**
     * @param pointer The start of the CPU mapping of the buffer
     * @param size The size of the mapping in bytes
     * @note Initializers are listed in declaration order (bases first) to match the order they actually run in
     */
    constexpr StagingBuffer(u8 *pointer, size_t size, VmaAllocator vmaAllocator, vk::Buffer vkBuffer, VmaAllocation vmaAllocation) : span(pointer, size), vmaAllocator(vmaAllocator), vmaAllocation(vmaAllocation), vkBuffer(vkBuffer) {}

    StagingBuffer(const StagingBuffer &) = delete;

    /**
     * @brief Transfers ownership of the buffer, the moved-from object is left with an empty view and null handles so its destructor is a no-op
     * @note The view of the mapping is carried over too, otherwise the new owner would hold the buffer but expose zero bytes of it
     */
    constexpr StagingBuffer(StagingBuffer &&other) : span(std::exchange(static_cast<span<u8> &>(other), {})), vmaAllocator(std::exchange(other.vmaAllocator, nullptr)), vmaAllocation(std::exchange(other.vmaAllocation, nullptr)), vkBuffer(std::exchange(other.vkBuffer, {})) {}

    StagingBuffer &operator=(const StagingBuffer &) = delete;

    // NOTE(review): The defaulted move assignment copies the raw handles without clearing them in the source, this looks like it would destroy the buffer twice and leak the previous one — confirm whether it's ever used and how FenceCycleDependency should be treated before replacing it
    StagingBuffer &operator=(StagingBuffer &&) = default;

    ~StagingBuffer();
};
/**
* @brief A Vulkan image which VMA allocates and manages the backing memory for
*/
struct Image {
private:
u8 *pointer{};
public:
VmaAllocator vmaAllocator;
VmaAllocation vmaAllocation;
vk::Image vkImage;
constexpr Image(VmaAllocator vmaAllocator, vk::Image vkImage, VmaAllocation vmaAllocation) : vmaAllocator(vmaAllocator), vkImage(vkImage), vmaAllocation(vmaAllocation) {}
constexpr Image(u8 *pointer, VmaAllocator vmaAllocator, vk::Image vkImage, VmaAllocation vmaAllocation) : pointer(pointer), vmaAllocator(vmaAllocator), vkImage(vkImage), vmaAllocation(vmaAllocation) {}
Image(const Image &) = delete;
constexpr Image(Image &&other) : pointer(std::exchange(other.pointer, nullptr)), vmaAllocator(std::exchange(other.vmaAllocator, nullptr)), vmaAllocation(std::exchange(other.vmaAllocation, nullptr)), vkImage(std::exchange(other.vkImage, {})) {}
Image &operator=(const Image &) = delete;
Image &operator=(Image &&) = default;
~Image();
/**
* @return A pointer to a mapping of the image on the CPU
* @note If the image isn't already mapped on the CPU, this creates a mapping for it
*/
u8 *data();
};
/**
* @brief An abstraction over memory operations done in Vulkan, it's used for all allocations on the host GPU
*/
@ -29,11 +70,6 @@ namespace skyline::gpu::memory {
const GPU &gpu;
VmaAllocator vmaAllocator{VK_NULL_HANDLE};
/**
* @brief If the result isn't VK_SUCCESS then an exception is thrown
*/
static void ThrowOnFail(VkResult result, const char *function = __builtin_FUNCTION());
public:
MemoryManager(const GPU &gpu);
@ -43,5 +79,15 @@ namespace skyline::gpu::memory {
* @brief Creates a buffer which is optimized for staging (Transfer Source)
*/
std::shared_ptr<StagingBuffer> AllocateStagingBuffer(vk::DeviceSize size);
/**
* @brief Creates an image which is allocated and deallocated using RAII
*/
Image AllocateImage(const vk::ImageCreateInfo &createInfo);
/**
* @brief Creates an image which is allocated and deallocated using RAII and is optimal for being mapped on the CPU
*/
Image AllocateMappedImage(const vk::ImageCreateInfo &createInfo);
};
}

View File

@ -2,15 +2,18 @@
// Copyright © 2020 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <android/native_window_jni.h>
#include <gpu.h>
#include <android/choreographer.h>
#include <common/settings.h>
#include <jvm.h>
#include <gpu.h>
#include "presentation_engine.h"
#include "texture/format.h"
extern skyline::i32 Fps;
extern skyline::i32 FrameTime;
namespace skyline::gpu {
PresentationEngine::PresentationEngine(const DeviceState &state, GPU &gpu) : state(state), gpu(gpu), vsyncEvent(std::make_shared<kernel::type::KEvent>(state, true)), bufferEvent(std::make_shared<kernel::type::KEvent>(state, true)), presentationTrack(static_cast<u64>(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()) {
PresentationEngine::PresentationEngine(const DeviceState &state, GPU &gpu) : state(state), gpu(gpu), acquireFence(gpu.vkDevice, vk::FenceCreateInfo{}), presentationTrack(static_cast<u64>(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()), choreographerThread(&PresentationEngine::ChoreographerThread, this), vsyncEvent(std::make_shared<kernel::type::KEvent>(state, true)) {
auto desc{presentationTrack.Serialize()};
desc.set_name("Presentation");
perfetto::TrackEvent::SetTrackDescriptor(presentationTrack, desc);
@ -20,6 +23,26 @@ namespace skyline::gpu {
auto env{state.jvm->GetEnv()};
if (!env->IsSameObject(jSurface, nullptr))
env->DeleteGlobalRef(jSurface);
if (choreographerThread.joinable()) {
if (choreographerLooper)
ALooper_wake(choreographerLooper);
choreographerThread.join();
}
}
/**
* @url https://developer.android.com/ndk/reference/group/choreographer#achoreographer_framecallback
*/
void ChoreographerCallback(long frameTimeNanos, kernel::type::KEvent* vsyncEvent) {
vsyncEvent->Signal();
AChoreographer_postFrameCallback(AChoreographer_getInstance(), reinterpret_cast<AChoreographer_frameCallback>(&ChoreographerCallback), vsyncEvent);
}
/**
 * @brief The entry point of the thread which drives the V-Sync event: it sets up a looper for the thread, posts the first frame callback and then polls the looper
 * @note The destructor calls ALooper_wake on choreographerLooper prior to joining this thread
 */
void PresentationEngine::ChoreographerThread() {
// NOTE(review): choreographerLooper is written here and read by the destructor on another thread, no synchronization is visible in this function — confirm it cannot race
choreographerLooper = ALooper_prepare(0);
// The raw KEvent pointer is handed to the callback as its opaque data argument
AChoreographer_postFrameCallback(AChoreographer_getInstance(), reinterpret_cast<AChoreographer_frameCallback>(&ChoreographerCallback), vsyncEvent.get());
// A timeout of -1 polls without a time limit, the thread function ends once this call returns
ALooper_pollAll(-1, nullptr, nullptr, nullptr);
}
service::hosbinder::NativeWindowTransform GetAndroidTransform(vk::SurfaceTransformFlagBitsKHR transform) {
@ -45,58 +68,55 @@ namespace skyline::gpu {
}
}
void PresentationEngine::UpdateSwapchain(u16 imageCount, vk::Format imageFormat, vk::Extent2D imageExtent, bool newSurface) {
if (!imageCount)
return;
else if (imageCount > service::hosbinder::GraphicBufferProducer::MaxSlotCount)
throw exception("Requesting swapchain with higher image count ({}) than maximum slot count ({})", imageCount, service::hosbinder::GraphicBufferProducer::MaxSlotCount);
void PresentationEngine::UpdateSwapchain(texture::Format format, texture::Dimensions extent) {
auto minImageCount{std::max(vkSurfaceCapabilities.minImageCount, state.settings->forceTripleBuffering ? 3U : 0U)};
if (minImageCount > MaxSlotCount)
throw exception("Requesting swapchain with higher image count ({}) than maximum slot count ({})", minImageCount, MaxSlotCount);
const auto &capabilities{vkSurfaceCapabilities};
if (imageCount < capabilities.minImageCount || (capabilities.maxImageCount && imageCount > capabilities.maxImageCount))
throw exception("Cannot update swapchain to accomodate image count: {} ({}-{})", imageCount, capabilities.minImageCount, capabilities.maxImageCount);
if (capabilities.minImageExtent.height > imageExtent.height || capabilities.minImageExtent.width > imageExtent.width || capabilities.maxImageExtent.height < imageExtent.height || capabilities.maxImageExtent.width < imageExtent.width)
throw exception("Cannot update swapchain to accomodate image extent: {}x{} ({}x{}-{}x{})", imageExtent.width, imageExtent.height, capabilities.minImageExtent.width, capabilities.minImageExtent.height, capabilities.maxImageExtent.width, capabilities.maxImageExtent.height);
if (minImageCount < capabilities.minImageCount || (capabilities.maxImageCount && minImageCount > capabilities.maxImageCount))
throw exception("Cannot update swapchain to accomodate image count: {} ({}-{})", minImageCount, capabilities.minImageCount, capabilities.maxImageCount);
else if (capabilities.minImageExtent.height > extent.height || capabilities.minImageExtent.width > extent.width || capabilities.maxImageExtent.height < extent.height || capabilities.maxImageExtent.width < extent.width)
throw exception("Cannot update swapchain to accomodate image extent: {}x{} ({}x{}-{}x{})", extent.width, extent.height, capabilities.minImageExtent.width, capabilities.minImageExtent.height, capabilities.maxImageExtent.width, capabilities.maxImageExtent.height);
if (swapchain.imageFormat != imageFormat || newSurface) {
if (swapchainFormat != format) {
auto formats{gpu.vkPhysicalDevice.getSurfaceFormatsKHR(**vkSurface)};
if (std::find(formats.begin(), formats.end(), vk::SurfaceFormatKHR{imageFormat, vk::ColorSpaceKHR::eSrgbNonlinear}) == formats.end())
throw exception("Surface doesn't support requested image format '{}' with colorspace '{}'", vk::to_string(imageFormat), vk::to_string(vk::ColorSpaceKHR::eSrgbNonlinear));
if (std::find(formats.begin(), formats.end(), vk::SurfaceFormatKHR{format, vk::ColorSpaceKHR::eSrgbNonlinear}) == formats.end())
throw exception("Surface doesn't support requested image format '{}' with colorspace '{}'", vk::to_string(format), vk::to_string(vk::ColorSpaceKHR::eSrgbNonlinear));
}
constexpr vk::ImageUsageFlags presentUsage{vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst};
if ((capabilities.supportedUsageFlags & presentUsage) != presentUsage)
throw exception("Swapchain doesn't support image usage '{}': {}", vk::to_string(presentUsage), vk::to_string(capabilities.supportedUsageFlags));
vkSwapchain = vk::raii::SwapchainKHR(gpu.vkDevice, vk::SwapchainCreateInfoKHR{
vkSwapchain.emplace(gpu.vkDevice, vk::SwapchainCreateInfoKHR{
.surface = **vkSurface,
.minImageCount = imageCount,
.imageFormat = imageFormat,
.minImageCount = minImageCount,
.imageFormat = format,
.imageColorSpace = vk::ColorSpaceKHR::eSrgbNonlinear,
.imageExtent = imageExtent,
.imageExtent = extent,
.imageArrayLayers = 1,
.imageUsage = presentUsage,
.imageSharingMode = vk::SharingMode::eExclusive,
.compositeAlpha = vk::CompositeAlphaFlagBitsKHR::eInherit,
.presentMode = vk::PresentModeKHR::eFifo,
.clipped = false,
.oldSwapchain = vkSwapchain ? **vkSwapchain : vk::SwapchainKHR{},
.presentMode = vk::PresentModeKHR::eMailbox,
.clipped = true,
});
auto vkImages{vkSwapchain->getImages()};
for (u16 slot{}; slot < imageCount; slot++) {
auto &vkImage{vkImages[slot]};
swapchain.vkImages[slot] = vkImage;
auto &image{swapchain.textures[slot]};
if (image) {
std::scoped_lock lock(*image);
image->SwapBacking(vkImage);
image->TransitionLayout(vk::ImageLayout::ePresentSrcKHR);
image->SynchronizeHost(); // Synchronize the new host backing with guest memory
}
if (vkImages.size() > MaxSlotCount)
throw exception("Swapchain has higher image count ({}) than maximum slot count ({})", minImageCount, MaxSlotCount);
for (size_t index{}; index < vkImages.size(); index++) {
auto &slot{slots[index]};
slot = std::make_shared<Texture>(*state.gpu, vkImages[index], extent, format::GetFormat(format), vk::ImageLayout::eUndefined, vk::ImageTiling::eOptimal);
slot->TransitionLayout(vk::ImageLayout::ePresentSrcKHR);
}
swapchain.imageCount = imageCount;
swapchain.imageFormat = imageFormat;
swapchain.imageExtent = imageExtent;
for (size_t index{vkImages.size()}; index < MaxSlotCount; index++)
slots[index] = {};
swapchainFormat = format;
swapchainExtent = extent;
}
void PresentationEngine::UpdateSurface(jobject newSurface) {
@ -110,18 +130,7 @@ namespace skyline::gpu {
if (!env->IsSameObject(newSurface, nullptr))
jSurface = env->NewGlobalRef(newSurface);
if (vkSwapchain) {
for (u16 slot{}; slot < swapchain.imageCount; slot++) {
auto &image{swapchain.textures[slot]};
if (image) {
std::scoped_lock lock(*image);
image->SynchronizeGuest(); // Synchronize host backing to guest memory prior to being destroyed
image->SwapBacking(nullptr);
}
}
swapchain.vkImages = {};
vkSwapchain.reset();
}
vkSwapchain.reset();
if (jSurface) {
vkSurface.emplace(gpu.vkInstance, vk::AndroidSurfaceCreateInfoKHR{
@ -131,7 +140,8 @@ namespace skyline::gpu {
throw exception("Vulkan Queue doesn't support presentation with surface");
vkSurfaceCapabilities = gpu.vkPhysicalDevice.getSurfaceCapabilitiesKHR(**vkSurface);
UpdateSwapchain(swapchain.imageCount, swapchain.imageFormat, swapchain.imageExtent, true);
if (swapchainExtent && swapchainFormat)
UpdateSwapchain(swapchainFormat, swapchainExtent);
surfaceCondition.notify_all();
} else {
@ -139,60 +149,32 @@ namespace skyline::gpu {
}
}
std::shared_ptr<Texture> PresentationEngine::CreatePresentationTexture(const std::shared_ptr<GuestTexture> &texture, u8 slot) {
std::lock_guard guard(mutex);
if (swapchain.imageCount <= slot && slot + 1 >= vkSurfaceCapabilities.minImageCount)
UpdateSwapchain(slot + 1, texture->format.vkFormat, texture->dimensions);
auto host{texture->InitializeTexture(swapchain.vkImages.at(slot), vk::ImageTiling::eOptimal)};
swapchain.textures[slot] = host;
return host;
}
service::hosbinder::AndroidStatus PresentationEngine::GetFreeTexture(bool async, i32 &slot) {
using AndroidStatus = service::hosbinder::AndroidStatus;
void PresentationEngine::Present(const std::shared_ptr<Texture> &texture, u64 presentId) {
std::unique_lock lock(mutex);
surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); });
if (swapchain.dequeuedCount < swapchain.imageCount) {
static vk::raii::Fence fence(gpu.vkDevice, vk::FenceCreateInfo{});
auto timeout{async ? 0ULL : std::numeric_limits<u64>::max()}; // We cannot block for a buffer to be retrieved in async mode
auto nextImage{vkSwapchain->acquireNextImage(timeout, {}, *fence)};
if (nextImage.first == vk::Result::eSuccess) {
swapchain.dequeuedCount++;
while (gpu.vkDevice.waitForFences(*fence, true, std::numeric_limits<u64>::max()) == vk::Result::eTimeout);
slot = nextImage.second;
return AndroidStatus::Ok;
} else if (nextImage.first == vk::Result::eNotReady || nextImage.first == vk::Result::eTimeout) {
return AndroidStatus::WouldBlock;
} else if (nextImage.first == vk::Result::eSuboptimalKHR) {
if (texture->format != swapchainFormat || texture->dimensions != swapchainExtent)
UpdateSwapchain(texture->format, texture->dimensions);
std::pair<vk::Result, u32> nextImage;
while ((nextImage = vkSwapchain->acquireNextImage(std::numeric_limits<u64>::max(), {}, *acquireFence)).first != vk::Result::eSuccess) [[unlikely]]
if (nextImage.first == vk::Result::eSuboptimalKHR)
surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); });
return GetFreeTexture(async, slot);
} else {
throw exception("VkAcquireNextImageKHR returned an unhandled result '{}'", vk::to_string(nextImage.first));
}
}
return AndroidStatus::Busy;
}
else
throw exception("vkAcquireNextImageKHR returned an unhandled result '{}'", vk::to_string(nextImage.first));
while (gpu.vkDevice.waitForFences(*acquireFence, true, std::numeric_limits<u64>::max()) == vk::Result::eTimeout);
void PresentationEngine::Present(u32 slot) {
std::unique_lock lock(mutex);
surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); });
if (--swapchain.dequeuedCount < 0) [[unlikely]] {
throw exception("Swapchain has been presented more times than images from it have been acquired: {} (Image Count: {})", swapchain.dequeuedCount, swapchain.imageCount);
}
slots.at(nextImage.second)->CopyFrom(texture);
{
std::lock_guard queueLock(gpu.queueMutex);
static_cast<void>(gpu.vkQueue.presentKHR(vk::PresentInfoKHR{
.swapchainCount = 1,
.pSwapchains = &**vkSwapchain,
.pImageIndices = &slot,
.pImageIndices = &nextImage.second,
})); // We explicitly discard the result here as suboptimal images are expected when the game doesn't respect the transform hint
}
vsyncEvent->Signal();
if (frameTimestamp) {
auto now{util::GetTimeNs()};
FrameTime = static_cast<u32>((now - frameTimestamp) / 10000); // frametime / 100 is the real ms value, this is to retain the first two decimals

View File

@ -3,6 +3,7 @@
#pragma once
#include <android/looper.h>
#include <common/trace.h>
#include <kernel/types/KEvent.h>
#include <services/hosbinder/GraphicBufferProducer.h>
@ -27,31 +28,31 @@ namespace skyline::gpu {
vk::SurfaceCapabilitiesKHR vkSurfaceCapabilities; //!< The capabilities of the current Vulkan Surface
std::optional<vk::raii::SwapchainKHR> vkSwapchain; //!< The Vulkan swapchain and the properties associated with it
struct SwapchainContext {
std::array<std::shared_ptr<Texture>, service::hosbinder::GraphicBufferProducer::MaxSlotCount> textures{};
std::array<VkImage, service::hosbinder::GraphicBufferProducer::MaxSlotCount> vkImages{VK_NULL_HANDLE};
u8 imageCount{};
i8 dequeuedCount{};
vk::Format imageFormat{};
vk::Extent2D imageExtent{};
vk::raii::Fence acquireFence; //!< A fence for acquiring an image from the swapchain
texture::Format swapchainFormat{}; //!< The image format of the textures in the current swapchain
texture::Dimensions swapchainExtent{}; //!< The extent of images in the current swapchain
static_assert(std::numeric_limits<decltype(imageCount)>::max() >= service::hosbinder::GraphicBufferProducer::MaxSlotCount);
static_assert(std::numeric_limits<decltype(dequeuedCount)>::max() >= service::hosbinder::GraphicBufferProducer::MaxSlotCount);
} swapchain; //!< The properties of the currently created swapchain
static constexpr size_t MaxSlotCount{6}; //!< The maximum amount of queue slots, this affects the amount of images that can be in the swapchain
std::array<std::shared_ptr<Texture>, MaxSlotCount> slots; //!< The backing for storing all slots and sorted in the same order as supplied by the Vulkan swapchain
u64 frameTimestamp{}; //!< The timestamp of the last frame being shown
perfetto::Track presentationTrack; //!< Perfetto track used for presentation events
std::thread choreographerThread; //!< A thread for signalling the V-Sync event using AChoreographer
ALooper* choreographerLooper{}; //!< The looper object associated with the Choreographer thread
/**
* @brief The entry point for the Choreographer thread, the function runs ALooper on the thread
*/
void ChoreographerThread();
/**
* @note 'PresentationEngine::mutex' **must** be locked prior to calling this
*/
void UpdateSwapchain(u16 imageCount, vk::Format imageFormat, vk::Extent2D imageExtent, bool newSurface = false);
void UpdateSwapchain(texture::Format format, texture::Dimensions extent);
public:
texture::Dimensions resolution{};
i32 format{};
std::shared_ptr<kernel::type::KEvent> vsyncEvent; //!< Signalled every time a frame is drawn
std::shared_ptr<kernel::type::KEvent> bufferEvent; //!< Signalled every time a buffer is freed
PresentationEngine(const DeviceState &state, GPU &gpu);
@ -63,20 +64,11 @@ namespace skyline::gpu {
void UpdateSurface(jobject newSurface);
/**
* @brief Creates a Texture object from a GuestTexture as a part of the Vulkan swapchain
* @brief Queue the supplied texture to be presented to the screen
* @param presentId A UUID used to tag this frame for presentation timing readouts
* @note The texture **must** be locked prior to calling this
*/
std::shared_ptr<Texture> CreatePresentationTexture(const std::shared_ptr<GuestTexture> &texture, u8 slot);
/**
* @param async If to return immediately when a texture is not available
* @param slot The slot the freed texture is in is written into this, it is untouched if there's an error
*/
service::hosbinder::AndroidStatus GetFreeTexture(bool async, i32 &slot);
/**
* @brief Send a texture from a slot to the presentation queue to be displayed
*/
void Present(u32 slot);
void Present(const std::shared_ptr<Texture> &texture, u64 presentId);
/**
* @return A transform that the application should render with to elide costly transforms later

View File

@ -10,4 +10,18 @@ namespace skyline::gpu::format {
constexpr Format RGBA8888Unorm{sizeof(u8) * 4, 1, 1, vk::Format::eR8G8B8A8Unorm}; //!< 8-bits per channel 4-channel pixels
constexpr Format RGB565Unorm{sizeof(u8) * 2, 1, 1, vk::Format::eR5G6B5UnormPack16}; //!< Red channel: 5-bit, Green channel: 6-bit, Blue channel: 5-bit
/**
 * @brief Looks up the Skyline texture format that corresponds to a Vulkan format
 * @param format The Vulkan format to translate
 * @return A reference to the matching format constant declared in this namespace
 * @note An exception is thrown for any Vulkan format without an equivalent listed here
 */
constexpr const Format &GetFormat(vk::Format format) {
    if (format == vk::Format::eR8G8B8A8Unorm)
        return RGBA8888Unorm;
    if (format == vk::Format::eR5G6B5UnormPack16)
        return RGB565Unorm;

    throw exception("Vulkan format not supported: '{}'", vk::to_string(format));
}
}

View File

@ -7,29 +7,76 @@
#include "texture.h"
namespace skyline::gpu {
GuestTexture::GuestTexture(const DeviceState &state, u8 *pointer, texture::Dimensions dimensions, texture::Format format, texture::TileMode tiling, texture::TileConfig layout) : state(state), pointer(pointer), dimensions(dimensions), format(format), tileMode(tiling), tileConfig(layout) {}
GuestTexture::GuestTexture(const DeviceState &state, u8 *pointer, texture::Dimensions dimensions, const texture::Format &format, texture::TileMode tiling, texture::TileConfig layout) : state(state), pointer(pointer), dimensions(dimensions), format(format), tileMode(tiling), tileConfig(layout) {}
std::shared_ptr<Texture> GuestTexture::InitializeTexture(vk::Image backing, std::optional<vk::ImageTiling> tiling, vk::ImageLayout pLayout, std::optional<texture::Format> pFormat, std::optional<texture::Dimensions> pDimensions, texture::Swizzle swizzle) {
std::shared_ptr<Texture> GuestTexture::InitializeTexture(vk::Image backing, texture::Dimensions pDimensions, const texture::Format &pFormat, std::optional<vk::ImageTiling> tiling, vk::ImageLayout layout, texture::Swizzle swizzle) {
if (!host.expired())
throw exception("Trying to create multiple Texture objects from a single GuestTexture");
auto sharedHost{std::make_shared<Texture>(*state.gpu, backing, pLayout, shared_from_this(), pDimensions ? *pDimensions : dimensions, pFormat ? *pFormat : format, tiling ? *tiling : (tileMode == texture::TileMode::Block) ? vk::ImageTiling::eOptimal : vk::ImageTiling::eLinear, swizzle)};
auto sharedHost{std::make_shared<Texture>(*state.gpu, backing, shared_from_this(), pDimensions ? pDimensions : dimensions, pFormat ? pFormat : format, layout, tiling ? *tiling : (tileMode == texture::TileMode::Block) ? vk::ImageTiling::eOptimal : vk::ImageTiling::eLinear, swizzle)};
host = sharedHost;
return sharedHost;
}
std::shared_ptr<Texture> GuestTexture::InitializeTexture(vk::raii::Image &&backing, std::optional<vk::ImageTiling> tiling, vk::ImageLayout pLayout, std::optional<texture::Format> pFormat, std::optional<texture::Dimensions> pDimensions, texture::Swizzle swizzle) {
std::shared_ptr<Texture> GuestTexture::InitializeTexture(vk::raii::Image &&backing, std::optional<vk::ImageTiling> tiling, vk::ImageLayout layout, const texture::Format &pFormat, texture::Dimensions pDimensions, texture::Swizzle swizzle) {
if (!host.expired())
throw exception("Trying to create multiple Texture objects from a single GuestTexture");
auto sharedHost{std::make_shared<Texture>(*state.gpu, std::move(backing), pLayout, shared_from_this(), pDimensions ? *pDimensions : dimensions, pFormat ? *pFormat : format, tiling ? *tiling : (tileMode == texture::TileMode::Block) ? vk::ImageTiling::eOptimal : vk::ImageTiling::eLinear, swizzle)};
auto sharedHost{std::make_shared<Texture>(*state.gpu, std::move(backing), shared_from_this(), pDimensions ? pDimensions : dimensions, pFormat ? pFormat : format, layout, tiling ? *tiling : (tileMode == texture::TileMode::Block) ? vk::ImageTiling::eOptimal : vk::ImageTiling::eLinear, swizzle)};
host = sharedHost;
return sharedHost;
}
Texture::Texture(GPU &gpu, BackingType &&backing, vk::ImageLayout layout, std::shared_ptr<GuestTexture> guest, texture::Dimensions dimensions, texture::Format format, vk::ImageTiling tiling, vk::ComponentMapping mapping) : gpu(gpu), backing(std::move(backing)), layout(layout), guest(std::move(guest)), dimensions(dimensions), format(format), tiling(tiling), mapping(mapping) {
std::shared_ptr<Texture> GuestTexture::CreateTexture(vk::ImageUsageFlags usage, std::optional<vk::ImageTiling> pTiling, vk::ImageLayout initialLayout, const texture::Format &pFormat, texture::Dimensions pDimensions, texture::Swizzle swizzle) {
    // A guest texture may only ever have a single live host texture attached to it
    if (!host.expired())
        throw exception("Trying to create multiple Texture objects from a single GuestTexture");

    // Any argument left at its empty/invalid default falls back to the matching property of this guest texture
    if (!pDimensions)
        pDimensions = dimensions;
    const auto &hostFormat{pFormat ? pFormat : format};

    vk::ImageTiling hostTiling;
    if (pTiling)
        hostTiling = *pTiling;
    else if (tileMode == texture::TileMode::Block)
        hostTiling = vk::ImageTiling::eOptimal;
    else
        hostTiling = vk::ImageTiling::eLinear;

    // Transfer source/destination usage is always requested on top of whatever the caller asked for
    const auto imageUsage{usage | vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst};

    vk::ImageCreateInfo createInfo{
        .imageType = pDimensions.GetType(),
        .format = hostFormat,
        .extent = pDimensions,
        .mipLevels = 1,
        .arrayLayers = 1,
        .samples = vk::SampleCountFlagBits::e1,
        .tiling = hostTiling,
        .usage = imageUsage,
        .sharingMode = vk::SharingMode::eExclusive,
        .queueFamilyIndexCount = 1,
        .pQueueFamilyIndices = &state.gpu->vkQueueFamilyIndex,
        .initialLayout = initialLayout,
    };

    // Linearly tiled images are allocated through AllocateMappedImage, everything else goes through AllocateImage
    auto hostBacking{hostTiling == vk::ImageTiling::eLinear ? state.gpu->memory.AllocateMappedImage(createInfo) : state.gpu->memory.AllocateImage(createInfo)};

    auto hostTexture{std::make_shared<Texture>(*state.gpu, std::move(hostBacking), shared_from_this(), pDimensions, hostFormat, initialLayout, hostTiling, swizzle)};
    host = hostTexture;
    return hostTexture;
}
/**
 * @brief Constructs a host texture over the supplied backing and associates it with the supplied guest texture
 * @note When GetBacking() yields a non-null image at construction time, SynchronizeHost() is called immediately
 */
Texture::Texture(GPU &gpu, BackingType &&backing, std::shared_ptr<GuestTexture> guest, texture::Dimensions dimensions, const texture::Format &format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ComponentMapping mapping) : gpu(gpu), backing(std::move(backing)), layout(layout), guest(std::move(guest)), dimensions(dimensions), format(format), tiling(tiling), mapping(mapping) {
// Only synchronize when there is already an image to write into
if (GetBacking())
SynchronizeHost();
}
/**
 * @brief Constructs a host texture over the supplied backing without any guest texture (guest is set to nullptr)
 * @note The body is empty, so no synchronization is performed on construction
 */
Texture::Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, const texture::Format &format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ComponentMapping mapping) : gpu(gpu), backing(std::move(backing)), guest(nullptr), dimensions(dimensions), format(format), layout(layout), tiling(tiling), mapping(mapping) {}
Texture::Texture(GPU &gpu, texture::Dimensions dimensions, const texture::Format &format, vk::ImageLayout initialLayout, vk::ImageUsageFlags usage, vk::ImageTiling tiling, vk::ComponentMapping mapping) : gpu(gpu), guest(nullptr), dimensions(dimensions), format(format), layout(initialLayout), tiling(tiling), mapping(mapping) {
    // Transfer source/destination usage is always requested in addition to the caller's usage flags
    const auto imageUsage{usage | vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst};

    vk::ImageCreateInfo createInfo{
        .imageType = dimensions.GetType(),
        .format = format,
        .extent = dimensions,
        .mipLevels = 1,
        .arrayLayers = 1,
        .samples = vk::SampleCountFlagBits::e1,
        .tiling = tiling,
        .usage = imageUsage,
        .sharingMode = vk::SharingMode::eExclusive,
        .queueFamilyIndexCount = 1,
        .pQueueFamilyIndices = &gpu.vkQueueFamilyIndex,
        .initialLayout = initialLayout,
    };

    // Linearly tiled images are allocated through AllocateMappedImage, everything else goes through AllocateImage
    if (tiling == vk::ImageTiling::eLinear)
        backing = gpu.memory.AllocateMappedImage(createInfo);
    else
        backing = gpu.memory.AllocateImage(createInfo);
}
bool Texture::WaitOnBacking() {
if (GetBacking()) [[likely]] {
return false;
@ -83,13 +130,23 @@ namespace skyline::gpu {
}
void Texture::SynchronizeHost() {
if (!guest)
throw exception("Synchronization of host textures requires a valid guest texture to synchronize from");
TRACE_EVENT("gpu", "Texture::SynchronizeHost");
auto pointer{guest->pointer};
auto size{format.GetSize(dimensions)};
auto stagingBuffer{[&]() {
if (tiling == vk::ImageTiling::eOptimal) {
return gpu.memory.AllocateStagingBuffer(size);
u8 *bufferData;
auto stagingBuffer{[&]() -> std::shared_ptr<memory::StagingBuffer> {
if (tiling == vk::ImageTiling::eOptimal || !std::holds_alternative<memory::Image>(backing)) {
auto stagingBuffer{gpu.memory.AllocateStagingBuffer(size)};
bufferData = stagingBuffer->data();
return stagingBuffer;
} else if (tiling == vk::ImageTiling::eLinear) {
bufferData = std::get<memory::Image>(backing).data();
WaitOnFence();
return nullptr;
} else {
throw exception("Guest -> Host synchronization of images tiled as '{}' isn't implemented", vk::to_string(tiling));
}
@ -112,7 +169,7 @@ namespace skyline::gpu {
auto gobYOffset{robWidthBytes * gobHeight}; // The offset of the next Y-axis GOB from the current one in linear space
auto inputSector{pointer}; // The address of the input sector
auto outputRob{stagingBuffer->data()}; // The address of the output block
auto outputRob{bufferData}; // The address of the output block
for (u32 rob{}, y{}, paddingY{}; rob < surfaceHeightRobs; rob++) { // Every Surface contains `surfaceHeightRobs` ROBs
auto outputBlock{outputRob}; // We iterate through a block independently of the ROB
@ -141,7 +198,7 @@ namespace skyline::gpu {
auto sizeStride{guest->format.GetSize(guest->tileConfig.pitch, 1)}; // The size of a single stride of pixel data
auto inputLine{pointer}; // The address of the input line
auto outputLine{stagingBuffer->data()}; // The address of the output line
auto outputLine{bufferData}; // The address of the output line
for (u32 line{}; line < dimensions.height; line++) {
std::memcpy(outputLine, inputLine, sizeLine);
@ -149,18 +206,113 @@ namespace skyline::gpu {
outputLine += sizeLine;
}
} else if (guest->tileMode == texture::TileMode::Linear) {
std::memcpy(stagingBuffer->data(), pointer, size);
std::memcpy(bufferData, pointer, size);
}
if (WaitOnBacking() && size != format.GetSize(dimensions))
throw exception("Backing properties changing during sync is not supported");
if (stagingBuffer) {
if (WaitOnBacking() && size != format.GetSize(dimensions))
throw exception("Backing properties changing during sync is not supported");
WaitOnFence();
cycle = gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) {
auto image{GetBacking()};
if (layout != vk::ImageLayout::eTransferDstOptimal) {
commandBuffer.pipelineBarrier(layout != vk::ImageLayout::eUndefined ? vk::PipelineStageFlagBits::eTopOfPipe : vk::PipelineStageFlagBits::eBottomOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{
.image = image,
.srcAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
.dstAccessMask = vk::AccessFlagBits::eTransferWrite,
.oldLayout = layout,
.newLayout = vk::ImageLayout::eTransferDstOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.subresourceRange = {
.aspectMask = vk::ImageAspectFlagBits::eColor,
.levelCount = 1,
.layerCount = 1,
},
});
if (layout == vk::ImageLayout::eUndefined)
layout = vk::ImageLayout::eTransferDstOptimal;
}
commandBuffer.copyBufferToImage(stagingBuffer->vkBuffer, image, vk::ImageLayout::eTransferDstOptimal, vk::BufferImageCopy{
.imageExtent = dimensions,
.imageSubresource = {
.aspectMask = vk::ImageAspectFlagBits::eColor,
.layerCount = 1,
},
});
if (layout != vk::ImageLayout::eTransferDstOptimal)
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{
.image = image,
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead,
.oldLayout = vk::ImageLayout::eTransferDstOptimal,
.newLayout = layout,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.subresourceRange = {
.aspectMask = vk::ImageAspectFlagBits::eColor,
.levelCount = 1,
.layerCount = 1,
},
});
});
cycle->AttachObject(stagingBuffer);
}
}
/**
 * @brief Entry point for Host -> Guest synchronization, the actual copy is not implemented yet (see the TODO below)
 * @note An exception is thrown when no guest texture is attached to this texture
 */
void Texture::SynchronizeGuest() {
if (!guest)
throw exception("Synchronization of guest textures requires a valid guest texture to synchronize to");
// Wait on the backing and on the fence before doing anything else with the texture
WaitOnBacking();
WaitOnFence();
TRACE_EVENT("gpu", "Texture::SynchronizeGuest");
// TODO: Write Host -> Guest Synchronization
}
void Texture::CopyFrom(std::shared_ptr<Texture> source) {
WaitOnBacking();
WaitOnFence();
source->WaitOnBacking();
source->WaitOnFence();
if (source->layout == vk::ImageLayout::eUndefined)
throw exception("Cannot copy from image with undefined layout");
else if (source->dimensions != dimensions)
throw exception("Cannot copy from image with different dimensions");
else if (source->format != format)
throw exception("Cannot copy from image with different format");
cycle = gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) {
auto image{GetBacking()};
auto sourceBacking{source->GetBacking()};
if (source->layout != vk::ImageLayout::eTransferSrcOptimal) {
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTopOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{
.image = sourceBacking,
.srcAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
.dstAccessMask = vk::AccessFlagBits::eTransferRead,
.oldLayout = source->layout,
.newLayout = vk::ImageLayout::eTransferSrcOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.subresourceRange = {
.aspectMask = vk::ImageAspectFlagBits::eColor,
.levelCount = 1,
.layerCount = 1,
},
});
}
auto destinationBacking{GetBacking()};
if (layout != vk::ImageLayout::eTransferDstOptimal) {
commandBuffer.pipelineBarrier(layout != vk::ImageLayout::eUndefined ? vk::PipelineStageFlagBits::eTopOfPipe : vk::PipelineStageFlagBits::eBottomOfPipe, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{
.image = image,
.image = destinationBacking,
.srcAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
.dstAccessMask = vk::AccessFlagBits::eTransferWrite,
.oldLayout = layout,
@ -178,17 +330,21 @@ namespace skyline::gpu {
layout = vk::ImageLayout::eTransferDstOptimal;
}
commandBuffer.copyBufferToImage(stagingBuffer->vkBuffer, image, vk::ImageLayout::eTransferDstOptimal, vk::BufferImageCopy{
.imageExtent = dimensions,
.imageSubresource = {
commandBuffer.copyImage(sourceBacking, vk::ImageLayout::eTransferSrcOptimal, destinationBacking, vk::ImageLayout::eTransferDstOptimal, vk::ImageCopy{
.srcSubresource = {
.aspectMask = vk::ImageAspectFlagBits::eColor,
.layerCount = 1,
},
.dstSubresource = {
.aspectMask = vk::ImageAspectFlagBits::eColor,
.layerCount = 1,
},
.extent = dimensions,
});
if (layout != vk::ImageLayout::eTransferDstOptimal)
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{
.image = image,
.image = destinationBacking,
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eMemoryRead,
.oldLayout = vk::ImageLayout::eTransferDstOptimal,
@ -201,16 +357,23 @@ namespace skyline::gpu {
.layerCount = 1,
},
});
if (layout != vk::ImageLayout::eTransferSrcOptimal)
commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eTransfer, {}, {}, {}, vk::ImageMemoryBarrier{
.image = sourceBacking,
.srcAccessMask = vk::AccessFlagBits::eTransferRead,
.dstAccessMask = vk::AccessFlagBits::eMemoryWrite,
.oldLayout = vk::ImageLayout::eTransferSrcOptimal,
.newLayout = source->layout,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.subresourceRange = {
.aspectMask = vk::ImageAspectFlagBits::eColor,
.levelCount = 1,
.layerCount = 1,
},
});
});
cycle->AttachObject(stagingBuffer);
}
void Texture::SynchronizeGuest() {
WaitOnBacking();
WaitOnFence();
TRACE_EVENT("gpu", "Texture::SynchronizeGuest");
// TODO: Write Host -> Guest Synchronization
cycle->AttachObject(source);
}
}

View File

@ -14,13 +14,19 @@ namespace skyline::gpu {
constexpr Dimensions() : width(0), height(0), depth(0) {}
constexpr Dimensions(u32 width) : width(width), height(1), depth(1) {}
constexpr Dimensions(u32 width, u32 height) : width(width), height(height), depth(1) {}
constexpr Dimensions(u32 width, u32 height, u32 depth) : width(width), height(height), depth(depth) {}
constexpr Dimensions(vk::Extent2D extent) : Dimensions(extent.width, extent.height) {}
constexpr Dimensions(vk::Extent3D extent) : Dimensions(extent.width, extent.height, extent.depth) {}
auto operator<=>(const Dimensions &) const = default;
vk::ImageType GetType() {
constexpr vk::ImageType GetType() const {
if (depth)
return vk::ImageType::e3D;
else if (width)
@ -29,32 +35,39 @@ namespace skyline::gpu {
return vk::ImageType::e1D;
}
operator vk::Extent2D() {
/**
 * @return A Vulkan 2D extent carrying the width and height of these dimensions (depth is dropped)
 */
constexpr operator vk::Extent2D() const {
    vk::Extent2D extent{};
    extent.width = width;
    extent.height = height;
    return extent;
}
operator vk::Extent3D() {
/**
 * @return A Vulkan 3D extent carrying the width, height and depth of these dimensions
 */
constexpr operator vk::Extent3D() const {
    vk::Extent3D extent{};
    extent.width = width;
    extent.height = height;
    extent.depth = depth;
    return extent;
}
/**
 * @return True only when all of width, height and depth are non-zero
 */
constexpr operator bool() const {
    return (width != 0) && (height != 0) && (depth != 0);
}
};
/**
* @note Blocks refers to the atomic unit of a compressed format (IE: The minimum amount of data that can be decompressed)
*/
struct Format {
u8 bpb; //!< Bytes Per Block, this is used instead of bytes per pixel as that might not be a whole number for compressed formats
u16 blockHeight; //!< The height of a block in pixels
u16 blockWidth; //!< The width of a block in pixels
vk::Format vkFormat;
u8 bpb{}; //!< Bytes Per Block, this is used instead of bytes per pixel as that might not be a whole number for compressed formats
u16 blockHeight{}; //!< The height of a block in pixels
u16 blockWidth{}; //!< The width of a block in pixels
vk::Format vkFormat{vk::Format::eUndefined};
constexpr bool IsCompressed() {
/**
 * @return If a block of this format spans more than a single pixel in either direction
 */
constexpr bool IsCompressed() const {
    return !(blockHeight == 1 && blockWidth == 1);
}
@ -64,26 +77,30 @@ namespace skyline::gpu {
* @param depth The depth of the texture in layers
* @return The size of the texture in bytes
*/
constexpr size_t GetSize(u32 width, u32 height, u32 depth = 1) {
constexpr size_t GetSize(u32 width, u32 height, u32 depth = 1) const {
    // A block that is only partially covered at the right/bottom edge still occupies a whole block in memory,
    // so the block count along each axis is rounded up rather than truncated
    // All arithmetic is widened to size_t up-front so large textures cannot overflow a 32-bit intermediate
    const size_t widthBlocks{(static_cast<size_t>(width) + blockWidth - 1) / blockWidth};
    const size_t heightBlocks{(static_cast<size_t>(height) + blockHeight - 1) / blockHeight};
    return widthBlocks * heightBlocks * bpb * depth;
}
constexpr size_t GetSize(Dimensions dimensions) {
/**
 * @brief Convenience overload which forwards the width, height and depth of the supplied dimensions to the three-argument GetSize
 * @return The size of a texture with the supplied dimensions in bytes
 */
constexpr size_t GetSize(Dimensions dimensions) const {
return GetSize(dimensions.width, dimensions.height, dimensions.depth);
}
constexpr bool operator==(const Format &format) {
constexpr bool operator==(const Format &format) const {
return vkFormat == format.vkFormat;
}
constexpr bool operator!=(const Format &format) {
constexpr bool operator!=(const Format &format) const {
return vkFormat != format.vkFormat;
}
constexpr operator vk::Format() const {
return vkFormat;
}
/**
* @return If this format is actually valid or not
*/
constexpr operator bool() {
// A format is treated as valid as long as its bytes-per-block value is non-zero
constexpr operator bool() const {
    return bpb != 0;
}
};
@ -171,7 +188,7 @@ namespace skyline::gpu {
texture::TileMode tileMode;
texture::TileConfig tileConfig;
GuestTexture(const DeviceState &state, u8 *pointer, texture::Dimensions dimensions, texture::Format format, texture::TileMode tileMode = texture::TileMode::Linear, texture::TileConfig tileConfig = {});
GuestTexture(const DeviceState &state, u8 *pointer, texture::Dimensions dimensions, const texture::Format& format, texture::TileMode tileMode = texture::TileMode::Linear, texture::TileConfig tileConfig = {});
constexpr size_t Size() {
return format.GetSize(dimensions);
@ -180,32 +197,39 @@ namespace skyline::gpu {
/**
* @brief Creates a corresponding host texture object for this guest texture
* @param backing The Vulkan Image that is used as the backing on the host, its lifetime is not managed by the host texture object
* @param dimensions The dimensions of the host texture (Defaults to the dimensions of the guest texture)
* @param format The format of the host texture (Defaults to the format of the guest texture)
* @param tiling The tiling used by the image on host, this is the same as guest by default
* @param layout The initial layout of the Vulkan Image, this is used for efficient layout management
* @param format The format of the host texture (Defaults to the format of the guest texture)
* @param dimensions The dimensions of the host texture (Defaults to the dimensions of the host texture)
* @param swizzle The channel swizzle of the host texture (Defaults to no channel swizzling)
* @return A shared pointer to the host texture object
* @note There can only be one host texture for a corresponding guest texture
* @note If any of the supplied parameters do not match up with the backing then it's undefined behavior
*/
std::shared_ptr<Texture> InitializeTexture(vk::Image backing, std::optional<vk::ImageTiling> tiling = std::nullopt, vk::ImageLayout layout = vk::ImageLayout::eUndefined, std::optional<texture::Format> format = std::nullopt, std::optional<texture::Dimensions> dimensions = std::nullopt, texture::Swizzle swizzle = {});
std::shared_ptr<Texture> InitializeTexture(vk::Image backing, texture::Dimensions dimensions = {}, const texture::Format& format = {}, std::optional<vk::ImageTiling> tiling = std::nullopt, vk::ImageLayout layout = vk::ImageLayout::eUndefined, texture::Swizzle swizzle = {});
/**
* @note As a RAII object is used here, the lifetime of the backing is handled by the host texture
*/
std::shared_ptr<Texture> InitializeTexture(vk::raii::Image &&backing, std::optional<vk::ImageTiling> tiling = std::nullopt, vk::ImageLayout layout = vk::ImageLayout::eUndefined, std::optional<texture::Format> format = std::nullopt, std::optional<texture::Dimensions> dimensions = std::nullopt, texture::Swizzle swizzle = {});
std::shared_ptr<Texture> InitializeTexture(vk::raii::Image &&backing, std::optional<vk::ImageTiling> tiling = std::nullopt, vk::ImageLayout layout = vk::ImageLayout::eUndefined, const texture::Format& format = {}, texture::Dimensions dimensions = {}, texture::Swizzle swizzle = {});
/**
* @brief Similar to InitializeTexture but creation of the backing and allocation of memory for the backing is automatically performed by the function
* @param usage Usage flags that will be applied in addition to VK_IMAGE_USAGE_TRANSFER_SRC_BIT/VK_IMAGE_USAGE_TRANSFER_DST_BIT which are mandatory
*/
std::shared_ptr<Texture> CreateTexture(vk::ImageUsageFlags usage = {}, std::optional<vk::ImageTiling> tiling = std::nullopt, vk::ImageLayout initialLayout = vk::ImageLayout::eGeneral, const texture::Format& format = {}, texture::Dimensions dimensions = {}, texture::Swizzle swizzle = {});
};
/**
* @brief A texture which is backed by host constructs while being synchronized with the underlying guest texture
* @note This class conforms to the Lockable and BasicLockable C++ named requirements
*/
class Texture {
class Texture : public FenceCycleDependency {
private:
GPU &gpu;
std::mutex mutex; //!< Synchronizes any mutations to the texture or its backing
std::condition_variable backingCondition; //!< Signalled when a valid backing has been swapped in
using BackingType = std::variant<vk::Image, vk::raii::Image>;
using BackingType = std::variant<vk::Image, vk::raii::Image, memory::Image>;
BackingType backing; //!< The Vulkan image that backs this texture, it is nullable
std::shared_ptr<FenceCycle> cycle; //!< A fence cycle for when any host operation mutating the texture has completed, it must be waited on prior to any mutations to the backing
vk::ImageLayout layout;
@ -217,17 +241,26 @@ namespace skyline::gpu {
return std::visit(VariantVisitor{
[](vk::Image image) { return image; },
[](const vk::raii::Image &image) { return *image; },
[](const memory::Image &image) { return image.vkImage; },
}, backing);
}
public:
std::shared_ptr<GuestTexture> guest; //!< The guest texture from which this was created, it's required for syncing and not nullable
std::shared_ptr<GuestTexture> guest; //!< The guest texture from which this was created, it's required for syncing
texture::Dimensions dimensions;
texture::Format format;
vk::ImageTiling tiling;
vk::ComponentMapping mapping;
Texture(GPU &gpu, BackingType &&backing, vk::ImageLayout layout, std::shared_ptr<GuestTexture> guest, texture::Dimensions dimensions, texture::Format format, vk::ImageTiling tiling, vk::ComponentMapping mapping);
Texture(GPU &gpu, BackingType &&backing, std::shared_ptr<GuestTexture> guest, texture::Dimensions dimensions, const texture::Format& format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ComponentMapping mapping);
Texture(GPU &gpu, BackingType &&backing, texture::Dimensions dimensions, const texture::Format& format, vk::ImageLayout layout, vk::ImageTiling tiling, vk::ComponentMapping mapping = {});
/**
* @brief Creates and allocates memory for the backing, then creates a texture object wrapping it
* @param usage Usage flags that will be applied in addition to VK_IMAGE_USAGE_TRANSFER_SRC_BIT/VK_IMAGE_USAGE_TRANSFER_DST_BIT which are mandatory
*/
Texture(GPU &gpu, texture::Dimensions dimensions, const texture::Format& format, vk::ImageLayout initialLayout = vk::ImageLayout::eGeneral, vk::ImageUsageFlags usage = {}, vk::ImageTiling tiling = vk::ImageTiling::eOptimal, vk::ComponentMapping mapping = {});
/**
* @brief Acquires an exclusive lock on the texture for the calling thread
@ -300,13 +333,20 @@ namespace skyline::gpu {
/**
* @brief Synchronizes the host texture with the guest after it has been modified
* @note The texture **must** be locked prior to calling this
* @note The guest texture should not be null prior to calling this
*/
void SynchronizeHost();
/**
* @brief Synchronizes the guest texture with the host texture after it has been modified
* @note The texture **must** be locked prior to calling this
* @note The guest texture should not be null prior to calling this
*/
void SynchronizeGuest();
/**
* @brief Copies the contents of the supplied source texture into the current texture
*/
void CopyFrom(std::shared_ptr<Texture> source);
};
}

View File

@ -14,7 +14,7 @@
#include "GraphicBufferProducer.h"
namespace skyline::service::hosbinder {
GraphicBufferProducer::GraphicBufferProducer(const DeviceState &state) : state(state) {}
// Stores the device state and creates the bufferEvent KEvent up-front alongside the producer
// NOTE(review): the meaning of the `true` argument passed to KEvent isn't visible from here — confirm against KEvent's constructor
GraphicBufferProducer::GraphicBufferProducer(const DeviceState &state) : state(state), bufferEvent(std::make_shared<kernel::type::KEvent>(state, true)) {}
u8 GraphicBufferProducer::GetPendingBufferCount() {
u8 count{};
@ -45,36 +45,53 @@ namespace skyline::service::hosbinder {
return AndroidStatus::BadValue;
}
constexpr i32 invalidGraphicBufferSlot{-1}; //!< https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/include/gui/BufferQueueCore.h;l=61
slot = invalidGraphicBufferSlot;
constexpr i32 InvalidGraphicBufferSlot{-1}; //!< https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/include/gui/BufferQueueCore.h;l=61
slot = InvalidGraphicBufferSlot;
std::lock_guard guard(mutex);
auto result{state.gpu->presentation.GetFreeTexture(async, slot)};
if (result != AndroidStatus::Ok) [[unlikely]] {
if (result == AndroidStatus::Busy)
state.logger->Warn("No free buffers to dequeue");
return result;
auto buffer{queue.end()};
while (true) {
size_t dequeuedSlotCount{};
for (auto it{queue.begin()}; it != queue.end(); it++) {
// We want to select the oldest slot that's free to use as we'd want all slots to be used
// If we go linearly then we have a higher preference for selecting the former slots and being out of order
if (it->state == BufferState::Free && it->texture) {
if (buffer == queue.end() || it->frameNumber < buffer->frameNumber)
buffer = it;
else if (it->state == BufferState::Dequeued)
dequeuedSlotCount++;
}
}
if (buffer != queue.end()) {
slot = std::distance(queue.begin(), buffer);
break;
} else if (async) {
return AndroidStatus::WouldBlock;
} else if (dequeuedSlotCount == queue.size()) {
state.logger->Warn("Client attempting to dequeue more buffers when all buffers are dequeued by the client: {}", dequeuedSlotCount);
return AndroidStatus::InvalidOperation;
}
}
width = width ? width : defaultWidth;
height = height ? height : defaultHeight;
format = (format != AndroidPixelFormat::None) ? format : defaultFormat;
auto &buffer{queue.at(slot)};
if (!buffer.graphicBuffer) {
if (!buffer->graphicBuffer) {
// Horizon OS doesn't ever allocate memory for the buffers on the GraphicBufferProducer end
// All buffers must be preallocated on the client application and attached to an Android buffer using SetPreallocatedBuffer
return AndroidStatus::NoMemory;
}
auto &surface{buffer.graphicBuffer->graphicHandle.surfaces.front()};
if (buffer.graphicBuffer->format != format || surface.width != width || surface.height != height || (buffer.graphicBuffer->usage & usage) != usage) {
state.logger->Warn("Buffer which has been dequeued isn't compatible with the supplied parameters: Dimensions: {}x{}={}x{}, Format: {}={}, Usage: 0x{:X}=0x{:X}", width, height, surface.width, surface.height, ToString(format), ToString(buffer.graphicBuffer->format), usage, buffer.graphicBuffer->usage);
auto &surface{buffer->graphicBuffer->graphicHandle.surfaces.front()};
if (buffer->graphicBuffer->format != format || surface.width != width || surface.height != height || (buffer->graphicBuffer->usage & usage) != usage) {
state.logger->Warn("Buffer which has been dequeued isn't compatible with the supplied parameters: Dimensions: {}x{}={}x{}, Format: {}={}, Usage: 0x{:X}=0x{:X}", width, height, surface.width, surface.height, ToString(format), ToString(buffer->graphicBuffer->format), usage, buffer->graphicBuffer->usage);
// Nintendo doesn't deallocate the slot which was picked in here and reallocate it as a compatible buffer
// This is related to the comment above, Nintendo only allocates buffers on the client side
return AndroidStatus::NoInit;
}
buffer.state = BufferState::Dequeued;
buffer->state = BufferState::Dequeued;
fence = AndroidFence{}; // We just let the presentation engine return a buffer which is ready to be written into, there is no need for further synchronization
state.logger->Debug("#{} - Dimensions: {}x{}, Format: {}, Usage: 0x{:X}, Is Async: {}", slot, width, height, ToString(format), usage, async);
@ -106,7 +123,7 @@ namespace skyline::service::hosbinder {
return AndroidStatus::BadValue;
} else if (!buffer.wasBufferRequested) [[unlikely]] {
state.logger->Warn("#{} was queued prior to being requested", slot);
return AndroidStatus::BadValue;
buffer.wasBufferRequested = true; // Switch ignores this and doesn't return an error, certain homebrew ends up depending on this behavior
}
auto graphicBuffer{*buffer.graphicBuffer};
@ -139,13 +156,16 @@ namespace skyline::service::hosbinder {
fence.Wait(state.soc->host1x);
{
std::scoped_lock textureLock(*buffer.texture);
buffer.texture->SynchronizeHost();
buffer.texture->WaitOnFence();
state.gpu->presentation.Present(slot);
state.gpu->presentation.bufferEvent->Signal();
auto &texture{buffer.texture};
std::scoped_lock textureLock(*texture);
texture->SynchronizeHost();
state.gpu->presentation.Present(texture, ++frameNumber);
}
buffer.frameNumber = frameNumber;
buffer.state = BufferState::Free;
bufferEvent->Signal();
width = defaultWidth;
height = defaultHeight;
transformHint = state.gpu->presentation.GetTransformHint();
@ -169,11 +189,10 @@ namespace skyline::service::hosbinder {
}
fence.Wait(state.soc->host1x);
state.gpu->presentation.Present(slot); // We use a present as a way to free the buffer so that it can be acquired in dequeueBuffer again
buffer.state = BufferState::Free;
buffer.frameNumber = 0;
state.gpu->presentation.bufferEvent->Signal();
bufferEvent->Signal();
state.logger->Debug("#{}", slot);
}
@ -349,7 +368,7 @@ namespace skyline::service::hosbinder {
throw exception("Surface doesn't fit into NvMap mapping of size 0x{:X} when mapped at 0x{:X} -> 0x{:X}", nvBuffer->size, surface.offset, surface.offset + surface.size);
gpu::texture::TileMode tileMode;
gpu::texture::TileConfig tileConfig;
gpu::texture::TileConfig tileConfig{};
if (surface.layout == NvSurfaceLayout::Blocklinear) {
tileMode = gpu::texture::TileMode::Block;
tileConfig = {
@ -373,11 +392,11 @@ namespace skyline::service::hosbinder {
buffer.frameNumber = 0;
buffer.wasBufferRequested = false;
buffer.graphicBuffer = std::make_unique<GraphicBuffer>(graphicBuffer);
buffer.texture = state.gpu->presentation.CreatePresentationTexture(texture, slot);
buffer.texture = texture->CreateTexture({}, vk::ImageTiling::eLinear, vk::ImageLayout::eGeneral);
activeSlotCount = hasBufferCount = std::count_if(queue.begin(), queue.end(), [](const BufferSlot &slot) { return static_cast<bool>(slot.graphicBuffer); });
state.gpu->presentation.bufferEvent->Signal();
bufferEvent->Signal();
state.logger->Debug("#{} - Dimensions: {}x{} [Stride: {}], Format: {}, Layout: {}, {}: {}, Usage: 0x{:X}, NvMap {}: {}, Buffer Start/End: 0x{:X} -> 0x{:X}", slot, surface.width, surface.height, handle.stride, ToString(graphicBuffer.format), ToString(surface.layout), surface.layout == NvSurfaceLayout::Blocklinear ? "Block Height" : "Pitch", surface.layout == NvSurfaceLayout::Blocklinear ? 1U << surface.blockHeightLog2 : surface.pitch, graphicBuffer.usage, surface.nvmapHandle ? "Handle" : "ID", surface.nvmapHandle ? surface.nvmapHandle : handle.nvmapId, surface.offset, surface.offset + surface.size);
return AndroidStatus::Ok;

View File

@ -5,6 +5,7 @@
#pragma once
#include <kernel/types/KEvent.h>
#include <services/common/parcel.h>
#include "android_types.h"
#include "native_window.h"
@ -80,12 +81,10 @@ namespace skyline::service::hosbinder {
* @url https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/libs/gui/BufferQueueCore.cpp
*/
class GraphicBufferProducer {
public:
constexpr static u8 MaxSlotCount{16}; //!< The maximum amount of buffer slots that a buffer queue can hold, Android supports 64 but they go unused for applications like games so we've lowered this to 16 (https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/include/gui/BufferQueueDefs.h;l=29)
private:
const DeviceState &state;
std::mutex mutex; //!< Synchronizes access to the buffer queue
constexpr static u8 MaxSlotCount{16}; //!< The maximum amount of buffer slots that a buffer queue can hold, Android supports 64 but they go unused for applications like games so we've lowered this to 16 (https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/include/gui/BufferQueueDefs.h;l=29)
std::array<BufferSlot, MaxSlotCount> queue;
u8 activeSlotCount{2}; //!< The amount of slots in the queue that can be used
u8 hasBufferCount{}; //!< The amount of slots with buffers attached in the queue
@ -93,6 +92,7 @@ namespace skyline::service::hosbinder {
u32 defaultHeight{1}; //!< The assumed height of a buffer if none is supplied in DequeueBuffer
AndroidPixelFormat defaultFormat{AndroidPixelFormat::RGBA8888}; //!< The assumed format of a buffer if none is supplied in DequeueBuffer
NativeWindowApi connectedApi{NativeWindowApi::None}; //!< The API that the producer is currently connected to
u64 frameNumber{}; //!< The amount of frames that have been presented so far
/**
* @return The amount of buffers which have been queued onto the consumer
@ -156,6 +156,7 @@ namespace skyline::service::hosbinder {
AndroidStatus SetPreallocatedBuffer(i32 slot, const GraphicBuffer &graphicBuffer);
public:
std::shared_ptr<kernel::type::KEvent> bufferEvent; //!< Signalled every time a buffer in the queue is freed
DisplayId displayId{DisplayId::Null}; //!< The ID of this display
LayerStatus layerStatus{LayerStatus::Uninitialized}; //!< The status of the single layer the display has

View File

@ -40,7 +40,7 @@ namespace skyline::service::hosbinder {
}
Result IHOSBinderDriver::GetNativeHandle(type::KSession &session, ipc::IpcRequest &request, ipc::IpcResponse &response) {
KHandle handle{state.process->InsertItem(state.gpu->presentation.bufferEvent)};
KHandle handle{state.process->InsertItem(producer->bufferEvent)};
state.logger->Debug("Display Buffer Event Handle: 0x{:X}", handle);
response.copyHandles.push_back(handle);

View File

@ -45,12 +45,18 @@ namespace skyline::service::hosbinder {
/**
* @brief Nvidia and Nintendo's Android fence implementation, this significantly differs from the Android implementation (All FDs are inlined as integers rather than explicitly passed as FDs) but is a direct replacement
* @url https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/include/ui/Fence.h
* @url https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/libs/ui/Fence.cpp
*/
struct AndroidFence {
u32 fenceCount{}; //!< The amount of active fences in the array
std::array<nvdrv::Fence, 4> fences{}; //!< Nvidia's Android fence can hold a maximum of 4 fence FDs
AndroidFence() : fenceCount(0) {}
static constexpr u32 InvalidFenceId{0xFFFFFFFF}; //!< A magic value for the syncpoint ID of invalid fences (https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/include/ui/Fence.h;l=61)
/**
* @url https://cs.android.com/android/platform/superproject/+/android-5.1.1_r38:frameworks/native/libs/ui/Fence.cpp;l=34-36
*/
AndroidFence() : fenceCount(0), fences({InvalidFenceId}) {}
/**
* @brief Wait on all native fences in this Android fence till they're signalled
@ -59,8 +65,8 @@ namespace skyline::service::hosbinder {
if (fenceCount > fences.size())
throw exception("Wait has larger fence count ({}) than storage size ({})", fenceCount, fences.size());
for (auto it{fences.begin()}, end{fences.begin() + fenceCount}; it < end; it++)
if (!host1x.syncpoints.at(it->id).Wait(it->value, std::chrono::steady_clock::duration::max()))
throw exception("Waiting on native fence #{} (Host1X Syncpoint: {}) has timed out", std::distance(fences.begin(), it), it->id);
if (it->id != InvalidFenceId)
host1x.syncpoints.at(it->id).Wait(it->value, std::chrono::steady_clock::duration::max());
}
};

View File

@ -7,7 +7,7 @@
namespace skyline::service::visrv {
/**
* @brief This is used to access the display
* @brief This is used by applications to access the display
* @url https://switchbrew.org/wiki/Display_services#IApplicationDisplayService
*/
class IApplicationDisplayService : public IDisplayService {

View File

@ -46,8 +46,6 @@ namespace skyline::soc::host1x {
std::condition_variable cv;
bool flag{};
if (timeout == std::chrono::steady_clock::duration::max())
timeout = std::chrono::seconds(1);
if (!RegisterWaiter(threshold, [&cv, &mtx, &flag] {
std::unique_lock lock(mtx);
flag = true;
@ -58,7 +56,12 @@ namespace skyline::soc::host1x {
}
std::unique_lock lock(mtx);
return cv.wait_for(lock, timeout, [&flag] { return flag; });
if (timeout == std::chrono::steady_clock::duration::max()) {
cv.wait(lock, [&flag] { return flag; });
return true;
} else {
return cv.wait_for(lock, timeout, [&flag] { return flag; });
}
}
}

View File

@ -47,6 +47,7 @@ namespace skyline::soc::host1x {
/**
* @brief Waits for the syncpoint to reach given threshold
* @return If the wait was successful (true) or timed out (false)
* @note Guaranteed to succeed when 'steady_clock::duration::max()' is used
*/
bool Wait(u32 threshold, std::chrono::steady_clock::duration timeout);
};