From 576bc6f37eef0c2a76e09940a133e154bcacf961 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sun, 16 Oct 2022 20:50:06 +0100 Subject: [PATCH] Add CommandExecutor slot count setting --- .../cpp/skyline/common/android_settings.h | 1 + app/src/main/cpp/skyline/common/settings.h | 1 + app/src/main/cpp/skyline/gpu/fence_cycle.h | 4 +--- .../gpu/interconnect/command_executor.cpp | 24 +++++++++++++++---- .../gpu/interconnect/command_executor.h | 10 ++++---- .../java/emu/skyline/utils/NativeSettings.kt | 1 + .../emu/skyline/utils/PreferenceSettings.kt | 1 + app/src/main/res/values/strings.xml | 2 ++ app/src/main/res/xml/preferences.xml | 8 +++++++ 9 files changed, 41 insertions(+), 11 deletions(-) diff --git a/app/src/main/cpp/skyline/common/android_settings.h b/app/src/main/cpp/skyline/common/android_settings.h index 0ab69592..14472209 100644 --- a/app/src/main/cpp/skyline/common/android_settings.h +++ b/app/src/main/cpp/skyline/common/android_settings.h @@ -39,6 +39,7 @@ namespace skyline { disableFrameThrottling = ktSettings.GetBool("disableFrameThrottling"); gpuDriver = ktSettings.GetString("gpuDriver"); gpuDriverLibraryName = ktSettings.GetString("gpuDriverLibraryName"); + executorSlotCount = ktSettings.GetInt("executorSlotCount"); validationLayer = ktSettings.GetBool("validationLayer"); }; }; diff --git a/app/src/main/cpp/skyline/common/settings.h b/app/src/main/cpp/skyline/common/settings.h index 1852e689..c128bcef 100644 --- a/app/src/main/cpp/skyline/common/settings.h +++ b/app/src/main/cpp/skyline/common/settings.h @@ -71,6 +71,7 @@ namespace skyline { // GPU Setting gpuDriver; //!< The label of the GPU driver to use Setting gpuDriverLibraryName; //!< The name of the GPU driver library to use + Setting executorSlotCount; //!< Number of GPU executor slots that can be used concurrently // Debug Setting validationLayer; //!< If the vulkan validation layer is enabled diff --git a/app/src/main/cpp/skyline/gpu/fence_cycle.h b/app/src/main/cpp/skyline/gpu/fence_cycle.h index c38a30f2..04762e29 100644 --- a/app/src/main/cpp/skyline/gpu/fence_cycle.h +++ b/app/src/main/cpp/skyline/gpu/fence_cycle.h @@ -109,8 +109,6 @@ namespace skyline::gpu { lock.unlock(); chainedCycles.Iterate([&](const auto &cycle) { - if (!cycle->Find(this)) - raise(SIGTRAP); cycle->WaitSubmit(); }); lock.lock(); @@ -129,7 +127,7 @@ namespace skyline::gpu { return; } - chainedCycles.Iterate([shouldDestroy, this](auto &cycle) { + chainedCycles.Iterate([shouldDestroy](auto &cycle) { cycle->Wait(shouldDestroy); }); diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp index 0cda42d9..3b8b3b7d 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp @@ -2,12 +2,17 @@ // Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) #include +#include #include #include #include "command_executor.h" namespace skyline::gpu::interconnect { - CommandRecordThread::CommandRecordThread(const DeviceState &state) : state{state}, thread{&CommandRecordThread::Run, this} {} + CommandRecordThread::CommandRecordThread(const DeviceState &state) + : state{state}, + incoming{*state.settings->executorSlotCount}, + outgoing{*state.settings->executorSlotCount}, + thread{&CommandRecordThread::Run, this} {} static vk::raii::CommandBuffer AllocateRaiiCommandBuffer(GPU &gpu, vk::raii::CommandPool &pool) { return {gpu.vkDevice, (*gpu.vkDevice).allocateCommandBuffers( @@ -31,6 +36,13 @@ namespace skyline::gpu::interconnect { semaphore{gpu.vkDevice, vk::SemaphoreCreateInfo{}}, cycle{std::make_shared(gpu.vkDevice, *fence, *semaphore, true)} {} + CommandRecordThread::Slot::Slot(Slot &&other) + : commandPool{std::move(other.commandPool)}, + commandBuffer{std::move(other.commandBuffer)}, + fence{std::move(other.fence)}, + semaphore{std::move(other.semaphore)}, + cycle{std::move(other.cycle)} {} + std::shared_ptr CommandRecordThread::Slot::Reset(GPU &gpu) { cycle->Wait(); cycle = std::make_shared(*cycle); @@ -79,7 +91,10 @@ namespace skyline::gpu::interconnect { void CommandRecordThread::Run() { auto &gpu{*state.gpu}; - std::array slots{{gpu, gpu, gpu, gpu, gpu, gpu}}; + + std::vector slots{}; + std::generate_n(std::back_inserter(slots), *state.settings->executorSlotCount, [&] () -> Slot { return gpu; }); + outgoing.AppendTranform(span(slots), [](auto &slot) { return &slot; }); if (int result{pthread_setname_np(pthread_self(), "Sky-CmdRecord")}) @@ -116,7 +131,8 @@ namespace skyline::gpu::interconnect { } CommandExecutor::CommandExecutor(const DeviceState &state) - : gpu{*state.gpu}, + : state{state}, + gpu{*state.gpu}, recordThread{state}, tag{AllocateTag()} { RotateRecordSlot(); @@ -401,7 +417,7 @@ namespace skyline::gpu::interconnect { allocator->Reset(); // Periodically clear preserve attachments just in case there are new waiters which would otherwise end up waiting forever - if ((submissionNumber % CommandRecordThread::ActiveRecordSlots * 2) == 0) { + if ((submissionNumber % (*state.settings->executorSlotCount * 2)) == 0) { preserveAttachedBuffers.clear(); preserveAttachedTextures.clear(); } diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h index 92da3c47..12571bbc 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h @@ -15,7 +15,6 @@ namespace skyline::gpu::interconnect { */ class CommandRecordThread { public: - static constexpr size_t ActiveRecordSlots{6}; //!< Maximum number of simultaneously active slots /** * @brief Single execution slot, buffered back and forth between the GPFIFO thread and the record thread @@ -32,6 +31,8 @@ namespace skyline::gpu::interconnect { Slot(GPU &gpu); + Slot(Slot &&other); + /** * @brief Waits on the fence and resets the command buffer * @note A new fence cycle for the reset command buffer @@ -41,10 +42,10 @@ namespace skyline::gpu::interconnect { private: const DeviceState &state; - std::thread thread; + CircularQueue incoming; //!< Slots pending recording + CircularQueue outgoing; //!< Slots that have been submitted, may still be active on the GPU - CircularQueue incoming{ActiveRecordSlots}; //!< Slots pending recording - CircularQueue outgoing{ActiveRecordSlots}; //!< Slots that have been submitted, may still be active on the GPU + std::thread thread; void ProcessSlot(Slot *slot); @@ -70,6 +71,7 @@ namespace skyline::gpu::interconnect { */ class CommandExecutor { private: + const DeviceState &state; GPU &gpu; CommandRecordThread recordThread; CommandRecordThread::Slot *slot{}; diff --git a/app/src/main/java/emu/skyline/utils/NativeSettings.kt b/app/src/main/java/emu/skyline/utils/NativeSettings.kt index 89d5439a..bd60e259 100644 --- a/app/src/main/java/emu/skyline/utils/NativeSettings.kt +++ b/app/src/main/java/emu/skyline/utils/NativeSettings.kt @@ -25,6 +25,7 @@ class NativeSettings(context : Context, pref : PreferenceSettings) { // GPU var gpuDriver : String = if (pref.gpuDriver == PreferenceSettings.SYSTEM_GPU_DRIVER) "" else pref.gpuDriver var gpuDriverLibraryName : String = if (pref.gpuDriver == PreferenceSettings.SYSTEM_GPU_DRIVER) "" else GpuDriverHelper.getLibraryName(context, pref.gpuDriver) + var executorSlotCount : Int = pref.executorSlotCount // Debug var validationLayer : Boolean = BuildConfig.BUILD_TYPE != "release" && pref.validationLayer diff --git a/app/src/main/java/emu/skyline/utils/PreferenceSettings.kt b/app/src/main/java/emu/skyline/utils/PreferenceSettings.kt index ce4c91e6..8c490322 100644 --- a/app/src/main/java/emu/skyline/utils/PreferenceSettings.kt +++ b/app/src/main/java/emu/skyline/utils/PreferenceSettings.kt @@ -38,6 +38,7 @@ class PreferenceSettings @Inject constructor(@ApplicationContext private val con // GPU var gpuDriver by sharedPreferences(context, SYSTEM_GPU_DRIVER) + var executorSlotCount by sharedPreferences(context, 6) // Debug var validationLayer by sharedPreferences(context, false) diff --git a/app/src/main/res/values/strings.xml b/app/src/main/res/values/strings.xml index dd48b368..a25dd499 100644 --- a/app/src/main/res/values/strings.xml +++ b/app/src/main/res/values/strings.xml @@ -70,6 +70,8 @@ Respect Display Cutout Do not draw UI elements in the cutout area Allow UI elements to be drawn in the cutout area + Executor Slot Count + Maximum number of simultaneous GPU executions (Higher may sometimes perform better but will use more RAM) Debug Enable validation layer diff --git a/app/src/main/res/xml/preferences.xml b/app/src/main/res/xml/preferences.xml index 994c5ae8..f4a7cc0a 100644 --- a/app/src/main/res/xml/preferences.xml +++ b/app/src/main/res/xml/preferences.xml @@ -127,6 +127,14 @@ android:summaryOn="@string/respect_display_cutout_enabled" app:key="respect_display_cutout" app:title="@string/respect_display_cutout" /> +