Dynamically apply GPU turbo clocks only when GPU submissions are queued

This allows the GPU to clock down when it is idle for most of the time, while still forcing maximum clocks whenever GPU submissions are queued.
This commit is contained in:
Billy Laws 2022-12-28 20:36:52 +00:00
parent 81f3ff348c
commit 28b2a7a8a1
6 changed files with 27 additions and 10 deletions

View File

@ -43,6 +43,7 @@ namespace skyline {
executorSlotCountScale = ktSettings.GetInt<u32>("executorSlotCountScale"); executorSlotCountScale = ktSettings.GetInt<u32>("executorSlotCountScale");
executorFlushThreshold = ktSettings.GetInt<u32>("executorFlushThreshold"); executorFlushThreshold = ktSettings.GetInt<u32>("executorFlushThreshold");
useDirectMemoryImport = ktSettings.GetBool("useDirectMemoryImport"); useDirectMemoryImport = ktSettings.GetBool("useDirectMemoryImport");
forceMaxGpuClocks = ktSettings.GetBool("forceMaxGpuClocks");
enableFastGpuReadbackHack = ktSettings.GetBool("enableFastGpuReadbackHack"); enableFastGpuReadbackHack = ktSettings.GetBool("enableFastGpuReadbackHack");
isAudioOutputDisabled = ktSettings.GetBool("isAudioOutputDisabled"); isAudioOutputDisabled = ktSettings.GetBool("isAudioOutputDisabled");
validationLayer = ktSettings.GetBool("validationLayer"); validationLayer = ktSettings.GetBool("validationLayer");

View File

@ -75,6 +75,7 @@ namespace skyline {
Setting<u32> executorSlotCountScale; //!< Number of GPU executor slots that can be used concurrently Setting<u32> executorSlotCountScale; //!< Number of GPU executor slots that can be used concurrently
Setting<u32> executorFlushThreshold; //!< Number of commands that need to accumulate before they're flushed to the GPU Setting<u32> executorFlushThreshold; //!< Number of commands that need to accumulate before they're flushed to the GPU
Setting<bool> useDirectMemoryImport; //!< If buffer emulation should be done by importing guest buffer mappings Setting<bool> useDirectMemoryImport; //!< If buffer emulation should be done by importing guest buffer mappings
Setting<bool> forceMaxGpuClocks; //!< If the GPU should be forced to run at maximum clocks
// Hacks // Hacks
Setting<bool> enableFastGpuReadbackHack; //!< If the CPU texture readback skipping hack should be used Setting<bool> enableFastGpuReadbackHack; //!< If the CPU texture readback skipping hack should be used

View File

@ -2,6 +2,7 @@
// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) // Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)
#include <range/v3/view.hpp> #include <range/v3/view.hpp>
#include <adrenotools/driver.h>
#include <common/settings.h> #include <common/settings.h>
#include <loader/loader.h> #include <loader/loader.h>
#include <gpu.h> #include <gpu.h>
@ -195,13 +196,29 @@ namespace skyline::gpu::interconnect {
void ExecutionWaiterThread::Run() { void ExecutionWaiterThread::Run() {
signal::SetSignalHandler({SIGSEGV}, nce::NCE::HostSignalHandler); // We may access NCE trapped memory signal::SetSignalHandler({SIGSEGV}, nce::NCE::HostSignalHandler); // We may access NCE trapped memory
// Enable turbo clocks to begin with if requested
if (*state.settings->forceMaxGpuClocks)
adrenotools_set_turbo(true);
while (true) { while (true) {
std::pair<std::shared_ptr<FenceCycle>, std::function<void()>> item{}; std::pair<std::shared_ptr<FenceCycle>, std::function<void()>> item{};
{ {
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
if (pendingSignalQueue.empty()) {
idle = true; idle = true;
// Don't force turbo clocks when the GPU is idle
if (*state.settings->forceMaxGpuClocks)
adrenotools_set_turbo(false);
condition.wait(lock, [this] { return !pendingSignalQueue.empty(); }); condition.wait(lock, [this] { return !pendingSignalQueue.empty(); });
// Once we have work to do, re-enable turbo clocks if forcing them was requested
if (*state.settings->forceMaxGpuClocks)
adrenotools_set_turbo(true);
idle = false; idle = false;
}
item = std::move(pendingSignalQueue.front()); item = std::move(pendingSignalQueue.front());
pendingSignalQueue.pop(); pendingSignalQueue.pop();
} }
@ -216,7 +233,7 @@ namespace skyline::gpu::interconnect {
} }
} }
ExecutionWaiterThread::ExecutionWaiterThread() : thread{&ExecutionWaiterThread::Run, this} {} ExecutionWaiterThread::ExecutionWaiterThread(const DeviceState &state) : state{state}, thread{&ExecutionWaiterThread::Run, this} {}
bool ExecutionWaiterThread::IsIdle() const { bool ExecutionWaiterThread::IsIdle() const {
return idle; return idle;
@ -232,6 +249,7 @@ namespace skyline::gpu::interconnect {
: state{state}, : state{state},
gpu{*state.gpu}, gpu{*state.gpu},
recordThread{state}, recordThread{state},
waiterThread{state},
tag{AllocateTag()} { tag{AllocateTag()} {
RotateRecordSlot(); RotateRecordSlot();
} }

View File

@ -97,6 +97,7 @@ namespace skyline::gpu::interconnect {
*/ */
class ExecutionWaiterThread { class ExecutionWaiterThread {
private: private:
const DeviceState &state;
std::thread thread; std::thread thread;
std::mutex mutex; std::mutex mutex;
std::condition_variable condition; std::condition_variable condition;
@ -106,7 +107,7 @@ namespace skyline::gpu::interconnect {
void Run(); void Run();
public: public:
ExecutionWaiterThread(); ExecutionWaiterThread(const DeviceState &state);
bool IsIdle() const; bool IsIdle() const;

View File

@ -256,8 +256,6 @@ class EmulationActivity : AppCompatActivity(), SurfaceHolder.Callback, View.OnTo
force60HzRefreshRate(!preferenceSettings.maxRefreshRate) force60HzRefreshRate(!preferenceSettings.maxRefreshRate)
getSystemService<DisplayManager>()?.registerDisplayListener(this, null) getSystemService<DisplayManager>()?.registerDisplayListener(this, null)
if (preferenceSettings.forceMaxGpuClocks)
GpuDriverHelper.forceMaxGpuClocks(true)
binding.gameView.setOnTouchListener(this) binding.gameView.setOnTouchListener(this)
@ -291,9 +289,6 @@ class EmulationActivity : AppCompatActivity(), SurfaceHolder.Callback, View.OnTo
override fun onResume() { override fun onResume() {
super.onResume() super.onResume()
if (preferenceSettings.forceMaxGpuClocks)
GpuDriverHelper.forceMaxGpuClocks(true)
changeAudioStatus(true) changeAudioStatus(true)
if (Build.VERSION.SDK_INT <= Build.VERSION_CODES.R) { if (Build.VERSION.SDK_INT <= Build.VERSION_CODES.R) {

View File

@ -29,6 +29,7 @@ class NativeSettings(context : Context, pref : PreferenceSettings) {
var executorSlotCountScale : Int = pref.executorSlotCountScale var executorSlotCountScale : Int = pref.executorSlotCountScale
var executorFlushThreshold : Int = pref.executorFlushThreshold var executorFlushThreshold : Int = pref.executorFlushThreshold
var useDirectMemoryImport : Boolean = pref.useDirectMemoryImport var useDirectMemoryImport : Boolean = pref.useDirectMemoryImport
var forceMaxGpuClocks : Boolean = pref.forceMaxGpuClocks
// Hacks // Hacks
var enableFastGpuReadbackHack : Boolean = pref.enableFastGpuReadbackHack var enableFastGpuReadbackHack : Boolean = pref.enableFastGpuReadbackHack