Core: Move CountPerformanceMarker to VideoInterface to eliminate a Throttle call. PerformanceMetrics: Fixes/Cleanups.

This commit is contained in:
Jordan Woyak 2025-03-07 19:26:47 -06:00
parent 61ab662733
commit c42dab6388
6 changed files with 52 additions and 50 deletions

View File

@ -457,6 +457,8 @@ void CoreTimingManager::LogPendingEvents() const
// Should only be called from the CPU thread after the PPC clock has changed
void CoreTimingManager::AdjustEventQueueTimes(u32 new_ppc_clock, u32 old_ppc_clock)
{
g_perf_metrics.AdjustClockSpeed(m_globals.global_timer, new_ppc_clock, old_ppc_clock);
m_throttle_clock_per_sec = new_ppc_clock;
for (Event& ev : m_event_queue)

View File

@ -50,8 +50,6 @@ IPC_HLE_PERIOD: For the Wii Remote this is the call schedule:
#include "AudioCommon/Mixer.h"
#include "Common/CommonTypes.h"
#include "Common/Logging/Log.h"
#include "Common/Thread.h"
#include "Common/Timer.h"
#include "Core/Config/MainSettings.h"
#include "Core/Core.h"
@ -122,21 +120,6 @@ void SystemTimersManager::GPUSleepCallback(Core::System& system, u64 userdata, s
system_timers.m_event_type_gpu_sleeper);
}
// Periodic event callback: throttles, records one performance marker, then schedules
// itself again. `userdata` is not used. `cycles_late` is subtracted both from the tick
// count handed to Throttle and from the delay of the next scheduled event.
void SystemTimersManager::PerfTrackerCallback(Core::System& system, u64 userdata, s64 cycles_late)
{
auto& core_timing = system.GetCoreTiming();
// Throttle for accurate performance metrics.
core_timing.Throttle(core_timing.GetTicks() - cycles_late);
g_perf_metrics.CountPerformanceMarker(system, cycles_late);
// Call this performance tracker again in 1/100th of a second.
// The tracker stores 256 values so this will let us summarize the last 2.56 seconds.
// The performance metrics require this to be called at 100 Hz for the speed% to be correct.
auto& system_timers = system.GetSystemTimers();
core_timing.ScheduleEvent(system_timers.GetTicksPerSecond() / 100 - cycles_late,
system_timers.m_event_type_perf_tracker);
}
void SystemTimersManager::VICallback(Core::System& system, u64 userdata, s64 cycles_late)
{
auto& core_timing = system.GetCoreTiming();
@ -293,10 +276,8 @@ void SystemTimersManager::Init()
m_event_type_ipc_hle =
core_timing.RegisterEvent("IPC_HLE_UpdateCallback", IPC_HLE_UpdateCallback);
m_event_type_gpu_sleeper = core_timing.RegisterEvent("GPUSleeper", GPUSleepCallback);
m_event_type_perf_tracker = core_timing.RegisterEvent("PerfTracker", PerfTrackerCallback);
m_event_type_patch_engine = core_timing.RegisterEvent("PatchEngine", PatchEngineCallback);
core_timing.ScheduleEvent(0, m_event_type_perf_tracker);
core_timing.ScheduleEvent(0, m_event_type_gpu_sleeper);
core_timing.ScheduleEvent(vi.GetTicksPerHalfLine(), m_event_type_vi);
core_timing.ScheduleEvent(0, m_event_type_dsp);

View File

@ -94,7 +94,6 @@ private:
static void AudioDMACallback(Core::System& system, u64 userdata, s64 cycles_late);
static void IPC_HLE_UpdateCallback(Core::System& system, u64 userdata, s64 cycles_late);
static void GPUSleepCallback(Core::System& system, u64 userdata, s64 cycles_late);
static void PerfTrackerCallback(Core::System& system, u64 userdata, s64 cycles_late);
static void VICallback(Core::System& system, u64 userdata, s64 cycles_late);
static void DecrementerCallback(Core::System& system, u64 userdata, s64 cycles_late);
static void PatchEngineCallback(Core::System& system, u64 userdata, s64 cycles_late);
@ -116,7 +115,6 @@ private:
CoreTiming::EventType* m_event_type_dsp = nullptr;
CoreTiming::EventType* m_event_type_ipc_hle = nullptr;
CoreTiming::EventType* m_event_type_gpu_sleeper = nullptr;
CoreTiming::EventType* m_event_type_perf_tracker = nullptr;
// PatchEngine updates every 1/60th of a second by default
CoreTiming::EventType* m_event_type_patch_engine = nullptr;
};

View File

@ -914,6 +914,10 @@ void VideoInterfaceManager::Update(u64 ticks)
{
// Throttle before SI poll so user input is taken just before needed. (lower input latency)
core_timing.Throttle(ticks);
// This is a nice place to measure performance so we don't have to Throttle elsewhere.
g_perf_metrics.CountPerformanceMarker(ticks, m_system.GetSystemTimers().GetTicksPerSecond());
Core::UpdateInputGate(!Config::Get(Config::MAIN_INPUT_BACKGROUND_INPUT),
Config::Get(Config::MAIN_LOCK_CURSOR));
auto& si = m_system.GetSerialInterface();

View File

@ -9,10 +9,6 @@
#include <implot.h>
#include "Core/Config/GraphicsSettings.h"
#include "Core/CoreTiming.h"
#include "Core/HW/SystemTimers.h"
#include "Core/HW/VideoInterface.h"
#include "Core/System.h"
#include "VideoCommon/VideoConfig.h"
PerformanceMetrics g_perf_metrics;
@ -21,11 +17,11 @@ void PerformanceMetrics::Reset()
{
m_fps_counter.Reset();
m_vps_counter.Reset();
m_speed_counter.Reset();
m_time_sleeping = DT::zero();
m_real_times.fill(Clock::now());
m_core_ticks.fill(0);
m_samples = {};
m_speed = 0;
m_max_speed = 0;
}
@ -44,23 +40,36 @@ void PerformanceMetrics::CountThrottleSleep(DT sleep)
m_time_sleeping += sleep;
}
void PerformanceMetrics::CountPerformanceMarker(Core::System& system, s64 cycles_late)
// Rescales the core_ticks of every stored sample for a PPC clock change.
// Each sample's signed distance from `ticks` is multiplied by new_ppc_clock and then
// divided by old_ppc_clock, and the sample is re-expressed as `ticks` plus that scaled
// distance.
// NOTE(review): old_ppc_clock is used as a divisor without a zero check - confirm callers
// never pass 0.
void PerformanceMetrics::AdjustClockSpeed(s64 ticks, u32 new_ppc_clock, u32 old_ppc_clock)
{
// NOTE(review): this view is a diff without +/- markers. The two m_speed_counter calls
// below may belong to the replaced CountPerformanceMarker body rather than to this
// function - confirm against the commit.
m_speed_counter.Count();
m_speed_counter.UpdateStats();
for (auto& sample : m_samples)
{
// Multiply before dividing so the clock ratio is not truncated to an integer first.
const s64 diff = (sample.core_ticks - ticks) * new_ppc_clock / old_ppc_clock;
sample.core_ticks = ticks + diff;
}
}
const auto ticks = system.GetCoreTiming().GetTicks() - cycles_late;
const auto real_time = Clock::now() - m_time_sleeping;
// Records one performance sample and refreshes the published speed values.
//   core_ticks       - core tick count to store with this sample.
//   ticks_per_second - divisor used to convert a core tick delta into emulated time.
// Appends a sample, drops samples older than g_ActiveConfig.iPerfSampleUSec, then stores:
//   m_speed     = emulated time elapsed / clock time elapsed since the oldest sample
//   m_max_speed = emulated time elapsed / (clock time minus throttle sleep) elapsed
// NOTE(review): this view is a diff without +/- markers. The statements that touch
// m_core_ticks, m_real_times, m_time_index, `ticks`, `system` and `real_time` look like
// lines of the replaced implementation; `ticks`, `system` and `real_time` are not declared
// anywhere in this body - confirm against the commit.
void PerformanceMetrics::CountPerformanceMarker(s64 core_ticks, u32 ticks_per_second)
{
const auto clock_time = Clock::now();
// Clock time with the accumulated throttle-sleep duration (m_time_sleeping) removed.
const auto work_time = clock_time - m_time_sleeping;
auto& oldest_ticks = m_core_ticks[m_time_index];
auto& oldest_time = m_real_times[m_time_index];
m_samples.emplace_back(
PerfSample{.clock_time = clock_time, .work_time = work_time, .core_ticks = core_ticks});
m_max_speed = DT_s(ticks - oldest_ticks) / system.GetSystemTimers().GetTicksPerSecond() /
(real_time - oldest_time);
const auto sample_window = std::chrono::microseconds{g_ActiveConfig.iPerfSampleUSec};
// Discard samples that have aged out of the window. The sample appended above has an age
// of zero, so the deque is not emptied here (assuming the configured window is
// non-negative).
while (clock_time - m_samples.front().clock_time > sample_window)
m_samples.pop_front();
oldest_ticks = ticks;
oldest_time = real_time;
++m_time_index;
// Avoid division by zero when we just have one sample.
if (m_samples.size() < 2)
return;
const PerfSample& oldest = m_samples.front();
const auto elapsed_core_time = DT_s(core_ticks - oldest.core_ticks) / ticks_per_second;
// Relaxed ordering: each value is published on its own; no other data is ordered by these
// stores.
m_speed.store(elapsed_core_time / (clock_time - oldest.clock_time), std::memory_order_relaxed);
m_max_speed.store(elapsed_core_time / (work_time - oldest.work_time), std::memory_order_relaxed);
}
double PerformanceMetrics::GetFPS() const
@ -75,12 +84,12 @@ double PerformanceMetrics::GetVPS() const
// Returns the current speed value.
// NOTE(review): two return statements follow each other, so as written the first one wins
// and the m_speed load is unreachable. In this marker-less diff view the first line appears
// to be the pre-change statement and the second its replacement - confirm against the
// commit.
double PerformanceMetrics::GetSpeed() const
{
return m_speed_counter.GetHzAvg() / 100.0;
return m_speed.load(std::memory_order_relaxed);
}
// Returns the current max-speed value (m_max_speed).
// NOTE(review): two return statements follow each other, so as written the explicit
// relaxed load is unreachable. In this marker-less diff view the first line appears to be
// the pre-change statement and the second its replacement - confirm against the commit.
double PerformanceMetrics::GetMaxSpeed() const
{
return m_max_speed;
return m_max_speed.load(std::memory_order_relaxed);
}
void PerformanceMetrics::DrawImGuiStats(const float backbuffer_scale)

View File

@ -3,8 +3,8 @@
#pragma once
#include <array>
#include <atomic>
#include <deque>
#include "Common/CommonTypes.h"
#include "VideoCommon/PerformanceTracker.h"
@ -25,15 +25,17 @@ public:
PerformanceMetrics(PerformanceMetrics&&) = delete;
PerformanceMetrics& operator=(PerformanceMetrics&&) = delete;
// Count Functions
void Reset();
void CountFrame();
void CountVBlank();
// Call from CPU thread.
void CountThrottleSleep(DT sleep);
void CountPerformanceMarker(Core::System& system, s64 cyclesLate);
void AdjustClockSpeed(s64 ticks, u32 new_ppc_clock, u32 old_ppc_clock);
void CountPerformanceMarker(s64 ticks, u32 ticks_per_second);
// Getter Functions
// Getter Functions. May be called from any thread.
double GetFPS() const;
double GetVPS() const;
double GetSpeed() const;
@ -45,14 +47,20 @@ public:
private:
PerformanceTracker m_fps_counter{"render_times.txt"};
PerformanceTracker m_vps_counter{"vblank_times.txt"};
PerformanceTracker m_speed_counter{std::nullopt, std::chrono::seconds{1}};
double m_graph_max_time = 0.0;
std::atomic<double> m_speed{};
std::atomic<double> m_max_speed{};
u8 m_time_index = 0;
std::array<TimePoint, 256> m_real_times{};
std::array<u64, 256> m_core_ticks{};
// One measurement point kept in m_samples.
struct PerfSample
{
// Clock reading taken when the sample was recorded.
TimePoint clock_time;
// clock_time with the accumulated throttle-sleep duration subtracted.
TimePoint work_time;
// Core tick count supplied for this sample.
s64 core_ticks;
};
std::deque<PerfSample> m_samples;
DT m_time_sleeping{};
};