From c42dab6388de9318bed370900c3431d8231a5376 Mon Sep 17 00:00:00 2001
From: Jordan Woyak <jordan.woyak@gmail.com>
Date: Fri, 7 Mar 2025 19:26:47 -0600
Subject: [PATCH 1/2] Core: Move CountPerformanceMarker to VideoInterface to
 eliminate a Throttle call. PerformanceMetrics: Fixes/Cleanups.

---
 Source/Core/Core/CoreTiming.cpp               |  2 +
 Source/Core/Core/HW/SystemTimers.cpp          | 19 -------
 Source/Core/Core/HW/SystemTimers.h            |  2 -
 Source/Core/Core/HW/VideoInterface.cpp        |  4 ++
 .../Core/VideoCommon/PerformanceMetrics.cpp   | 51 +++++++++++--------
 Source/Core/VideoCommon/PerformanceMetrics.h  | 24 ++++++---
 6 files changed, 52 insertions(+), 50 deletions(-)

diff --git a/Source/Core/Core/CoreTiming.cpp b/Source/Core/Core/CoreTiming.cpp
index 6893bc2b91..c37bc5f03e 100644
--- a/Source/Core/Core/CoreTiming.cpp
+++ b/Source/Core/Core/CoreTiming.cpp
@@ -457,6 +457,8 @@ void CoreTimingManager::LogPendingEvents() const
 // Should only be called from the CPU thread after the PPC clock has changed
 void CoreTimingManager::AdjustEventQueueTimes(u32 new_ppc_clock, u32 old_ppc_clock)
 {
+  g_perf_metrics.AdjustClockSpeed(m_globals.global_timer, new_ppc_clock, old_ppc_clock);
+
   m_throttle_clock_per_sec = new_ppc_clock;
 
   for (Event& ev : m_event_queue)
diff --git a/Source/Core/Core/HW/SystemTimers.cpp b/Source/Core/Core/HW/SystemTimers.cpp
index 508b445288..c318ca08d4 100644
--- a/Source/Core/Core/HW/SystemTimers.cpp
+++ b/Source/Core/Core/HW/SystemTimers.cpp
@@ -50,8 +50,6 @@ IPC_HLE_PERIOD: For the Wii Remote this is the call schedule:
 
 #include "AudioCommon/Mixer.h"
 #include "Common/CommonTypes.h"
-#include "Common/Logging/Log.h"
-#include "Common/Thread.h"
 #include "Common/Timer.h"
 #include "Core/Config/MainSettings.h"
 #include "Core/Core.h"
@@ -122,21 +120,6 @@ void SystemTimersManager::GPUSleepCallback(Core::System& system, u64 userdata, s
                             system_timers.m_event_type_gpu_sleeper);
 }
 
-void SystemTimersManager::PerfTrackerCallback(Core::System& system, u64 userdata, s64 cycles_late)
-{
-  auto& core_timing = system.GetCoreTiming();
-  // Throttle for accurate performance metrics.
-  core_timing.Throttle(core_timing.GetTicks() - cycles_late);
-  g_perf_metrics.CountPerformanceMarker(system, cycles_late);
-
-  // Call this performance tracker again in 1/100th of a second.
-  // The tracker stores 256 values so this will let us summarize the last 2.56 seconds.
-  // The performance metrics require this to be called at 100hz for the speed% is correct.
-  auto& system_timers = system.GetSystemTimers();
-  core_timing.ScheduleEvent(system_timers.GetTicksPerSecond() / 100 - cycles_late,
-                            system_timers.m_event_type_perf_tracker);
-}
-
 void SystemTimersManager::VICallback(Core::System& system, u64 userdata, s64 cycles_late)
 {
   auto& core_timing = system.GetCoreTiming();
@@ -293,10 +276,8 @@ void SystemTimersManager::Init()
   m_event_type_ipc_hle =
       core_timing.RegisterEvent("IPC_HLE_UpdateCallback", IPC_HLE_UpdateCallback);
   m_event_type_gpu_sleeper = core_timing.RegisterEvent("GPUSleeper", GPUSleepCallback);
-  m_event_type_perf_tracker = core_timing.RegisterEvent("PerfTracker", PerfTrackerCallback);
   m_event_type_patch_engine = core_timing.RegisterEvent("PatchEngine", PatchEngineCallback);
 
-  core_timing.ScheduleEvent(0, m_event_type_perf_tracker);
   core_timing.ScheduleEvent(0, m_event_type_gpu_sleeper);
   core_timing.ScheduleEvent(vi.GetTicksPerHalfLine(), m_event_type_vi);
   core_timing.ScheduleEvent(0, m_event_type_dsp);
diff --git a/Source/Core/Core/HW/SystemTimers.h b/Source/Core/Core/HW/SystemTimers.h
index 8d5f051692..b0002c49ee 100644
--- a/Source/Core/Core/HW/SystemTimers.h
+++ b/Source/Core/Core/HW/SystemTimers.h
@@ -94,7 +94,6 @@ private:
   static void AudioDMACallback(Core::System& system, u64 userdata, s64 cycles_late);
   static void IPC_HLE_UpdateCallback(Core::System& system, u64 userdata, s64 cycles_late);
   static void GPUSleepCallback(Core::System& system, u64 userdata, s64 cycles_late);
-  static void PerfTrackerCallback(Core::System& system, u64 userdata, s64 cycles_late);
   static void VICallback(Core::System& system, u64 userdata, s64 cycles_late);
   static void DecrementerCallback(Core::System& system, u64 userdata, s64 cycles_late);
   static void PatchEngineCallback(Core::System& system, u64 userdata, s64 cycles_late);
@@ -116,7 +115,6 @@ private:
   CoreTiming::EventType* m_event_type_dsp = nullptr;
   CoreTiming::EventType* m_event_type_ipc_hle = nullptr;
   CoreTiming::EventType* m_event_type_gpu_sleeper = nullptr;
-  CoreTiming::EventType* m_event_type_perf_tracker = nullptr;
   // PatchEngine updates every 1/60th of a second by default
   CoreTiming::EventType* m_event_type_patch_engine = nullptr;
 };
diff --git a/Source/Core/Core/HW/VideoInterface.cpp b/Source/Core/Core/HW/VideoInterface.cpp
index 21346534ec..ee7a0889a5 100644
--- a/Source/Core/Core/HW/VideoInterface.cpp
+++ b/Source/Core/Core/HW/VideoInterface.cpp
@@ -914,6 +914,10 @@ void VideoInterfaceManager::Update(u64 ticks)
   {
     // Throttle before SI poll so user input is taken just before needed. (lower input latency)
     core_timing.Throttle(ticks);
+
+    // This is a nice place to measure performance so we don't have to Throttle elsewhere.
+    g_perf_metrics.CountPerformanceMarker(ticks, m_system.GetSystemTimers().GetTicksPerSecond());
+
     Core::UpdateInputGate(!Config::Get(Config::MAIN_INPUT_BACKGROUND_INPUT),
                           Config::Get(Config::MAIN_LOCK_CURSOR));
     auto& si = m_system.GetSerialInterface();
diff --git a/Source/Core/VideoCommon/PerformanceMetrics.cpp b/Source/Core/VideoCommon/PerformanceMetrics.cpp
index db61ea8978..321e110996 100644
--- a/Source/Core/VideoCommon/PerformanceMetrics.cpp
+++ b/Source/Core/VideoCommon/PerformanceMetrics.cpp
@@ -9,10 +9,6 @@
 #include <implot.h>
 
 #include "Core/Config/GraphicsSettings.h"
-#include "Core/CoreTiming.h"
-#include "Core/HW/SystemTimers.h"
-#include "Core/HW/VideoInterface.h"
-#include "Core/System.h"
 #include "VideoCommon/VideoConfig.h"
 
 PerformanceMetrics g_perf_metrics;
@@ -21,11 +17,11 @@ void PerformanceMetrics::Reset()
 {
   m_fps_counter.Reset();
   m_vps_counter.Reset();
-  m_speed_counter.Reset();
 
   m_time_sleeping = DT::zero();
-  m_real_times.fill(Clock::now());
-  m_core_ticks.fill(0);
+  m_samples = {};
+
+  m_speed = 0;
   m_max_speed = 0;
 }
 
@@ -44,23 +40,36 @@ void PerformanceMetrics::CountThrottleSleep(DT sleep)
   m_time_sleeping += sleep;
 }
 
-void PerformanceMetrics::CountPerformanceMarker(Core::System& system, s64 cycles_late)
+void PerformanceMetrics::AdjustClockSpeed(s64 ticks, u32 new_ppc_clock, u32 old_ppc_clock)
 {
-  m_speed_counter.Count();
-  m_speed_counter.UpdateStats();
+  // Rescale each stored sample's tick distance from `ticks` by new_ppc_clock / old_ppc_clock.
+  for (PerfSample& entry : m_samples)
+  {
+    entry.core_ticks = ticks + (entry.core_ticks - ticks) * new_ppc_clock / old_ppc_clock;
+  }
+}
 
-  const auto ticks = system.GetCoreTiming().GetTicks() - cycles_late;
-  const auto real_time = Clock::now() - m_time_sleeping;
+void PerformanceMetrics::CountPerformanceMarker(s64 core_ticks, u32 ticks_per_second)
+{
+  const auto clock_time = Clock::now();
+  const auto work_time = clock_time - m_time_sleeping;  // Wall clock minus throttle sleep.
 
-  auto& oldest_ticks = m_core_ticks[m_time_index];
-  auto& oldest_time = m_real_times[m_time_index];
+  m_samples.emplace_back(
+      PerfSample{.clock_time = clock_time, .work_time = work_time, .core_ticks = core_ticks});
 
-  m_max_speed = DT_s(ticks - oldest_ticks) / system.GetSystemTimers().GetTicksPerSecond() /
-                (real_time - oldest_time);
+  // Expire samples outside the window; always keep the newest so front() stays valid.
+  const auto sample_window = std::chrono::microseconds{g_ActiveConfig.iPerfSampleUSec};
+  while (m_samples.size() > 1 && clock_time - m_samples.front().clock_time > sample_window)
+    m_samples.pop_front();
 
-  oldest_ticks = ticks;
-  oldest_time = real_time;
-  ++m_time_index;
+  // Avoid division by zero when we just have one sample.
+  if (m_samples.size() < 2)
+    return;
+
+  const PerfSample& oldest = m_samples.front();
+  const auto elapsed_core_time = DT_s(core_ticks - oldest.core_ticks) / ticks_per_second;
+  m_speed.store(elapsed_core_time / (clock_time - oldest.clock_time), std::memory_order_relaxed);
+  m_max_speed.store(elapsed_core_time / (work_time - oldest.work_time), std::memory_order_relaxed);
 }
 
 double PerformanceMetrics::GetFPS() const
@@ -75,12 +84,12 @@ double PerformanceMetrics::GetVPS() const
 
 double PerformanceMetrics::GetSpeed() const
 {
-  return m_speed_counter.GetHzAvg() / 100.0;
+  return m_speed.load(std::memory_order_relaxed);  // Emulated time / wall-clock time ratio.
 }
 
 double PerformanceMetrics::GetMaxSpeed() const
 {
-  return m_max_speed;
+  return m_max_speed.load(std::memory_order_relaxed);  // Emulated time / non-sleeping time ratio.
 }
 
 void PerformanceMetrics::DrawImGuiStats(const float backbuffer_scale)
diff --git a/Source/Core/VideoCommon/PerformanceMetrics.h b/Source/Core/VideoCommon/PerformanceMetrics.h
index ca035b108e..7beac394dc 100644
--- a/Source/Core/VideoCommon/PerformanceMetrics.h
+++ b/Source/Core/VideoCommon/PerformanceMetrics.h
@@ -3,8 +3,8 @@
 
 #pragma once
 
-#include <array>
 #include <atomic>
+#include <deque>
 
 #include "Common/CommonTypes.h"
 #include "VideoCommon/PerformanceTracker.h"
@@ -25,15 +25,17 @@ public:
   PerformanceMetrics(PerformanceMetrics&&) = delete;
   PerformanceMetrics& operator=(PerformanceMetrics&&) = delete;
 
-  // Count Functions
   void Reset();
+
   void CountFrame();
   void CountVBlank();
 
+  // Call from CPU thread.
   void CountThrottleSleep(DT sleep);
-  void CountPerformanceMarker(Core::System& system, s64 cyclesLate);
+  void AdjustClockSpeed(s64 ticks, u32 new_ppc_clock, u32 old_ppc_clock);
+  void CountPerformanceMarker(s64 ticks, u32 ticks_per_second);
 
-  // Getter Functions
+  // Getter Functions. May be called from any thread.
   double GetFPS() const;
   double GetVPS() const;
   double GetSpeed() const;
@@ -45,14 +47,20 @@ public:
 private:
   PerformanceTracker m_fps_counter{"render_times.txt"};
   PerformanceTracker m_vps_counter{"vblank_times.txt"};
-  PerformanceTracker m_speed_counter{std::nullopt, std::chrono::seconds{1}};
 
   double m_graph_max_time = 0.0;
 
+  std::atomic<double> m_speed{};
   std::atomic<double> m_max_speed{};
-  u8 m_time_index = 0;
-  std::array<TimePoint, 256> m_real_times{};
-  std::array<u64, 256> m_core_ticks{};
+
+  struct PerfSample  // One entry in m_samples, recorded by CountPerformanceMarker.
+  {
+    TimePoint clock_time;  // Clock::now() when the sample was taken.
+    TimePoint work_time;   // clock_time minus the accumulated m_time_sleeping.
+    s64 core_ticks;        // Tick value passed in; rescaled by AdjustClockSpeed.
+  };
+
+  std::deque<PerfSample> m_samples;
   DT m_time_sleeping{};
 };
 

From af1f07207f18c006a94ec72f60ccf5c9b7e6716e Mon Sep 17 00:00:00 2001
From: Jordan Woyak <jordan.woyak@gmail.com>
Date: Fri, 28 Mar 2025 21:44:50 -0500
Subject: [PATCH 2/2] VideoInterface: Throttle for VBlank only when necessary.

---
 Source/Core/Core/HW/VideoInterface.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/Source/Core/Core/HW/VideoInterface.cpp b/Source/Core/Core/HW/VideoInterface.cpp
index ee7a0889a5..b03a8c5988 100644
--- a/Source/Core/Core/HW/VideoInterface.cpp
+++ b/Source/Core/Core/HW/VideoInterface.cpp
@@ -853,10 +853,14 @@ void VideoInterfaceManager::EndField(FieldType field, u64 ticks)
   if (!Config::Get(Config::GFX_HACK_EARLY_XFB_OUTPUT))
     OutputField(field, ticks);
 
-  // Note: We really only need to Throttle prior to to presentation,
-  //  but it is needed here if we want accurate "VBlank" statistics,
-  //  when using GPU-on-Thread or Early/Immediate XFB.
-  m_system.GetCoreTiming().Throttle(ticks);
+  // Note: OutputField above doesn't present when using GPU-on-Thread or Early/Immediate XFB,
+  //  giving "VBlank" measurements here poor pacing without a Throttle call.
+  // If the user actually wants the data, we'll Throttle to make the numbers nice.
+  const bool is_vblank_data_wanted = g_ActiveConfig.bShowVPS || g_ActiveConfig.bShowVTimes ||
+                                     g_ActiveConfig.bLogRenderTimeToFile ||
+                                     g_ActiveConfig.bShowGraphs;
+  if (is_vblank_data_wanted)
+    m_system.GetCoreTiming().Throttle(ticks);
 
   g_perf_metrics.CountVBlank();
   VIEndFieldEvent::Trigger();