From 46e0952e97bae2cfdecaeed88f7768d028718918 Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Wed, 5 Mar 2025 03:26:34 -0600 Subject: [PATCH] PerformanceTracker: Use SPSCQueue and atomic to eliminate need for a mutex. Clean up some math. --- .../Core/VideoCommon/PerformanceMetrics.cpp | 4 + .../Core/VideoCommon/PerformanceTracker.cpp | 164 ++++++++---------- Source/Core/VideoCommon/PerformanceTracker.h | 38 ++-- 3 files changed, 93 insertions(+), 113 deletions(-) diff --git a/Source/Core/VideoCommon/PerformanceMetrics.cpp b/Source/Core/VideoCommon/PerformanceMetrics.cpp index 773dc44383..2a4928cd7e 100644 --- a/Source/Core/VideoCommon/PerformanceMetrics.cpp +++ b/Source/Core/VideoCommon/PerformanceMetrics.cpp @@ -48,6 +48,7 @@ void PerformanceMetrics::CountPerformanceMarker(Core::System& system, s64 cycles { std::unique_lock lock(m_time_lock); m_speed_counter.Count(); + m_speed_counter.UpdateStats(); m_real_times[m_time_index] = Clock::now() - m_time_sleeping; m_cpu_times[m_time_index] = system.GetCoreTiming().GetCPUTimePoint(cyclesLate); @@ -84,6 +85,9 @@ double PerformanceMetrics::GetLastSpeedDenominator() const void PerformanceMetrics::DrawImGuiStats(const float backbuffer_scale) { + m_vps_counter.UpdateStats(); + m_fps_counter.UpdateStats(); + const int movable_flag = Config::Get(Config::GFX_MOVABLE_PERFORMANCE_METRICS) ? 
ImGuiWindowFlags_None : ImGuiWindowFlags_NoMove; diff --git a/Source/Core/VideoCommon/PerformanceTracker.cpp b/Source/Core/VideoCommon/PerformanceTracker.cpp index 302adc6a57..f61f8fe63e 100644 --- a/Source/Core/VideoCommon/PerformanceTracker.cpp +++ b/Source/Core/VideoCommon/PerformanceTracker.cpp @@ -6,12 +6,12 @@ #include #include #include -#include #include #include "Common/CommonTypes.h" #include "Common/FileUtil.h" +#include "Common/MathUtil.h" #include "Core/Core.h" #include "VideoCommon/VideoConfig.h" @@ -21,14 +21,11 @@ static constexpr u64 MAX_QUALITY_GRAPH_SIZE = 1UL << 8; PerformanceTracker::PerformanceTracker(const std::optional log_name, const std::optional
sample_window_duration) - : m_on_state_changed_handle{Core::AddOnStateChangedCallback([this](Core::State state) { - if (state == Core::State::Paused) - SetPaused(true); - else if (state == Core::State::Running) - SetPaused(false); - })}, - m_log_name{log_name}, m_sample_window_duration{sample_window_duration} + : m_log_name{log_name}, m_sample_window_duration{sample_window_duration} { + m_on_state_changed_handle = + Core::AddOnStateChangedCallback([this](Core::State state) { m_is_last_time_sane = false; }); + Reset(); } @@ -39,112 +36,110 @@ PerformanceTracker::~PerformanceTracker() void PerformanceTracker::Reset() { - std::unique_lock lock{m_mutex}; + m_raw_dts.Clear(); + m_dt_queue.clear(); m_dt_total = DT::zero(); - m_dt_queue.clear(); + m_last_raw_dt = DT::zero(); m_last_time = Clock::now(); m_hz_avg = 0.0; m_dt_avg = DT::zero(); - m_dt_std = std::nullopt; + m_dt_std = DT::zero(); + m_is_last_time_sane = false; } void PerformanceTracker::Count() { - std::unique_lock lock{m_mutex}; + const TimePoint current_time{Clock::now()}; - if (m_paused) + const DT diff{current_time - m_last_time}; + m_last_time = current_time; + + if (!m_is_last_time_sane) + { + m_is_last_time_sane = true; return; + } - const DT window{GetSampleWindow()}; + m_last_raw_dt = diff; + m_raw_dts.Push(diff); +} - const TimePoint time{Clock::now()}; - const DT diff{time - m_last_time}; +void PerformanceTracker::UpdateStats() +{ + DT diff{}; + while (m_raw_dts.Pop(diff)) + HandleRawDt(diff); - m_last_time = time; + // Update Std Dev + MathUtil::RunningVariance variance; + for (auto dt : m_dt_queue) + variance.Push(DT_s(dt).count()); + m_dt_std = std::chrono::duration_cast
(DT_s(variance.PopulationStandardDeviation())); +} + +void PerformanceTracker::HandleRawDt(DT diff) +{ + if (m_dt_queue.size() == MAX_DT_QUEUE_SIZE) + PopBack(); PushFront(diff); - if (m_dt_queue.size() == MAX_DT_QUEUE_SIZE) - PopBack(); + const DT window{GetSampleWindow()}; while (m_dt_total - m_dt_queue.back() >= window) PopBack(); // Simple Moving Average Throughout the Window - m_dt_avg = m_dt_total / m_dt_queue.size(); - const double hz = DT_s(1.0) / m_dt_avg; + const DT dt_avg = m_dt_total / m_dt_queue.size(); + const double hz = DT_s(1.0) / dt_avg; + m_dt_avg = dt_avg; // Exponential Moving Average const DT_s rc = SAMPLE_RC_RATIO * std::min(window, m_dt_total); const double a = 1.0 - std::exp(-(DT_s(diff) / rc)); // Sometimes euler averages can break when the average is inf/nan - if (std::isfinite(m_hz_avg)) - m_hz_avg += a * (hz - m_hz_avg); + const auto hz_avg = m_hz_avg.load(); + if (std::isfinite(hz_avg)) + m_hz_avg = hz_avg + a * (hz - hz_avg); else m_hz_avg = hz; - m_dt_std = std::nullopt; - LogRenderTimeToFile(diff); } DT PerformanceTracker::GetSampleWindow() const { - // This reads a constant value and thus does not need a mutex return m_sample_window_duration.value_or( duration_cast
(DT_us{std::max(1, g_ActiveConfig.iPerfSampleUSec)})); } double PerformanceTracker::GetHzAvg() const { - std::shared_lock lock{m_mutex}; return m_hz_avg; } DT PerformanceTracker::GetDtAvg() const { - std::shared_lock lock{m_mutex}; return m_dt_avg; } DT PerformanceTracker::GetDtStd() const { - std::unique_lock lock{m_mutex}; - - if (m_dt_std) - return *m_dt_std; - - if (m_dt_queue.empty()) - return *(m_dt_std = DT::zero()); - - double total = 0.0; - for (auto dt : m_dt_queue) - { - double diff = DT_s(dt - m_dt_avg).count(); - total += diff * diff; - } - - // This is a weighted standard deviation - return *(m_dt_std = std::chrono::duration_cast
(DT_s(std::sqrt(total / m_dt_queue.size())))); + return m_dt_std; } DT PerformanceTracker::GetLastRawDt() const { - std::shared_lock lock{m_mutex}; - - if (m_dt_queue.empty()) - return DT::zero(); - - return m_dt_queue.front(); + return m_last_raw_dt; } void PerformanceTracker::ImPlotPlotLines(const char* label) const { - static std::array x, y; - - std::shared_lock lock{m_mutex}; + // "quality" graph uses twice as many points. + static_assert(MAX_QUALITY_GRAPH_SIZE * 2 <= MAX_DT_QUEUE_SIZE); + static std::array x, y; if (m_dt_queue.empty()) return; @@ -152,38 +147,32 @@ void PerformanceTracker::ImPlotPlotLines(const char* label) const // Decides if there are too many points to plot using rectangles const bool quality = m_dt_queue.size() < MAX_QUALITY_GRAPH_SIZE; - const DT update_time = Clock::now() - m_last_time; - const float predicted_frame_time = DT_ms(std::max(update_time, m_dt_queue.front())).count(); - - std::size_t points = 0; - if (quality) - { - x[points] = 0.f; - y[points] = predicted_frame_time; - ++points; - } - - x[points] = DT_ms(update_time).count(); - y[points] = predicted_frame_time; - ++points; - - for (auto dt : m_dt_queue) - { - const float frame_time_ms = DT_ms(dt).count(); + std::size_t point_index = 0; + const auto add_point = [&](DT dt, DT shift_x, float prev_ms) { + const float ms = DT_ms{dt}.count(); if (quality) { - x[points] = x[points - 1]; - y[points] = frame_time_ms; - ++points; + x[point_index] = prev_ms; + y[point_index] = ms; + ++point_index; } - x[points] = x[points - 1] + frame_time_ms; - y[points] = frame_time_ms; - ++points; - } + x[point_index] = prev_ms + DT_ms{shift_x}.count(); + y[point_index] = ms; + ++point_index; + }; - ImPlot::PlotLine(label, x.data(), y.data(), static_cast(points)); + // Rightmost point. + const auto update_time = Clock::now() - m_last_time; + const auto predicted_frame_time = std::max(update_time, m_dt_queue.front()); + add_point(predicted_frame_time, DT{}, 0); + + // Other points, right to left. 
+ for (auto dt : m_dt_queue) + add_point(dt, dt, x[point_index - 1]); + + ImPlot::PlotLine(label, x.data(), y.data(), static_cast(point_index)); } void PerformanceTracker::PushFront(DT value) @@ -211,18 +200,3 @@ void PerformanceTracker::LogRenderTimeToFile(DT val) m_bench_file << std::fixed << std::setprecision(8) << DT_ms(val).count() << std::endl; } - -void PerformanceTracker::SetPaused(bool paused) -{ - std::unique_lock lock{m_mutex}; - - m_paused = paused; - if (m_paused) - { - m_last_time = TimePoint::max(); - } - else - { - m_last_time = Clock::now(); - } -} diff --git a/Source/Core/VideoCommon/PerformanceTracker.h b/Source/Core/VideoCommon/PerformanceTracker.h index 8f39994d18..e45b5aab4e 100644 --- a/Source/Core/VideoCommon/PerformanceTracker.h +++ b/Source/Core/VideoCommon/PerformanceTracker.h @@ -3,12 +3,13 @@ #pragma once +#include #include #include #include -#include #include "Common/CommonTypes.h" +#include "Common/SPSCQueue.h" class PerformanceTracker { @@ -22,30 +23,30 @@ public: PerformanceTracker(PerformanceTracker&&) = delete; PerformanceTracker& operator=(PerformanceTracker&&) = delete; - // Functions for recording performance information void Reset(); + + // Calls must come from the same thread. + // UpdateStats is expected to be called regularly to empty the SPSC queue. + void UpdateStats(); + void ImPlotPlotLines(const char* label) const; + + // May call from any thread, but not concurrently, not that you'd want to.. void Count(); - // Functions for reading performance information + // May call from any thread. 
DT GetSampleWindow() const; - double GetHzAvg() const; - DT GetDtAvg() const; DT GetDtStd() const; - DT GetLastRawDt() const; - void ImPlotPlotLines(const char* label) const; - private: void LogRenderTimeToFile(DT val); - void SetPaused(bool paused); + void HandleRawDt(DT value); void PushFront(DT value); void PopBack(); - bool m_paused = false; int m_on_state_changed_handle; // Name of log file and file stream @@ -54,6 +55,12 @@ private: // Last time Count() was called TimePoint m_last_time; + std::atomic m_is_last_time_sane = false; + + // Push'd from Count() + // and Pop'd from UpdateStats() + Common::SPSCQueue m_raw_dts; + std::atomic
<DT> m_last_raw_dt = DT::zero(); // Amount of time to sample dt's over (defaults to config) const std::optional
<DT> m_sample_window_duration; @@ -63,12 +70,7 @@ private: std::deque
<DT> m_dt_queue; // Average rate/time throughout the window - DT m_dt_avg = DT::zero(); // Uses Moving Average - double m_hz_avg = 0.0; // Uses Moving Average + Euler Average - - // Used to initialize this on demand instead of on every Count() - mutable std::optional
<DT> m_dt_std = std::nullopt; - - // Used to enable thread safety with the performance tracker - mutable std::shared_mutex m_mutex; + std::atomic
<DT> m_dt_avg = DT::zero(); // Uses Moving Average + std::atomic<double> m_hz_avg = 0.0; // Uses Moving Average + Euler Average + std::atomic
<DT> m_dt_std = DT::zero(); };