mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-14 10:09:13 +01:00
c08de82e90
Turns out it was helpful. (Most improvement in ubershaders.) This time with much better auto mode.
92 lines
2.5 KiB
Plaintext
92 lines
2.5 KiB
Plaintext
// Copyright 2022 Dolphin Emulator Project
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
#include "VideoBackends/Metal/MTLPerfQuery.h"
|
|
|
|
#include "VideoBackends/Metal/MTLStateTracker.h"
|
|
|
|
// Begins counting for the given query group.
// Only PQG_ZCOMP_ZCOMPLOC and PQG_ZCOMP are forwarded to the state tracker;
// any other group is a no-op in this function.
void Metal::PerfQuery::EnableQuery(PerfQueryGroup type)
{
  const bool is_zcomp_group = type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP;
  if (!is_zcomp_group)
    return;

  // The current query id is handed to the state tracker together with the group.
  // NOTE(review): presumably this is the id that ReturnResults() later compares
  // against m_current_query - confirm in the state tracker.
  g_state_tracker->EnablePerfQuery(type, m_current_query);
}
|
|
|
|
// Stops counting for the given query group.
// Mirrors EnableQuery(): only PQG_ZCOMP_ZCOMPLOC and PQG_ZCOMP reach the state
// tracker; any other group is a no-op in this function.
void Metal::PerfQuery::DisableQuery(PerfQueryGroup type)
{
  const bool is_zcomp_group = type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP;
  if (!is_zcomp_group)
    return;

  g_state_tracker->DisablePerfQuery();
}
|
|
|
|
void Metal::PerfQuery::ResetQuery()
|
|
{
|
|
std::lock_guard<std::mutex> lock(m_results_mtx);
|
|
m_current_query++;
|
|
for (std::atomic<u32>& result : m_results)
|
|
result.store(0, std::memory_order_relaxed);
|
|
}
|
|
|
|
// Returns the accumulated counter that backs the requested query type.
//
// Mapping from query type to the m_results group(s) that are read:
//   PQ_ZCOMP_INPUT_ZCOMPLOC / PQ_ZCOMP_OUTPUT_ZCOMPLOC -> PQG_ZCOMP_ZCOMPLOC
//   PQ_ZCOMP_INPUT / PQ_ZCOMP_OUTPUT                   -> PQG_ZCOMP
//   PQ_BLEND_INPUT                                     -> PQG_ZCOMP + PQG_ZCOMP_ZCOMPLOC
//   PQ_EFB_COPY_CLOCKS                                 -> PQG_EFB_COPY_CLOCKS
// Any other type yields 0.
u32 Metal::PerfQuery::GetQueryResult(PerfQueryType type)
{
  // All reads are relaxed atomic loads; no lock is taken here.
  const auto read_group = [this](PerfQueryGroup group) -> u32 {
    return m_results[group].load(std::memory_order_relaxed);
  };

  if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
    return read_group(PQG_ZCOMP_ZCOMPLOC);

  if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
    return read_group(PQG_ZCOMP);

  if (type == PQ_BLEND_INPUT)
    return read_group(PQG_ZCOMP) + read_group(PQG_ZCOMP_ZCOMPLOC);

  if (type == PQ_EFB_COPY_CLOCKS)
    return read_group(PQG_EFB_COPY_CLOCKS);

  return 0;
}
|
|
|
|
void Metal::PerfQuery::FlushResults()
|
|
{
|
|
if (IsFlushed())
|
|
return;
|
|
|
|
// There's a possibility that some active performance queries are unflushed
|
|
g_state_tracker->FlushEncoders();
|
|
g_state_tracker->NotifyOfCPUGPUSync();
|
|
|
|
std::unique_lock<std::mutex> lock(m_results_mtx);
|
|
while (!IsFlushed())
|
|
m_cv.wait(lock);
|
|
}
|
|
|
|
bool Metal::PerfQuery::IsFlushed() const
|
|
{
|
|
return m_query_count.load(std::memory_order_acquire) == 0;
|
|
}
|
|
|
|
void Metal::PerfQuery::ReturnResults(const u64* data, const PerfQueryGroup* groups, size_t count,
|
|
u32 query_id)
|
|
{
|
|
{
|
|
std::lock_guard<std::mutex> lock(m_results_mtx);
|
|
if (m_current_query == query_id)
|
|
{
|
|
for (size_t i = 0; i < count; ++i)
|
|
{
|
|
u64 native_res_result = data[i] * (EFB_WIDTH * EFB_HEIGHT) /
|
|
(g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight());
|
|
|
|
native_res_result /= g_ActiveConfig.iMultisamples;
|
|
|
|
m_results[groups[i]].fetch_add(native_res_result, std::memory_order_relaxed);
|
|
}
|
|
}
|
|
m_query_count.fetch_sub(1, std::memory_order_release);
|
|
}
|
|
m_cv.notify_one();
|
|
}
|