mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-18 12:01:15 +01:00
e149ad4f0a
SPDX standardizes how source code conveys its copyright and licensing information. See https://spdx.github.io/spdx-spec/1-rationale/ . SPDX tags are adopted in many large projects, including things like the Linux kernel.
230 lines
7.4 KiB
C++
230 lines
7.4 KiB
C++
// Copyright 2016 Dolphin Emulator Project
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
#include "VideoBackends/Vulkan/VKPerfQuery.h"
|
|
|
|
#include <algorithm>
|
|
#include <cstring>
|
|
#include <functional>
|
|
|
|
#include "Common/Assert.h"
|
|
#include "Common/Logging/Log.h"
|
|
#include "Common/MsgHandler.h"
|
|
|
|
#include "VideoBackends/Vulkan/CommandBufferManager.h"
|
|
#include "VideoBackends/Vulkan/StateTracker.h"
|
|
#include "VideoBackends/Vulkan/VKRenderer.h"
|
|
#include "VideoBackends/Vulkan/VulkanContext.h"
|
|
#include "VideoCommon/VideoCommon.h"
|
|
|
|
namespace Vulkan
|
|
{
|
|
PerfQuery::PerfQuery() = default;
|
|
|
|
PerfQuery::~PerfQuery()
|
|
{
|
|
if (m_query_pool != VK_NULL_HANDLE)
|
|
vkDestroyQueryPool(g_vulkan_context->GetDevice(), m_query_pool, nullptr);
|
|
}
|
|
|
|
bool PerfQuery::Initialize()
|
|
{
|
|
if (!CreateQueryPool())
|
|
{
|
|
PanicAlertFmt("Failed to create query pool");
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void PerfQuery::EnableQuery(PerfQueryGroup type)
|
|
{
|
|
// Block if there are no free slots.
|
|
// Otherwise, try to keep half of them available.
|
|
const u32 query_count = m_query_count.load(std::memory_order_relaxed);
|
|
if (query_count > m_query_buffer.size() / 2)
|
|
PartialFlush(query_count == PERF_QUERY_BUFFER_SIZE);
|
|
|
|
// Ensure command buffer is ready to go before beginning the query, that way we don't submit
|
|
// a buffer with open queries.
|
|
StateTracker::GetInstance()->Bind();
|
|
|
|
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
|
|
{
|
|
ActiveQuery& entry = m_query_buffer[m_query_next_pos];
|
|
DEBUG_ASSERT(!entry.has_value);
|
|
entry.has_value = true;
|
|
|
|
// Use precise queries if supported, otherwise boolean (which will be incorrect).
|
|
VkQueryControlFlags flags =
|
|
g_vulkan_context->SupportsPreciseOcclusionQueries() ? VK_QUERY_CONTROL_PRECISE_BIT : 0;
|
|
|
|
// Ensure the query starts within a render pass.
|
|
StateTracker::GetInstance()->BeginRenderPass();
|
|
vkCmdBeginQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, m_query_next_pos,
|
|
flags);
|
|
}
|
|
}
|
|
|
|
void PerfQuery::DisableQuery(PerfQueryGroup type)
|
|
{
|
|
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
|
|
{
|
|
vkCmdEndQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, m_query_next_pos);
|
|
m_query_next_pos = (m_query_next_pos + 1) % PERF_QUERY_BUFFER_SIZE;
|
|
m_query_count.fetch_add(1, std::memory_order_relaxed);
|
|
}
|
|
}
|
|
|
|
void PerfQuery::ResetQuery()
|
|
{
|
|
m_query_count.store(0, std::memory_order_relaxed);
|
|
m_query_readback_pos = 0;
|
|
m_query_next_pos = 0;
|
|
for (size_t i = 0; i < m_results.size(); ++i)
|
|
m_results[i].store(0, std::memory_order_relaxed);
|
|
|
|
// Reset entire query pool, ensuring all queries are ready to write to.
|
|
StateTracker::GetInstance()->EndRenderPass();
|
|
vkCmdResetQueryPool(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, 0,
|
|
PERF_QUERY_BUFFER_SIZE);
|
|
|
|
std::memset(m_query_buffer.data(), 0, sizeof(ActiveQuery) * m_query_buffer.size());
|
|
}
|
|
|
|
u32 PerfQuery::GetQueryResult(PerfQueryType type)
|
|
{
|
|
u32 result = 0;
|
|
if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
|
|
{
|
|
result = m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
|
|
}
|
|
else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
|
|
{
|
|
result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed);
|
|
}
|
|
else if (type == PQ_BLEND_INPUT)
|
|
{
|
|
result = m_results[PQG_ZCOMP].load(std::memory_order_relaxed) +
|
|
m_results[PQG_ZCOMP_ZCOMPLOC].load(std::memory_order_relaxed);
|
|
}
|
|
else if (type == PQ_EFB_COPY_CLOCKS)
|
|
{
|
|
result = m_results[PQG_EFB_COPY_CLOCKS].load(std::memory_order_relaxed);
|
|
}
|
|
|
|
return result / 4;
|
|
}
|
|
|
|
void PerfQuery::FlushResults()
|
|
{
|
|
while (!IsFlushed())
|
|
PartialFlush(true);
|
|
}
|
|
|
|
bool PerfQuery::IsFlushed() const
|
|
{
|
|
return m_query_count.load(std::memory_order_relaxed) == 0;
|
|
}
|
|
|
|
bool PerfQuery::CreateQueryPool()
|
|
{
|
|
VkQueryPoolCreateInfo info = {
|
|
VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, // VkStructureType sType
|
|
nullptr, // const void* pNext
|
|
0, // VkQueryPoolCreateFlags flags
|
|
VK_QUERY_TYPE_OCCLUSION, // VkQueryType queryType
|
|
PERF_QUERY_BUFFER_SIZE, // uint32_t queryCount
|
|
0 // VkQueryPipelineStatisticFlags pipelineStatistics;
|
|
};
|
|
|
|
VkResult res = vkCreateQueryPool(g_vulkan_context->GetDevice(), &info, nullptr, &m_query_pool);
|
|
if (res != VK_SUCCESS)
|
|
{
|
|
LOG_VULKAN_ERROR(res, "vkCreateQueryPool failed: ");
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void PerfQuery::ReadbackQueries()
|
|
{
|
|
const u64 completed_fence_counter = g_command_buffer_mgr->GetCompletedFenceCounter();
|
|
|
|
// Need to save these since ProcessResults will modify them.
|
|
const u32 outstanding_queries = m_query_count.load(std::memory_order_relaxed);
|
|
u32 readback_count = 0;
|
|
for (u32 i = 0; i < outstanding_queries; i++)
|
|
{
|
|
u32 index = (m_query_readback_pos + readback_count) % PERF_QUERY_BUFFER_SIZE;
|
|
const ActiveQuery& entry = m_query_buffer[index];
|
|
if (entry.fence_counter > completed_fence_counter)
|
|
break;
|
|
|
|
// If this wrapped around, we need to flush the entries before the end of the buffer.
|
|
if (index < m_query_readback_pos)
|
|
{
|
|
ReadbackQueries(readback_count);
|
|
DEBUG_ASSERT(m_query_readback_pos == 0);
|
|
readback_count = 0;
|
|
}
|
|
|
|
readback_count++;
|
|
}
|
|
|
|
if (readback_count > 0)
|
|
ReadbackQueries(readback_count);
|
|
}
|
|
|
|
void PerfQuery::ReadbackQueries(u32 query_count)
|
|
{
|
|
// Should be at maximum query_count queries pending.
|
|
ASSERT(query_count <= m_query_count.load(std::memory_order_relaxed) &&
|
|
(m_query_readback_pos + query_count) <= PERF_QUERY_BUFFER_SIZE);
|
|
|
|
// Read back from the GPU.
|
|
VkResult res =
|
|
vkGetQueryPoolResults(g_vulkan_context->GetDevice(), m_query_pool, m_query_readback_pos,
|
|
query_count, query_count * sizeof(PerfQueryDataType),
|
|
m_query_result_buffer.data(), sizeof(PerfQueryDataType), 0);
|
|
if (res != VK_SUCCESS)
|
|
LOG_VULKAN_ERROR(res, "vkGetQueryPoolResults failed: ");
|
|
|
|
// Remove pending queries.
|
|
for (u32 i = 0; i < query_count; i++)
|
|
{
|
|
u32 index = (m_query_readback_pos + i) % PERF_QUERY_BUFFER_SIZE;
|
|
ActiveQuery& entry = m_query_buffer[index];
|
|
|
|
// Should have a fence associated with it (waiting for a result).
|
|
DEBUG_ASSERT(entry.fence_counter != 0);
|
|
entry.fence_counter = 0;
|
|
entry.has_value = false;
|
|
|
|
// NOTE: Reported pixel metrics should be referenced to native resolution
|
|
const u64 native_res_result = static_cast<u64>(m_query_result_buffer[i]) * EFB_WIDTH /
|
|
g_renderer->GetTargetWidth() * EFB_HEIGHT /
|
|
g_renderer->GetTargetHeight();
|
|
m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
|
|
std::memory_order_relaxed);
|
|
}
|
|
|
|
m_query_readback_pos = (m_query_readback_pos + query_count) % PERF_QUERY_BUFFER_SIZE;
|
|
m_query_count.fetch_sub(query_count, std::memory_order_relaxed);
|
|
}
|
|
|
|
void PerfQuery::PartialFlush(bool blocking)
|
|
{
|
|
// Submit a command buffer in the background if the front query is not bound to one.
|
|
if (blocking || m_query_buffer[m_query_readback_pos].fence_counter ==
|
|
g_command_buffer_mgr->GetCurrentFenceCounter())
|
|
{
|
|
Renderer::GetInstance()->ExecuteCommandBuffer(true, blocking);
|
|
}
|
|
|
|
ReadbackQueries();
|
|
}
|
|
} // namespace Vulkan
|