213 lines
6.5 KiB
C++

// Copyright 2016 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "VideoBackends/Vulkan/PerfQuery.h"
#include <algorithm>
#include <cstring>
#include <functional>
#include "Common/Assert.h"
#include "Common/Logging/Log.h"
#include "Common/MsgHandler.h"
#include "VideoBackends/Vulkan/CommandBufferManager.h"
#include "VideoBackends/Vulkan/Renderer.h"
#include "VideoBackends/Vulkan/StateTracker.h"
#include "VideoBackends/Vulkan/VulkanContext.h"
namespace Vulkan
{
// Construction does no work; the query pool is created later by Initialize().
PerfQuery::PerfQuery() = default;

// Releases the Vulkan query pool if one is currently held.
PerfQuery::~PerfQuery()
{
  // Nothing to release when no pool handle is held.
  if (m_query_pool == VK_NULL_HANDLE)
    return;

  vkDestroyQueryPool(g_vulkan_context->GetDevice(), m_query_pool, nullptr);
}
// Creates the query pool used by this object.
// Returns true on success; on failure a panic alert is raised and false is returned.
bool PerfQuery::Initialize()
{
  const bool pool_created = CreateQueryPool();
  if (!pool_created)
    PanicAlert("Failed to create query pool");

  return pool_created;
}
// Begins a query for the given group.
// Only PQG_ZCOMP_ZCOMPLOC and PQG_ZCOMP start a Vulkan query; for any other group the
// only effect is the possible flush below.
void PerfQuery::EnableQuery(PerfQueryGroup type)
{
  // Block if there are no free slots.
  // Otherwise, try to keep half of them available.
  if (m_query_count > m_query_buffer.size() / 2)
    PartialFlush(m_query_count == PERF_QUERY_BUFFER_SIZE);

  if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
  {
    ActiveQuery& entry = m_query_buffer[m_query_next_pos];
    DEBUG_ASSERT(!entry.has_value);
    entry.has_value = true;

    // Record which group this slot belongs to. The readback path accumulates into
    // m_results[entry.query_type], so leaving this unset would credit every result to
    // whichever group the zeroed value corresponds to.
    // The cast goes through the member's declared type so this compiles regardless of
    // which enum the field is declared with.
    entry.query_type = static_cast<decltype(entry.query_type)>(type);

    // Use precise queries if supported, otherwise boolean (which will be incorrect).
    const VkQueryControlFlags flags =
        g_vulkan_context->SupportsPreciseOcclusionQueries() ? VK_QUERY_CONTROL_PRECISE_BIT : 0;

    // Ensure the query starts within a render pass.
    StateTracker::GetInstance()->BeginRenderPass();
    vkCmdBeginQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, m_query_next_pos,
                    flags);
  }
}
// Ends the query that EnableQuery() started for the given group and marks its slot as
// pending readback. Groups other than PQG_ZCOMP_ZCOMPLOC / PQG_ZCOMP are ignored.
void PerfQuery::DisableQuery(PerfQueryGroup type)
{
  if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
  {
    vkCmdEndQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, m_query_next_pos);

    // Tag the slot with the fence counter of the command buffer that contains the query.
    // ReadbackQueries() compares this against the completed fence counter to decide
    // whether the result can be fetched yet, and PartialFlush() compares it against the
    // current counter to decide whether a submit is needed. Without this the slot would
    // always look complete and be read back before the GPU has executed the query.
    m_query_buffer[m_query_next_pos].fence_counter =
        g_command_buffer_mgr->GetCurrentFenceCounter();

    // Advance the ring-buffer write position and account for the outstanding query.
    m_query_next_pos = (m_query_next_pos + 1) % PERF_QUERY_BUFFER_SIZE;
    m_query_count++;
  }
}
void PerfQuery::ResetQuery()
{
m_query_count = 0;
m_query_readback_pos = 0;
m_query_next_pos = 0;
std::fill_n(m_results, ArraySize(m_results), 0);
// Reset entire query pool, ensuring all queries are ready to write to.
StateTracker::GetInstance()->EndRenderPass();
vkCmdResetQueryPool(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, 0,
PERF_QUERY_BUFFER_SIZE);
std::memset(m_query_buffer.data(), 0, sizeof(ActiveQuery) * m_query_buffer.size());
}
// Returns the accumulated total for the requested query type, divided by four.
// Types that are not handled below yield zero.
u32 PerfQuery::GetQueryResult(PerfQueryType type)
{
  u32 total = 0;

  switch (type)
  {
  case PQ_ZCOMP_INPUT_ZCOMPLOC:
  case PQ_ZCOMP_OUTPUT_ZCOMPLOC:
    total = m_results[PQG_ZCOMP_ZCOMPLOC];
    break;

  case PQ_ZCOMP_INPUT:
  case PQ_ZCOMP_OUTPUT:
    total = m_results[PQG_ZCOMP];
    break;

  case PQ_BLEND_INPUT:
    // Blend input is reported as the sum of both Z-compare groups.
    total = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
    break;

  case PQ_EFB_COPY_CLOCKS:
    total = m_results[PQG_EFB_COPY_CLOCKS];
    break;

  default:
    break;
  }

  return total / 4;
}
// Repeats blocking partial flushes until IsFlushed() reports that nothing is outstanding.
void PerfQuery::FlushResults()
{
  for (;;)
  {
    if (IsFlushed())
      return;

    PartialFlush(true);
  }
}
// Returns true when there are no queries still waiting to be read back.
bool PerfQuery::IsFlushed() const
{
  const bool nothing_outstanding = (m_query_count == 0);
  return nothing_outstanding;
}
bool PerfQuery::CreateQueryPool()
{
VkQueryPoolCreateInfo info = {
VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, // VkStructureType sType
nullptr, // const void* pNext
0, // VkQueryPoolCreateFlags flags
VK_QUERY_TYPE_OCCLUSION, // VkQueryType queryType
PERF_QUERY_BUFFER_SIZE, // uint32_t queryCount
0 // VkQueryPipelineStatisticFlags pipelineStatistics;
};
VkResult res = vkCreateQueryPool(g_vulkan_context->GetDevice(), &info, nullptr, &m_query_pool);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkCreateQueryPool failed: ");
return false;
}
return true;
}
void PerfQuery::ReadbackQueries()
{
const u64 completed_fence_counter = g_command_buffer_mgr->GetCompletedFenceCounter();
// Need to save these since ProcessResults will modify them.
const u32 outstanding_queries = m_query_count;
u32 readback_count = 0;
for (u32 i = 0; i < outstanding_queries; i++)
{
u32 index = (m_query_readback_pos + readback_count) % PERF_QUERY_BUFFER_SIZE;
const ActiveQuery& entry = m_query_buffer[index];
if (entry.fence_counter > completed_fence_counter)
break;
// If this wrapped around, we need to flush the entries before the end of the buffer.
if (index < m_query_readback_pos)
{
ReadbackQueries(readback_count);
DEBUG_ASSERT(m_query_readback_pos == 0);
readback_count = 0;
}
readback_count++;
}
if (readback_count > 0)
ReadbackQueries(readback_count);
}
// Fetches the results of `query_count` consecutive queries starting at the current
// readback position, adds them (scaled to native resolution) to the per-group totals,
// and retires the corresponding slots.
//
// The span must not wrap past the end of the ring buffer; the parameterless overload
// splits wrapped spans before calling this.
//
// NOTE(review): this function does not reset the consumed query-pool slots; confirm
// they are reset (vkCmdResetQueryPool) elsewhere before the slots are reused.
void PerfQuery::ReadbackQueries(u32 query_count)
{
  // Should be at maximum query_count queries pending.
  ASSERT(query_count <= m_query_count &&
         (m_query_readback_pos + query_count) <= PERF_QUERY_BUFFER_SIZE);

  // Read back from the GPU.
  const VkResult res =
      vkGetQueryPoolResults(g_vulkan_context->GetDevice(), m_query_pool, m_query_readback_pos,
                            query_count, query_count * sizeof(PerfQueryDataType),
                            m_query_result_buffer.data(), sizeof(PerfQueryDataType), 0);

  // If the call failed, the buffer still holds whatever an earlier readback left there.
  // Those stale values must not be folded into the counters.
  const bool results_valid = (res == VK_SUCCESS);
  if (!results_valid)
    LOG_VULKAN_ERROR(res, "vkGetQueryPoolResults failed: ");

  // Remove pending queries.
  for (u32 i = 0; i < query_count; i++)
  {
    const u32 index = (m_query_readback_pos + i) % PERF_QUERY_BUFFER_SIZE;
    ActiveQuery& entry = m_query_buffer[index];

    // Should have a fence associated with it (waiting for a result).
    DEBUG_ASSERT(entry.fence_counter != 0);
    entry.fence_counter = 0;
    entry.has_value = false;

    // The slot is retired either way; only a successful readback contributes a value.
    if (!results_valid)
      continue;

    // NOTE: Reported pixel metrics should be referenced to native resolution
    m_results[entry.query_type] +=
        static_cast<u32>(static_cast<u64>(m_query_result_buffer[i]) * EFB_WIDTH /
                         g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight());
  }

  m_query_readback_pos = (m_query_readback_pos + query_count) % PERF_QUERY_BUFFER_SIZE;
  m_query_count -= query_count;
}
void PerfQuery::PartialFlush(bool blocking)
{
// Submit a command buffer in the background if the front query is not bound to one.
if (blocking || m_query_buffer[m_query_readback_pos].fence_counter ==
g_command_buffer_mgr->GetCurrentFenceCounter())
{
Renderer::GetInstance()->ExecuteCommandBuffer(true, blocking);
}
ReadbackQueries();
}
} // namespace Vulkan