mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-24 15:01:16 +01:00
Merge pull request #11286 from K0bin/vk-query-fix
VideoBackends: Query fixes and cleanups
This commit is contained in:
commit
fb8aa9744e
@ -24,7 +24,7 @@ PerfQuery::PerfQuery() : m_query_read_pos()
|
|||||||
|
|
||||||
PerfQuery::~PerfQuery() = default;
|
PerfQuery::~PerfQuery() = default;
|
||||||
|
|
||||||
void PerfQuery::EnableQuery(PerfQueryGroup type)
|
void PerfQuery::EnableQuery(PerfQueryGroup group)
|
||||||
{
|
{
|
||||||
u32 query_count = m_query_count.load(std::memory_order_relaxed);
|
u32 query_count = m_query_count.load(std::memory_order_relaxed);
|
||||||
|
|
||||||
@ -44,21 +44,21 @@ void PerfQuery::EnableQuery(PerfQueryGroup type)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// start query
|
// start query
|
||||||
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
|
if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP)
|
||||||
{
|
{
|
||||||
auto& entry = m_query_buffer[(m_query_read_pos + query_count) % m_query_buffer.size()];
|
auto& entry = m_query_buffer[(m_query_read_pos + query_count) % m_query_buffer.size()];
|
||||||
|
|
||||||
D3D::context->Begin(entry.query.Get());
|
D3D::context->Begin(entry.query.Get());
|
||||||
entry.query_type = type;
|
entry.query_group = group;
|
||||||
|
|
||||||
m_query_count.fetch_add(1, std::memory_order_relaxed);
|
m_query_count.fetch_add(1, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void PerfQuery::DisableQuery(PerfQueryGroup type)
|
void PerfQuery::DisableQuery(PerfQueryGroup group)
|
||||||
{
|
{
|
||||||
// stop query
|
// stop query
|
||||||
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
|
if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP)
|
||||||
{
|
{
|
||||||
auto& entry = m_query_buffer[(m_query_read_pos + m_query_count.load(std::memory_order_relaxed) +
|
auto& entry = m_query_buffer[(m_query_read_pos + m_query_count.load(std::memory_order_relaxed) +
|
||||||
m_query_buffer.size() - 1) %
|
m_query_buffer.size() - 1) %
|
||||||
@ -116,8 +116,8 @@ void PerfQuery::FlushOne()
|
|||||||
// hardware behavior when drawing triangles.
|
// hardware behavior when drawing triangles.
|
||||||
const u64 native_res_result = result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT /
|
const u64 native_res_result = result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT /
|
||||||
g_renderer->GetTargetHeight();
|
g_renderer->GetTargetHeight();
|
||||||
m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
|
m_results[entry.query_group].fetch_add(static_cast<u32>(native_res_result),
|
||||||
std::memory_order_relaxed);
|
std::memory_order_relaxed);
|
||||||
|
|
||||||
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
|
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
|
||||||
m_query_count.fetch_sub(1, std::memory_order_relaxed);
|
m_query_count.fetch_sub(1, std::memory_order_relaxed);
|
||||||
@ -145,8 +145,8 @@ void PerfQuery::WeakFlush()
|
|||||||
// NOTE: Reported pixel metrics should be referenced to native resolution
|
// NOTE: Reported pixel metrics should be referenced to native resolution
|
||||||
const u64 native_res_result = result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT /
|
const u64 native_res_result = result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT /
|
||||||
g_renderer->GetTargetHeight();
|
g_renderer->GetTargetHeight();
|
||||||
m_results[entry.query_type].store(static_cast<u32>(native_res_result),
|
m_results[entry.query_group].store(static_cast<u32>(native_res_result),
|
||||||
std::memory_order_relaxed);
|
std::memory_order_relaxed);
|
||||||
|
|
||||||
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
|
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
|
||||||
m_query_count.fetch_sub(1, std::memory_order_relaxed);
|
m_query_count.fetch_sub(1, std::memory_order_relaxed);
|
||||||
|
@ -15,8 +15,8 @@ public:
|
|||||||
PerfQuery();
|
PerfQuery();
|
||||||
~PerfQuery();
|
~PerfQuery();
|
||||||
|
|
||||||
void EnableQuery(PerfQueryGroup type) override;
|
void EnableQuery(PerfQueryGroup group) override;
|
||||||
void DisableQuery(PerfQueryGroup type) override;
|
void DisableQuery(PerfQueryGroup group) override;
|
||||||
void ResetQuery() override;
|
void ResetQuery() override;
|
||||||
u32 GetQueryResult(PerfQueryType type) override;
|
u32 GetQueryResult(PerfQueryType type) override;
|
||||||
void FlushResults() override;
|
void FlushResults() override;
|
||||||
@ -26,7 +26,7 @@ private:
|
|||||||
struct ActiveQuery
|
struct ActiveQuery
|
||||||
{
|
{
|
||||||
ComPtr<ID3D11Query> query;
|
ComPtr<ID3D11Query> query;
|
||||||
PerfQueryGroup query_type{};
|
PerfQueryGroup query_group{};
|
||||||
};
|
};
|
||||||
|
|
||||||
void WeakFlush();
|
void WeakFlush();
|
||||||
|
@ -48,7 +48,7 @@ bool PerfQuery::Initialize()
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void PerfQuery::EnableQuery(PerfQueryGroup type)
|
void PerfQuery::EnableQuery(PerfQueryGroup group)
|
||||||
{
|
{
|
||||||
// Block if there are no free slots.
|
// Block if there are no free slots.
|
||||||
// Otherwise, try to keep half of them available.
|
// Otherwise, try to keep half of them available.
|
||||||
@ -66,20 +66,21 @@ void PerfQuery::EnableQuery(PerfQueryGroup type)
|
|||||||
// this assumes that the caller has bound all required state prior to enabling the query.
|
// this assumes that the caller has bound all required state prior to enabling the query.
|
||||||
Renderer::GetInstance()->ApplyState();
|
Renderer::GetInstance()->ApplyState();
|
||||||
|
|
||||||
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
|
if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP)
|
||||||
{
|
{
|
||||||
ActiveQuery& entry = m_query_buffer[m_query_next_pos];
|
ActiveQuery& entry = m_query_buffer[m_query_next_pos];
|
||||||
ASSERT(!entry.has_value && !entry.resolved);
|
ASSERT(!entry.has_value && !entry.resolved);
|
||||||
entry.has_value = true;
|
entry.has_value = true;
|
||||||
|
entry.query_group = group;
|
||||||
|
|
||||||
g_dx_context->GetCommandList()->BeginQuery(m_query_heap.Get(), D3D12_QUERY_TYPE_OCCLUSION,
|
g_dx_context->GetCommandList()->BeginQuery(m_query_heap.Get(), D3D12_QUERY_TYPE_OCCLUSION,
|
||||||
m_query_next_pos);
|
m_query_next_pos);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void PerfQuery::DisableQuery(PerfQueryGroup type)
|
void PerfQuery::DisableQuery(PerfQueryGroup group)
|
||||||
{
|
{
|
||||||
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
|
if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP)
|
||||||
{
|
{
|
||||||
g_dx_context->GetCommandList()->EndQuery(m_query_heap.Get(), D3D12_QUERY_TYPE_OCCLUSION,
|
g_dx_context->GetCommandList()->EndQuery(m_query_heap.Get(), D3D12_QUERY_TYPE_OCCLUSION,
|
||||||
m_query_next_pos);
|
m_query_next_pos);
|
||||||
@ -245,8 +246,8 @@ void PerfQuery::AccumulateQueriesFromBuffer(u32 query_count)
|
|||||||
const u64 native_res_result = static_cast<u64>(result) * EFB_WIDTH /
|
const u64 native_res_result = static_cast<u64>(result) * EFB_WIDTH /
|
||||||
g_renderer->GetTargetWidth() * EFB_HEIGHT /
|
g_renderer->GetTargetWidth() * EFB_HEIGHT /
|
||||||
g_renderer->GetTargetHeight();
|
g_renderer->GetTargetHeight();
|
||||||
m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
|
m_results[entry.query_group].fetch_add(static_cast<u32>(native_res_result),
|
||||||
std::memory_order_relaxed);
|
std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr D3D12_RANGE write_range = {0, 0};
|
constexpr D3D12_RANGE write_range = {0, 0};
|
||||||
|
@ -20,8 +20,8 @@ public:
|
|||||||
bool Initialize();
|
bool Initialize();
|
||||||
void ResolveQueries();
|
void ResolveQueries();
|
||||||
|
|
||||||
void EnableQuery(PerfQueryGroup type) override;
|
void EnableQuery(PerfQueryGroup group) override;
|
||||||
void DisableQuery(PerfQueryGroup type) override;
|
void DisableQuery(PerfQueryGroup group) override;
|
||||||
void ResetQuery() override;
|
void ResetQuery() override;
|
||||||
u32 GetQueryResult(PerfQueryType type) override;
|
u32 GetQueryResult(PerfQueryType type) override;
|
||||||
void FlushResults() override;
|
void FlushResults() override;
|
||||||
@ -31,7 +31,7 @@ private:
|
|||||||
struct ActiveQuery
|
struct ActiveQuery
|
||||||
{
|
{
|
||||||
u64 fence_value;
|
u64 fence_value;
|
||||||
PerfQueryType query_type;
|
PerfQueryGroup query_group;
|
||||||
bool has_value;
|
bool has_value;
|
||||||
bool resolved;
|
bool resolved;
|
||||||
};
|
};
|
||||||
|
@ -30,14 +30,14 @@ PerfQuery::PerfQuery() : m_query_read_pos()
|
|||||||
ResetQuery();
|
ResetQuery();
|
||||||
}
|
}
|
||||||
|
|
||||||
void PerfQuery::EnableQuery(PerfQueryGroup type)
|
void PerfQuery::EnableQuery(PerfQueryGroup group)
|
||||||
{
|
{
|
||||||
m_query->EnableQuery(type);
|
m_query->EnableQuery(group);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PerfQuery::DisableQuery(PerfQueryGroup type)
|
void PerfQuery::DisableQuery(PerfQueryGroup group)
|
||||||
{
|
{
|
||||||
m_query->DisableQuery(type);
|
m_query->DisableQuery(group);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool PerfQuery::IsFlushed() const
|
bool PerfQuery::IsFlushed() const
|
||||||
@ -96,7 +96,7 @@ PerfQueryGL::~PerfQueryGL()
|
|||||||
glDeleteQueries(1, &query.query_id);
|
glDeleteQueries(1, &query.query_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PerfQueryGL::EnableQuery(PerfQueryGroup type)
|
void PerfQueryGL::EnableQuery(PerfQueryGroup group)
|
||||||
{
|
{
|
||||||
u32 query_count = m_query_count.load(std::memory_order_relaxed);
|
u32 query_count = m_query_count.load(std::memory_order_relaxed);
|
||||||
|
|
||||||
@ -115,20 +115,20 @@ void PerfQueryGL::EnableQuery(PerfQueryGroup type)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// start query
|
// start query
|
||||||
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
|
if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP)
|
||||||
{
|
{
|
||||||
auto& entry = m_query_buffer[(m_query_read_pos + query_count) % m_query_buffer.size()];
|
auto& entry = m_query_buffer[(m_query_read_pos + query_count) % m_query_buffer.size()];
|
||||||
|
|
||||||
glBeginQuery(m_query_type, entry.query_id);
|
glBeginQuery(m_query_type, entry.query_id);
|
||||||
entry.query_type = type;
|
entry.query_group = group;
|
||||||
|
|
||||||
m_query_count.fetch_add(1, std::memory_order_relaxed);
|
m_query_count.fetch_add(1, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void PerfQueryGL::DisableQuery(PerfQueryGroup type)
|
void PerfQueryGL::DisableQuery(PerfQueryGroup group)
|
||||||
{
|
{
|
||||||
// stop query
|
// stop query
|
||||||
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
|
if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP)
|
||||||
{
|
{
|
||||||
glEndQuery(m_query_type);
|
glEndQuery(m_query_type);
|
||||||
}
|
}
|
||||||
@ -171,7 +171,7 @@ void PerfQueryGL::FlushOne()
|
|||||||
if (g_ActiveConfig.iMultisamples > 1)
|
if (g_ActiveConfig.iMultisamples > 1)
|
||||||
result /= g_ActiveConfig.iMultisamples;
|
result /= g_ActiveConfig.iMultisamples;
|
||||||
|
|
||||||
m_results[entry.query_type].fetch_add(result, std::memory_order_relaxed);
|
m_results[entry.query_group].fetch_add(result, std::memory_order_relaxed);
|
||||||
|
|
||||||
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
|
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
|
||||||
m_query_count.fetch_sub(1, std::memory_order_relaxed);
|
m_query_count.fetch_sub(1, std::memory_order_relaxed);
|
||||||
@ -196,7 +196,7 @@ PerfQueryGLESNV::~PerfQueryGLESNV()
|
|||||||
glDeleteOcclusionQueriesNV(1, &query.query_id);
|
glDeleteOcclusionQueriesNV(1, &query.query_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PerfQueryGLESNV::EnableQuery(PerfQueryGroup type)
|
void PerfQueryGLESNV::EnableQuery(PerfQueryGroup group)
|
||||||
{
|
{
|
||||||
u32 query_count = m_query_count.load(std::memory_order_relaxed);
|
u32 query_count = m_query_count.load(std::memory_order_relaxed);
|
||||||
|
|
||||||
@ -215,20 +215,20 @@ void PerfQueryGLESNV::EnableQuery(PerfQueryGroup type)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// start query
|
// start query
|
||||||
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
|
if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP)
|
||||||
{
|
{
|
||||||
auto& entry = m_query_buffer[(m_query_read_pos + query_count) % m_query_buffer.size()];
|
auto& entry = m_query_buffer[(m_query_read_pos + query_count) % m_query_buffer.size()];
|
||||||
|
|
||||||
glBeginOcclusionQueryNV(entry.query_id);
|
glBeginOcclusionQueryNV(entry.query_id);
|
||||||
entry.query_type = type;
|
entry.query_group = group;
|
||||||
|
|
||||||
m_query_count.fetch_add(1, std::memory_order_relaxed);
|
m_query_count.fetch_add(1, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void PerfQueryGLESNV::DisableQuery(PerfQueryGroup type)
|
void PerfQueryGLESNV::DisableQuery(PerfQueryGroup group)
|
||||||
{
|
{
|
||||||
// stop query
|
// stop query
|
||||||
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
|
if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP)
|
||||||
{
|
{
|
||||||
glEndOcclusionQueryNV();
|
glEndOcclusionQueryNV();
|
||||||
}
|
}
|
||||||
@ -266,8 +266,8 @@ void PerfQueryGLESNV::FlushOne()
|
|||||||
// hardware behavior when drawing triangles.
|
// hardware behavior when drawing triangles.
|
||||||
const u64 native_res_result = static_cast<u64>(result) * EFB_WIDTH * EFB_HEIGHT /
|
const u64 native_res_result = static_cast<u64>(result) * EFB_WIDTH * EFB_HEIGHT /
|
||||||
(g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight());
|
(g_renderer->GetTargetWidth() * g_renderer->GetTargetHeight());
|
||||||
m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
|
m_results[entry.query_group].fetch_add(static_cast<u32>(native_res_result),
|
||||||
std::memory_order_relaxed);
|
std::memory_order_relaxed);
|
||||||
|
|
||||||
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
|
m_query_read_pos = (m_query_read_pos + 1) % m_query_buffer.size();
|
||||||
m_query_count.fetch_sub(1, std::memory_order_relaxed);
|
m_query_count.fetch_sub(1, std::memory_order_relaxed);
|
||||||
|
@ -19,8 +19,8 @@ class PerfQuery : public PerfQueryBase
|
|||||||
public:
|
public:
|
||||||
PerfQuery();
|
PerfQuery();
|
||||||
~PerfQuery() {}
|
~PerfQuery() {}
|
||||||
void EnableQuery(PerfQueryGroup type) override;
|
void EnableQuery(PerfQueryGroup group) override;
|
||||||
void DisableQuery(PerfQueryGroup type) override;
|
void DisableQuery(PerfQueryGroup group) override;
|
||||||
void ResetQuery() override;
|
void ResetQuery() override;
|
||||||
u32 GetQueryResult(PerfQueryType type) override;
|
u32 GetQueryResult(PerfQueryType type) override;
|
||||||
void FlushResults() override;
|
void FlushResults() override;
|
||||||
@ -30,7 +30,7 @@ protected:
|
|||||||
struct ActiveQuery
|
struct ActiveQuery
|
||||||
{
|
{
|
||||||
GLuint query_id;
|
GLuint query_id;
|
||||||
PerfQueryGroup query_type;
|
PerfQueryGroup query_group;
|
||||||
};
|
};
|
||||||
|
|
||||||
// when testing in SMS: 64 was too small, 128 was ok
|
// when testing in SMS: 64 was too small, 128 was ok
|
||||||
@ -52,8 +52,8 @@ public:
|
|||||||
PerfQueryGL(GLenum query_type);
|
PerfQueryGL(GLenum query_type);
|
||||||
~PerfQueryGL();
|
~PerfQueryGL();
|
||||||
|
|
||||||
void EnableQuery(PerfQueryGroup type) override;
|
void EnableQuery(PerfQueryGroup group) override;
|
||||||
void DisableQuery(PerfQueryGroup type) override;
|
void DisableQuery(PerfQueryGroup group) override;
|
||||||
void FlushResults() override;
|
void FlushResults() override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -70,8 +70,8 @@ public:
|
|||||||
PerfQueryGLESNV();
|
PerfQueryGLESNV();
|
||||||
~PerfQueryGLESNV();
|
~PerfQueryGLESNV();
|
||||||
|
|
||||||
void EnableQuery(PerfQueryGroup type) override;
|
void EnableQuery(PerfQueryGroup group) override;
|
||||||
void DisableQuery(PerfQueryGroup type) override;
|
void DisableQuery(PerfQueryGroup group) override;
|
||||||
void FlushResults() override;
|
void FlushResults() override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -73,11 +73,6 @@ void StagingBuffer::InvalidateGPUCache(VkCommandBuffer command_buffer,
|
|||||||
VkPipelineStageFlagBits dest_pipeline_stage,
|
VkPipelineStageFlagBits dest_pipeline_stage,
|
||||||
VkDeviceSize offset, VkDeviceSize size)
|
VkDeviceSize offset, VkDeviceSize size)
|
||||||
{
|
{
|
||||||
VkMemoryPropertyFlags flags = 0;
|
|
||||||
vmaGetAllocationMemoryProperties(g_vulkan_context->GetMemoryAllocator(), m_alloc, &flags);
|
|
||||||
if (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) [[likely]]
|
|
||||||
return;
|
|
||||||
|
|
||||||
ASSERT((offset + size) <= m_size || (offset < m_size && size == VK_WHOLE_SIZE));
|
ASSERT((offset + size) <= m_size || (offset < m_size && size == VK_WHOLE_SIZE));
|
||||||
BufferMemoryBarrier(command_buffer, m_buffer, VK_ACCESS_HOST_WRITE_BIT, dest_access_flags, offset,
|
BufferMemoryBarrier(command_buffer, m_buffer, VK_ACCESS_HOST_WRITE_BIT, dest_access_flags, offset,
|
||||||
size, VK_PIPELINE_STAGE_HOST_BIT, dest_pipeline_stage);
|
size, VK_PIPELINE_STAGE_HOST_BIT, dest_pipeline_stage);
|
||||||
@ -88,25 +83,15 @@ void StagingBuffer::PrepareForGPUWrite(VkCommandBuffer command_buffer,
|
|||||||
VkPipelineStageFlagBits dst_pipeline_stage,
|
VkPipelineStageFlagBits dst_pipeline_stage,
|
||||||
VkDeviceSize offset, VkDeviceSize size)
|
VkDeviceSize offset, VkDeviceSize size)
|
||||||
{
|
{
|
||||||
VkMemoryPropertyFlags flags = 0;
|
|
||||||
vmaGetAllocationMemoryProperties(g_vulkan_context->GetMemoryAllocator(), m_alloc, &flags);
|
|
||||||
if (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) [[likely]]
|
|
||||||
return;
|
|
||||||
|
|
||||||
ASSERT((offset + size) <= m_size || (offset < m_size && size == VK_WHOLE_SIZE));
|
ASSERT((offset + size) <= m_size || (offset < m_size && size == VK_WHOLE_SIZE));
|
||||||
BufferMemoryBarrier(command_buffer, m_buffer, 0, dst_access_flags, offset, size,
|
BufferMemoryBarrier(command_buffer, m_buffer, VK_ACCESS_MEMORY_WRITE_BIT, dst_access_flags,
|
||||||
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, dst_pipeline_stage);
|
offset, size, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, dst_pipeline_stage);
|
||||||
}
|
}
|
||||||
|
|
||||||
void StagingBuffer::FlushGPUCache(VkCommandBuffer command_buffer, VkAccessFlagBits src_access_flags,
|
void StagingBuffer::FlushGPUCache(VkCommandBuffer command_buffer, VkAccessFlagBits src_access_flags,
|
||||||
VkPipelineStageFlagBits src_pipeline_stage, VkDeviceSize offset,
|
VkPipelineStageFlagBits src_pipeline_stage, VkDeviceSize offset,
|
||||||
VkDeviceSize size)
|
VkDeviceSize size)
|
||||||
{
|
{
|
||||||
VkMemoryPropertyFlags flags = 0;
|
|
||||||
vmaGetAllocationMemoryProperties(g_vulkan_context->GetMemoryAllocator(), m_alloc, &flags);
|
|
||||||
if (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) [[likely]]
|
|
||||||
return;
|
|
||||||
|
|
||||||
ASSERT((offset + size) <= m_size || (offset < m_size && size == VK_WHOLE_SIZE));
|
ASSERT((offset + size) <= m_size || (offset < m_size && size == VK_WHOLE_SIZE));
|
||||||
BufferMemoryBarrier(command_buffer, m_buffer, src_access_flags, VK_ACCESS_HOST_READ_BIT, offset,
|
BufferMemoryBarrier(command_buffer, m_buffer, src_access_flags, VK_ACCESS_HOST_READ_BIT, offset,
|
||||||
size, src_pipeline_stage, VK_PIPELINE_STAGE_HOST_BIT);
|
size, src_pipeline_stage, VK_PIPELINE_STAGE_HOST_BIT);
|
||||||
|
@ -35,10 +35,13 @@ bool PerfQuery::Initialize()
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Vulkan requires query pools to be reset after creation
|
||||||
|
ResetQuery();
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void PerfQuery::EnableQuery(PerfQueryGroup type)
|
void PerfQuery::EnableQuery(PerfQueryGroup group)
|
||||||
{
|
{
|
||||||
// Block if there are no free slots.
|
// Block if there are no free slots.
|
||||||
// Otherwise, try to keep half of them available.
|
// Otherwise, try to keep half of them available.
|
||||||
@ -50,11 +53,12 @@ void PerfQuery::EnableQuery(PerfQueryGroup type)
|
|||||||
// a buffer with open queries.
|
// a buffer with open queries.
|
||||||
StateTracker::GetInstance()->Bind();
|
StateTracker::GetInstance()->Bind();
|
||||||
|
|
||||||
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
|
if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP)
|
||||||
{
|
{
|
||||||
ActiveQuery& entry = m_query_buffer[m_query_next_pos];
|
ActiveQuery& entry = m_query_buffer[m_query_next_pos];
|
||||||
DEBUG_ASSERT(!entry.has_value);
|
DEBUG_ASSERT(!entry.has_value);
|
||||||
entry.has_value = true;
|
entry.has_value = true;
|
||||||
|
entry.query_group = group;
|
||||||
|
|
||||||
// Use precise queries if supported, otherwise boolean (which will be incorrect).
|
// Use precise queries if supported, otherwise boolean (which will be incorrect).
|
||||||
VkQueryControlFlags flags =
|
VkQueryControlFlags flags =
|
||||||
@ -67,11 +71,14 @@ void PerfQuery::EnableQuery(PerfQueryGroup type)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void PerfQuery::DisableQuery(PerfQueryGroup type)
|
void PerfQuery::DisableQuery(PerfQueryGroup group)
|
||||||
{
|
{
|
||||||
if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP)
|
if (group == PQG_ZCOMP_ZCOMPLOC || group == PQG_ZCOMP)
|
||||||
{
|
{
|
||||||
vkCmdEndQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, m_query_next_pos);
|
vkCmdEndQuery(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool, m_query_next_pos);
|
||||||
|
ActiveQuery& entry = m_query_buffer[m_query_next_pos];
|
||||||
|
entry.fence_counter = g_command_buffer_mgr->GetCurrentFenceCounter();
|
||||||
|
|
||||||
m_query_next_pos = (m_query_next_pos + 1) % PERF_QUERY_BUFFER_SIZE;
|
m_query_next_pos = (m_query_next_pos + 1) % PERF_QUERY_BUFFER_SIZE;
|
||||||
m_query_count.fetch_add(1, std::memory_order_relaxed);
|
m_query_count.fetch_add(1, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
@ -119,8 +126,10 @@ u32 PerfQuery::GetQueryResult(PerfQueryType type)
|
|||||||
|
|
||||||
void PerfQuery::FlushResults()
|
void PerfQuery::FlushResults()
|
||||||
{
|
{
|
||||||
while (!IsFlushed())
|
if (!IsFlushed())
|
||||||
PartialFlush(true);
|
PartialFlush(true);
|
||||||
|
|
||||||
|
ASSERT(IsFlushed());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool PerfQuery::IsFlushed() const
|
bool PerfQuery::IsFlushed() const
|
||||||
@ -185,13 +194,17 @@ void PerfQuery::ReadbackQueries(u32 query_count)
|
|||||||
(m_query_readback_pos + query_count) <= PERF_QUERY_BUFFER_SIZE);
|
(m_query_readback_pos + query_count) <= PERF_QUERY_BUFFER_SIZE);
|
||||||
|
|
||||||
// Read back from the GPU.
|
// Read back from the GPU.
|
||||||
VkResult res =
|
VkResult res = vkGetQueryPoolResults(
|
||||||
vkGetQueryPoolResults(g_vulkan_context->GetDevice(), m_query_pool, m_query_readback_pos,
|
g_vulkan_context->GetDevice(), m_query_pool, m_query_readback_pos, query_count,
|
||||||
query_count, query_count * sizeof(PerfQueryDataType),
|
query_count * sizeof(PerfQueryDataType), m_query_result_buffer.data(),
|
||||||
m_query_result_buffer.data(), sizeof(PerfQueryDataType), 0);
|
sizeof(PerfQueryDataType), VK_QUERY_RESULT_WAIT_BIT);
|
||||||
if (res != VK_SUCCESS)
|
if (res != VK_SUCCESS)
|
||||||
LOG_VULKAN_ERROR(res, "vkGetQueryPoolResults failed: ");
|
LOG_VULKAN_ERROR(res, "vkGetQueryPoolResults failed: ");
|
||||||
|
|
||||||
|
StateTracker::GetInstance()->EndRenderPass();
|
||||||
|
vkCmdResetQueryPool(g_command_buffer_mgr->GetCurrentCommandBuffer(), m_query_pool,
|
||||||
|
m_query_readback_pos, query_count);
|
||||||
|
|
||||||
// Remove pending queries.
|
// Remove pending queries.
|
||||||
for (u32 i = 0; i < query_count; i++)
|
for (u32 i = 0; i < query_count; i++)
|
||||||
{
|
{
|
||||||
@ -207,8 +220,8 @@ void PerfQuery::ReadbackQueries(u32 query_count)
|
|||||||
const u64 native_res_result = static_cast<u64>(m_query_result_buffer[i]) * EFB_WIDTH /
|
const u64 native_res_result = static_cast<u64>(m_query_result_buffer[i]) * EFB_WIDTH /
|
||||||
g_renderer->GetTargetWidth() * EFB_HEIGHT /
|
g_renderer->GetTargetWidth() * EFB_HEIGHT /
|
||||||
g_renderer->GetTargetHeight();
|
g_renderer->GetTargetHeight();
|
||||||
m_results[entry.query_type].fetch_add(static_cast<u32>(native_res_result),
|
m_results[entry.query_group].fetch_add(static_cast<u32>(native_res_result),
|
||||||
std::memory_order_relaxed);
|
std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
m_query_readback_pos = (m_query_readback_pos + query_count) % PERF_QUERY_BUFFER_SIZE;
|
m_query_readback_pos = (m_query_readback_pos + query_count) % PERF_QUERY_BUFFER_SIZE;
|
||||||
|
@ -22,8 +22,8 @@ public:
|
|||||||
|
|
||||||
bool Initialize();
|
bool Initialize();
|
||||||
|
|
||||||
void EnableQuery(PerfQueryGroup type) override;
|
void EnableQuery(PerfQueryGroup group) override;
|
||||||
void DisableQuery(PerfQueryGroup type) override;
|
void DisableQuery(PerfQueryGroup group) override;
|
||||||
void ResetQuery() override;
|
void ResetQuery() override;
|
||||||
u32 GetQueryResult(PerfQueryType type) override;
|
u32 GetQueryResult(PerfQueryType type) override;
|
||||||
void FlushResults() override;
|
void FlushResults() override;
|
||||||
@ -40,7 +40,7 @@ private:
|
|||||||
struct ActiveQuery
|
struct ActiveQuery
|
||||||
{
|
{
|
||||||
u64 fence_counter;
|
u64 fence_counter;
|
||||||
PerfQueryType query_type;
|
PerfQueryGroup query_group;
|
||||||
bool has_value;
|
bool has_value;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user