From e7ac19979dce98f28947c8761ce4391fc7abe91b Mon Sep 17 00:00:00 2001 From: Samuliak Date: Wed, 30 Oct 2024 09:20:50 +0100 Subject: [PATCH] make occlusion queries accumulate with draws --- .../HW/Latte/Renderer/Metal/MetalCommon.h | 7 +++++ .../HW/Latte/Renderer/Metal/MetalQuery.cpp | 17 ++++++----- src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h | 7 ++++- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 17 +++++------ .../HW/Latte/Renderer/Metal/MetalRenderer.h | 29 +++++++------------ 5 files changed, 40 insertions(+), 37 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h index a1fe7f82..d7de0a28 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h @@ -22,6 +22,13 @@ struct MetalPixelFormatSupport } }; +// TODO: don't define a new struct for this +struct MetalQueryRange +{ + uint32 begin; + uint32 end; +}; + #define MAX_MTL_BUFFERS 31 // Buffer indices 28-30 are reserved for the helper shaders #define GET_MTL_VERTEX_BUFFER_INDEX(index) (MAX_MTL_BUFFERS - index - 4) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp index 91f252e8..6e6b14c3 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp @@ -1,6 +1,5 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalQuery.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" -#include "HW/Latte/Renderer/Metal/MetalCommon.h" bool LatteQueryObjectMtl::getResult(uint64& numSamplesPassed) { @@ -13,29 +12,31 @@ bool LatteQueryObjectMtl::getResult(uint64& numSamplesPassed) if (!CommandBufferCompleted(m_commandBuffer)) return false; - numSamplesPassed = m_mtlr->GetOcclusionQueryResultsPtr()[m_queryIndex]; + uint64* resultPtr = m_mtlr->GetOcclusionQueryResultsPtr(); + + numSamplesPassed = 0; + for (uint32 i = m_range.begin; i != m_range.end; i = (i + 1) % MetalRenderer::OCCLUSION_QUERY_POOL_SIZE) + numSamplesPassed += resultPtr[i]; return true; } LatteQueryObjectMtl::~LatteQueryObjectMtl() { - if (m_queryIndex != INVALID_UINT32) - m_mtlr->ReleaseOcclusionQueryIndex(m_queryIndex); - if (m_commandBuffer) m_commandBuffer->release(); } void LatteQueryObjectMtl::begin() { - m_queryIndex = m_mtlr->GetAvailableOcclusionQueryIndex(); - m_mtlr->SetActiveOcclusionQueryIndex(m_queryIndex); + m_range.begin = m_mtlr->GetOcclusionQueryIndex(); + m_mtlr->BeginOcclusionQuery(); } void LatteQueryObjectMtl::end() { - m_mtlr->SetActiveOcclusionQueryIndex(INVALID_UINT32); + m_range.end = m_mtlr->GetOcclusionQueryIndex(); + m_mtlr->EndOcclusionQuery(); if (m_mtlr->IsCommandBufferActive()) { m_commandBuffer = m_mtlr->GetCurrentCommandBuffer()->retain(); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h index 554cdace..7c9bc2cf 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h @@ -14,10 +14,15 @@ public: void begin() override; void end() override; + void GrowRange() + { + m_range.end++; + } + private: class MetalRenderer* m_mtlr; - uint32 m_queryIndex = INVALID_UINT32; + MetalQueryRange m_range = {INVALID_UINT32, INVALID_UINT32}; // TODO: make this a list of command buffers MTL::CommandBuffer* m_commandBuffer = nullptr; }; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 4252bea3..8b3377ac 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -31,7 +31,6 @@ #include "imgui/imgui_impl_metal.h" #define DEFAULT_COMMIT_TRESHOLD 196 -#define OCCLUSION_QUERY_POOL_SIZE 1024 extern bool hasValidFramebufferAttached; @@ -98,10 +97,6 @@ MetalRenderer::MetalRenderer() #endif m_occlusionQuery.m_resultsPtr = (uint64*)m_occlusionQuery.m_resultBuffer->contents(); - m_occlusionQuery.m_availableIndices.reserve(OCCLUSION_QUERY_POOL_SIZE); - for (uint32 i = 0; i < OCCLUSION_QUERY_POOL_SIZE; i++) - m_occlusionQuery.m_availableIndices.push_back(i); - // Initialize state for (uint32 i = 0; i < METAL_SHADER_TYPE_TOTAL; i++) { @@ -1115,11 +1110,10 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 } // Visibility result mode - if (m_occlusionQuery.m_activeIndex != encoderState.m_visibilityResultOffset) + if (m_occlusionQuery.m_active) { - auto mode = (m_occlusionQuery.m_activeIndex == INVALID_UINT32 ? MTL::VisibilityResultModeDisabled : MTL::VisibilityResultModeCounting); - renderCommandEncoder->setVisibilityResultMode(mode, m_occlusionQuery.m_activeIndex * sizeof(uint64)); - encoderState.m_visibilityResultOffset = m_occlusionQuery.m_activeIndex; + auto mode = (m_occlusionQuery.m_currentIndex == INVALID_UINT32 ? MTL::VisibilityResultModeDisabled : MTL::VisibilityResultModeCounting); + renderCommandEncoder->setVisibilityResultMode(mode, m_occlusionQuery.m_currentIndex * sizeof(uint64)); } // todo - how does culling behave with rects? @@ -1302,6 +1296,11 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 m_state.m_isFirstDrawInRenderPass = false; + // Occlusion queries + if (m_occlusionQuery.m_active) + m_occlusionQuery.m_currentIndex = (m_occlusionQuery.m_currentIndex + 1) % OCCLUSION_QUERY_POOL_SIZE; + + // Streamout LatteStreamout_FinishDrawcall(false); // Debug diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 99d95ac7..9c1bb2dc 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -104,7 +104,6 @@ struct MetalEncoderState uint32 m_depthSlope = 0; uint32 m_depthClamp = 0; bool m_depthClipEnable = true; - uint32 m_visibilityResultOffset = INVALID_UINT32; struct { MTL::Buffer* m_buffer; size_t m_offset; @@ -170,7 +169,8 @@ enum class MetalEncoderType class MetalRenderer : public Renderer { public: - static const inline int TEXTURE_READBACK_SIZE = 32 * 1024 * 1024; // 32 MB + static constexpr uint32 OCCLUSION_QUERY_POOL_SIZE = 1024; + static constexpr uint32 TEXTURE_READBACK_SIZE = 32 * 1024 * 1024; // 32 MB MetalRenderer(); ~MetalRenderer() override; @@ -428,28 +428,19 @@ public: return m_occlusionQuery.m_resultsPtr; } - uint32 GetAvailableOcclusionQueryIndex() + uint32 GetOcclusionQueryIndex() { - if (m_occlusionQuery.m_availableIndices.empty()) - { - cemuLog_log(LogType::Force, "No occlusion query index available"); - return 0; - } - - uint32 queryIndex = m_occlusionQuery.m_availableIndices.back(); - m_occlusionQuery.m_availableIndices.pop_back(); - - return queryIndex; + return m_occlusionQuery.m_currentIndex; } - void ReleaseOcclusionQueryIndex(uint32 queryIndex) + void BeginOcclusionQuery() { - m_occlusionQuery.m_availableIndices.push_back(queryIndex); + m_occlusionQuery.m_active = true; } - void SetActiveOcclusionQueryIndex(uint32 queryIndex) + void EndOcclusionQuery() { - m_occlusionQuery.m_activeIndex = queryIndex; + m_occlusionQuery.m_active = false; } private: @@ -505,8 +496,8 @@ private: { MTL::Buffer* m_resultBuffer; uint64* m_resultsPtr; - std::vector m_availableIndices; - uint32 m_activeIndex = INVALID_UINT32; + uint32 m_currentIndex = 0; + bool m_active = false; } m_occlusionQuery; // Active objects