From 4cce3699f35ac4472fa6abcf085cb76ec64f4c67 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 12 Sep 2024 08:05:27 +0200 Subject: [PATCH 1/5] put query object into a separate file --- src/Cafe/CMakeLists.txt | 2 + .../HW/Latte/Renderer/Metal/MetalQuery.cpp | 17 +++++++ src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h | 19 ++++++++ .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 19 ++++++++ .../HW/Latte/Renderer/Metal/MetalRenderer.h | 47 ++----------------- 5 files changed, 61 insertions(+), 43 deletions(-) create mode 100644 src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp create mode 100644 src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index 634014b7..10c85270 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -567,6 +567,8 @@ if(ENABLE_METAL) HW/Latte/Renderer/Metal/MetalSamplerCache.h HW/Latte/Renderer/Metal/MetalHybridComputePipeline.cpp HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h + HW/Latte/Renderer/Metal/MetalQuery.cpp + HW/Latte/Renderer/Metal/MetalQuery.h HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h HW/Latte/Renderer/Metal/UtilityShaderSource.h ) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp new file mode 100644 index 00000000..40c73fd4 --- /dev/null +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp @@ -0,0 +1,17 @@ +#include "Cafe/HW/Latte/Renderer/Metal/MetalQuery.h" + +bool LatteQueryObjectMtl::getResult(uint64& numSamplesPassed) +{ + cemuLog_log(LogType::MetalLogging, "LatteQueryObjectMtl::getResult: occlusion queries are not yet supported on Metal"); + return true; +} + +void LatteQueryObjectMtl::begin() +{ + cemuLog_log(LogType::MetalLogging, "LatteQueryObjectMtl::begin: occlusion queries are not yet supported on Metal"); +} + +void LatteQueryObjectMtl::end() +{ + cemuLog_log(LogType::MetalLogging, "LatteQueryObjectMtl::end: occlusion queries are not yet supported on Metal"); +} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h new file mode 100644 index 00000000..ea2be227 --- /dev/null +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h @@ -0,0 +1,19 @@ +#pragma once + +#include "Cafe/HW/Latte/Core/LatteQueryObject.h" + +#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" + +// HACK: Dummy occlusion query object +class LatteQueryObjectMtl : public LatteQueryObject +{ +public: + LatteQueryObjectMtl(class MetalRenderer* mtlRenderer) : m_mtlr{mtlRenderer} {} + + bool getResult(uint64& numSamplesPassed) override; + void begin() override; + void end() override; + +private: + class MetalRenderer* m_mtlr; +}; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 4ff1a3b0..ffb8fb72 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -9,6 +9,7 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalSamplerCache.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalQuery.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h" @@ -1282,6 +1283,24 @@ void MetalRenderer::indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offse */ } +LatteQueryObject* MetalRenderer::occlusionQuery_create() { + cemuLog_log(LogType::MetalLogging, "MetalRenderer::occlusionQuery_create: Occlusion queries are not yet supported on Metal"); + + return new LatteQueryObjectMtl(this); +} + +void MetalRenderer::occlusionQuery_destroy(LatteQueryObject* queryObj) { + cemuLog_log(LogType::MetalLogging, "MetalRenderer::occlusionQuery_destroy: occlusion queries are not yet supported on Metal"); +} + +void MetalRenderer::occlusionQuery_flush() { + cemuLog_log(LogType::MetalLogging, "MetalRenderer::occlusionQuery_flush: occlusion queries are not yet supported on Metal"); +} + +void MetalRenderer::occlusionQuery_updateState() { + cemuLog_log(LogType::MetalLogging, "MetalRenderer::occlusionQuery_updateState: occlusion queries are not yet supported on Metal"); +} + void MetalRenderer::SetBuffer(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Buffer* buffer, size_t offset, uint32 index) { auto& boundBuffer = m_state.m_encoderState.m_buffers[shaderType][index]; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 25051a97..f00f814c 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -155,32 +155,6 @@ enum class MetalEncoderType Blit, }; -// HACK: Dummy occlusion query object for Metal -class LatteQueryObjectMtl : public LatteQueryObject -{ -public: - LatteQueryObjectMtl(class MetalRenderer* mtlRenderer) : m_mtlr{mtlRenderer} {} - - bool getResult(uint64& numSamplesPassed) override - { - cemuLog_log(LogType::MetalLogging, "LatteQueryObjectMtl::getResult: occlusion queries are not yet supported on Metal"); - return true; - } - - void begin() override - { - cemuLog_log(LogType::MetalLogging, "LatteQueryObjectMtl::begin: occlusion queries are not yet supported on Metal"); - } - - void end() override - { - cemuLog_log(LogType::MetalLogging, "LatteQueryObjectMtl::end: occlusion queries are not yet supported on Metal"); - } - -private: - class MetalRenderer* m_mtlr; -}; - class MetalRenderer : public Renderer { public: @@ -296,23 +270,10 @@ public: void indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) override; // occlusion queries - LatteQueryObject* occlusionQuery_create() override { - cemuLog_log(LogType::MetalLogging, "MetalRenderer::occlusionQuery_create: Occlusion queries are not yet supported on Metal"); - - return new LatteQueryObjectMtl(this); - } - - void occlusionQuery_destroy(LatteQueryObject* queryObj) override { - cemuLog_log(LogType::MetalLogging, "MetalRenderer::occlusionQuery_destroy: occlusion queries are not yet supported on Metal"); - } - - void occlusionQuery_flush() override { - cemuLog_log(LogType::MetalLogging, "MetalRenderer::occlusionQuery_flush: occlusion queries are not yet supported on Metal"); - } - - void occlusionQuery_updateState() override { - cemuLog_log(LogType::MetalLogging, "MetalRenderer::occlusionQuery_updateState: occlusion queries are not yet supported on Metal"); - } + LatteQueryObject* occlusionQuery_create() override; + void occlusionQuery_destroy(LatteQueryObject* queryObj) override; + void occlusionQuery_flush() override; + void occlusionQuery_updateState() override; // Helpers MetalPerformanceMonitor& GetPerformanceMonitor() { return m_performanceMonitor; } From eb7c10e89f27d04b5f8a13886ee3c64244a08bfd Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 13 Sep 2024 10:41:03 +0200 Subject: [PATCH 2/5] implement occlusion queries --- .../HW/Latte/Renderer/Metal/CachedFBOMtl.cpp | 9 ++-- .../HW/Latte/Renderer/Metal/CachedFBOMtl.h | 7 +--- .../HW/Latte/Renderer/Metal/MetalQuery.cpp | 14 +++++-- src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h | 5 ++- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 34 ++++++++++++--- .../HW/Latte/Renderer/Metal/MetalRenderer.h | 41 +++++++++++++++++++ 6 files changed, 91 insertions(+), 19 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.cpp index a9e673f6..85adbfb9 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.cpp @@ -1,9 +1,9 @@ #include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" -#include "HW/Latte/Renderer/Metal/LatteToMtl.h" -#include "Metal/MTLRenderPass.hpp" +#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" +#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" -void CachedFBOMtl::CreateRenderPass() +CachedFBOMtl::CachedFBOMtl(class MetalRenderer* metalRenderer, uint64 key) : LatteCachedFBO(key) { m_renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); @@ -39,6 +39,9 @@ void CachedFBOMtl::CreateRenderPass() stencilAttachment->setStoreAction(MTL::StoreActionStore); } } + + // Visibility buffer + m_renderPassDescriptor->setVisibilityResultBuffer(metalRenderer->GetOcclusionQueryResultBuffer()); } CachedFBOMtl::~CachedFBOMtl() diff --git a/src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h index 0d926e7e..f1221eb2 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h @@ -8,10 +8,7 @@ class CachedFBOMtl : public LatteCachedFBO { public: - CachedFBOMtl(uint64 key) : LatteCachedFBO(key) - { - CreateRenderPass(); - } + CachedFBOMtl(class MetalRenderer* metalRenderer, uint64 key); ~CachedFBOMtl(); @@ -22,6 +19,4 @@ public: private: MTL::RenderPassDescriptor* m_renderPassDescriptor = nullptr; - - void CreateRenderPass(); }; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp index 40c73fd4..c27a5620 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp @@ -1,17 +1,25 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalQuery.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" +#include "HW/Latte/Renderer/Metal/MetalCommon.h" bool LatteQueryObjectMtl::getResult(uint64& numSamplesPassed) { - cemuLog_log(LogType::MetalLogging, "LatteQueryObjectMtl::getResult: occlusion queries are not yet supported on Metal"); + if (!m_mtlr->CommandBufferCompleted(m_commandBuffer)) + return false; + + numSamplesPassed = m_mtlr->GetOcclusionQueryResultsPtr()[m_queryIndex]; + return true; } void LatteQueryObjectMtl::begin() { - cemuLog_log(LogType::MetalLogging, "LatteQueryObjectMtl::begin: occlusion queries are not yet supported on Metal"); + m_queryIndex = m_mtlr->GetAvailableOcclusionQueryIndex(); + m_mtlr->SetActiveOcclusionQueryIndex(m_queryIndex); } void LatteQueryObjectMtl::end() { - cemuLog_log(LogType::MetalLogging, "LatteQueryObjectMtl::end: occlusion queries are not yet supported on Metal"); + m_mtlr->SetActiveOcclusionQueryIndex(INVALID_UINT32); + // TODO: request soon submit of the command buffer } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h index ea2be227..8fa53497 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h @@ -4,7 +4,6 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" -// HACK: Dummy occlusion query object class LatteQueryObjectMtl : public LatteQueryObject { public: @@ -16,4 +15,8 @@ public: private: class MetalRenderer* m_mtlr; + + uint32 m_queryIndex; + MTL::CommandBuffer* m_commandBuffer; + uint64 m_acccumulatedSum; }; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index ffb8fb72..045ca77e 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -21,12 +21,14 @@ #include "HW/Latte/Renderer/Metal/MetalCommon.h" #include "HW/Latte/Renderer/Metal/MetalLayerHandle.h" #include "HW/Latte/Renderer/Renderer.h" +#include "Metal/MTLRenderCommandEncoder.hpp" #define IMGUI_IMPL_METAL_CPP #include "imgui/imgui_extension.h" #include "imgui/imgui_impl_metal.h" #define COMMIT_TRESHOLD 256 +#define OCCLUSION_QUERY_POOL_SIZE 1024 extern bool hasValidFramebufferAttached; @@ -94,6 +96,17 @@ MetalRenderer::MetalRenderer() m_xfbRingBuffer->setLabel(GetLabel("Transform feedback buffer", m_xfbRingBuffer)); #endif + // Occlusion queries + m_occlusionQuery.m_resultBuffer = m_device->newBuffer(OCCLUSION_QUERY_POOL_SIZE * sizeof(uint64), MTL::ResourceStorageModeShared); +#ifdef CEMU_DEBUG_ASSERT + m_occlusionQuery.m_resultBuffer->setLabel(GetLabel("Occlusion query result buffer", m_occlusionQuery.m_resultBuffer)); +#endif + m_occlusionQuery.m_resultsPtr = (uint64*)m_occlusionQuery.m_resultBuffer->contents(); + + m_occlusionQuery.m_availableIndices.reserve(OCCLUSION_QUERY_POOL_SIZE); + for (uint32 i = 0; i < OCCLUSION_QUERY_POOL_SIZE; i++) + m_occlusionQuery.m_availableIndices.push_back(i); + // Initialize state for (uint32 i = 0; i < METAL_SHADER_TYPE_TOTAL; i++) { @@ -467,7 +480,7 @@ void MetalRenderer::renderTarget_setScissor(sint32 scissorX, sint32 scissorY, si LatteCachedFBO* MetalRenderer::rendertarget_createCachedFBO(uint64 key) { - return new CachedFBOMtl(key); + return new CachedFBOMtl(this, key); } void MetalRenderer::rendertarget_deleteCachedFBO(LatteCachedFBO* cfbo) @@ -1042,6 +1055,14 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 encoderState.m_depthClipEnable = zClipEnable; } + // Visibility result mode + if (m_occlusionQuery.m_activeIndex != encoderState.m_visibilityResultOffset) + { + auto mode = (m_occlusionQuery.m_activeIndex == INVALID_UINT32 ? MTL::VisibilityResultModeDisabled : MTL::VisibilityResultModeCounting); + renderCommandEncoder->setVisibilityResultMode(mode, m_occlusionQuery.m_activeIndex); + encoderState.m_visibilityResultOffset = m_occlusionQuery.m_activeIndex; + } + // todo - how does culling behave with rects? // right now we just assume that their winding is always CW if (isPrimitiveRect) @@ -1284,21 +1305,20 @@ void MetalRenderer::indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offse } LatteQueryObject* MetalRenderer::occlusionQuery_create() { - cemuLog_log(LogType::MetalLogging, "MetalRenderer::occlusionQuery_create: Occlusion queries are not yet supported on Metal"); - return new LatteQueryObjectMtl(this); } void MetalRenderer::occlusionQuery_destroy(LatteQueryObject* queryObj) { - cemuLog_log(LogType::MetalLogging, "MetalRenderer::occlusionQuery_destroy: occlusion queries are not yet supported on Metal"); + // TODO: do something? } void MetalRenderer::occlusionQuery_flush() { - cemuLog_log(LogType::MetalLogging, "MetalRenderer::occlusionQuery_flush: occlusion queries are not yet supported on Metal"); + // TODO: implement + debug_printf("Occlusion query flush is not implemented\n"); } void MetalRenderer::occlusionQuery_updateState() { - cemuLog_log(LogType::MetalLogging, "MetalRenderer::occlusionQuery_updateState: occlusion queries are not yet supported on Metal"); + // TODO } void MetalRenderer::SetBuffer(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Buffer* buffer, size_t offset, uint32 index) @@ -1574,6 +1594,8 @@ void MetalRenderer::CommitCommandBuffer() m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(nullptr); + m_occlusionQuery.m_availableIndices.insert(m_occlusionQuery.m_availableIndices.end(), m_occlusionQuery.m_crntCmdBuffIndices.begin(), m_occlusionQuery.m_crntCmdBuffIndices.end()); + // Debug //m_commandQueue->insertDebugCaptureBoundary(); } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index f00f814c..4f7376b6 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -99,6 +99,7 @@ struct MetalEncoderState uint32 m_depthSlope = 0; uint32 m_depthClamp = 0; bool m_depthClipEnable = true; + uint32 m_visibilityResultOffset = INVALID_UINT32; struct { MTL::Buffer* m_buffer; size_t m_offset; @@ -376,6 +377,36 @@ public: return m_readbackBuffer; } + MTL::Buffer* GetOcclusionQueryResultBuffer() const + { + return m_occlusionQuery.m_resultBuffer; + } + + uint64* GetOcclusionQueryResultsPtr() + { + return m_occlusionQuery.m_resultsPtr; + } + + uint32 GetAvailableOcclusionQueryIndex() + { + if (m_occlusionQuery.m_availableIndices.empty()) + { + cemuLog_log(LogType::Force, "No occlusion query index available"); + return 0; + } + + uint32 queryIndex = m_occlusionQuery.m_availableIndices.back(); + m_occlusionQuery.m_availableIndices.pop_back(); + m_occlusionQuery.m_crntCmdBuffIndices.push_back(queryIndex); + + return queryIndex; + } + + void SetActiveOcclusionQueryIndex(uint32 queryIndex) + { + m_occlusionQuery.m_activeIndex = queryIndex; + } + private: MetalLayerHandle m_mainLayer; MetalLayerHandle m_padLayer; @@ -423,6 +454,16 @@ private: // Transform feedback MTL::Buffer* m_xfbRingBuffer; + // Occlusion queries + struct + { + MTL::Buffer* m_resultBuffer; + uint64* m_resultsPtr; + std::vector m_availableIndices; + std::vector m_crntCmdBuffIndices; + uint32 m_activeIndex = INVALID_UINT32; + } m_occlusionQuery; + // Active objects std::vector m_commandBuffers; MetalEncoderType m_encoderType = MetalEncoderType::None; From fd16488e4c238e83a5faf52fc2f3ed1ebca365d7 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 13 Sep 2024 11:19:56 +0200 Subject: [PATCH 3/5] improve command buffer sync --- .../Metal/LatteTextureReadbackMtl.cpp | 4 ++-- .../Renderer/Metal/MetalBufferAllocator.h | 2 +- .../HW/Latte/Renderer/Metal/MetalCommon.h | 6 +++++ .../HW/Latte/Renderer/Metal/MetalQuery.cpp | 13 +++++++++-- src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h | 5 ++-- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 23 ++++--------------- .../HW/Latte/Renderer/Metal/MetalRenderer.h | 9 ++++---- 7 files changed, 33 insertions(+), 29 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.cpp index f2c03709..d7adc25c 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.cpp @@ -38,12 +38,12 @@ bool LatteTextureReadbackInfoMtl::IsFinished() if (m_mtlr->GetCurrentCommandBuffer() == m_commandBuffer) m_mtlr->CommitCommandBuffer(); - return m_mtlr->CommandBufferCompleted(m_commandBuffer); + return CommandBufferCompleted(m_commandBuffer); } void LatteTextureReadbackInfoMtl::ForceFinish() { - m_mtlr->WaitForCommandBufferCompletion(m_commandBuffer); + m_commandBuffer->waitUntilCompleted(); } uint8* LatteTextureReadbackInfoMtl::GetData() diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h index b8a3c760..198d9978 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h @@ -255,7 +255,7 @@ public: auto& buffer = m_buffers[i]; for (uint32_t j = 0; j < buffer.m_data.m_commandBuffers.size(); j++) { - if (m_mtlr->CommandBufferCompleted(buffer.m_data.m_commandBuffers[j])) + if (CommandBufferCompleted(buffer.m_data.m_commandBuffers[j])) { if (buffer.m_data.m_commandBuffers.size() == 1) { diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h index 8a6daa92..a1fe7f82 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h @@ -69,3 +69,9 @@ inline bool IsValidDepthTextureType(Latte::E_DIM dim) { return (dim == Latte::E_DIM::DIM_2D || dim == Latte::E_DIM::DIM_2D_MSAA || dim == Latte::E_DIM::DIM_2D_ARRAY || dim == Latte::E_DIM::DIM_2D_ARRAY_MSAA || dim == Latte::E_DIM::DIM_CUBEMAP); } + +inline bool CommandBufferCompleted(MTL::CommandBuffer* commandBuffer) +{ + auto status = commandBuffer->status(); + return (status == MTL::CommandBufferStatusCompleted || status == MTL::CommandBufferStatusError); +} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp index c27a5620..0119209b 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp @@ -4,14 +4,21 @@ bool LatteQueryObjectMtl::getResult(uint64& numSamplesPassed) { - if (!m_mtlr->CommandBufferCompleted(m_commandBuffer)) + if (!CommandBufferCompleted(m_commandBuffer)) return false; numSamplesPassed = m_mtlr->GetOcclusionQueryResultsPtr()[m_queryIndex]; + printf("Num samples: %llu\n", numSamplesPassed); return true; } +LatteQueryObjectMtl::~LatteQueryObjectMtl() +{ + if (m_queryIndex != INVALID_UINT32) + m_mtlr->ReleaseOcclusionQueryIndex(m_queryIndex); +} + void LatteQueryObjectMtl::begin() { m_queryIndex = m_mtlr->GetAvailableOcclusionQueryIndex(); @@ -21,5 +28,7 @@ void LatteQueryObjectMtl::begin() void LatteQueryObjectMtl::end() { m_mtlr->SetActiveOcclusionQueryIndex(INVALID_UINT32); - // TODO: request soon submit of the command buffer + m_commandBuffer = m_mtlr->GetCurrentCommandBuffer(); + // TODO: request soon submit instead? + m_mtlr->CommitCommandBuffer(); } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h index 8fa53497..58b4e266 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h @@ -8,6 +8,7 @@ class LatteQueryObjectMtl : public LatteQueryObject { public: LatteQueryObjectMtl(class MetalRenderer* mtlRenderer) : m_mtlr{mtlRenderer} {} + ~LatteQueryObjectMtl(); bool getResult(uint64& numSamplesPassed) override; void begin() override; @@ -16,7 +17,7 @@ public: private: class MetalRenderer* m_mtlr; - uint32 m_queryIndex; + uint32 m_queryIndex = INVALID_UINT32; + // TODO: make this a list of command buffers MTL::CommandBuffer* m_commandBuffer; - uint64 m_acccumulatedSum; }; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 045ca77e..e4b25d55 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -334,7 +334,7 @@ void MetalRenderer::Flush(bool waitIdle) { cemu_assert_debug(commandBuffer.m_commited); - WaitForCommandBufferCompletion(commandBuffer.m_commandBuffer); + commandBuffer.m_commandBuffer->waitUntilCompleted(); } } } @@ -1059,7 +1059,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 if (m_occlusionQuery.m_activeIndex != encoderState.m_visibilityResultOffset) { auto mode = (m_occlusionQuery.m_activeIndex == INVALID_UINT32 ? MTL::VisibilityResultModeDisabled : MTL::VisibilityResultModeCounting); - renderCommandEncoder->setVisibilityResultMode(mode, m_occlusionQuery.m_activeIndex); + renderCommandEncoder->setVisibilityResultMode(mode, m_occlusionQuery.m_activeIndex * sizeof(uint64)); encoderState.m_visibilityResultOffset = m_occlusionQuery.m_activeIndex; } @@ -1309,16 +1309,16 @@ LatteQueryObject* MetalRenderer::occlusionQuery_create() { } void MetalRenderer::occlusionQuery_destroy(LatteQueryObject* queryObj) { - // TODO: do something? + auto queryObjMtl = static_cast(queryObj); + delete queryObjMtl; } void MetalRenderer::occlusionQuery_flush() { // TODO: implement - debug_printf("Occlusion query flush is not implemented\n"); } void MetalRenderer::occlusionQuery_updateState() { - // TODO + // TODO: implement } void MetalRenderer::SetBuffer(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Buffer* buffer, size_t offset, uint32 index) @@ -1420,17 +1420,6 @@ MTL::CommandBuffer* MetalRenderer::GetCommandBuffer() } } -bool MetalRenderer::CommandBufferCompleted(MTL::CommandBuffer* commandBuffer) -{ - auto status = commandBuffer->status(); - return (status == MTL::CommandBufferStatusCompleted || status == MTL::CommandBufferStatusError); -} - -void MetalRenderer::WaitForCommandBufferCompletion(MTL::CommandBuffer* commandBuffer) -{ - commandBuffer->waitUntilCompleted(); -} - MTL::RenderCommandEncoder* MetalRenderer::GetTemporaryRenderCommandEncoder(MTL::RenderPassDescriptor* renderPassDescriptor) { EndEncoding(); @@ -1594,8 +1583,6 @@ void MetalRenderer::CommitCommandBuffer() m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(nullptr); - m_occlusionQuery.m_availableIndices.insert(m_occlusionQuery.m_availableIndices.end(), m_occlusionQuery.m_crntCmdBuffIndices.begin(), m_occlusionQuery.m_crntCmdBuffIndices.end()); - // Debug //m_commandQueue->insertDebugCaptureBoundary(); } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 4f7376b6..f46a4be3 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -323,8 +323,6 @@ public: void SetSamplerState(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::SamplerState* samplerState, uint32 index); MTL::CommandBuffer* GetCommandBuffer(); - bool CommandBufferCompleted(MTL::CommandBuffer* commandBuffer); - void WaitForCommandBufferCompletion(MTL::CommandBuffer* commandBuffer); MTL::RenderCommandEncoder* GetTemporaryRenderCommandEncoder(MTL::RenderPassDescriptor* renderPassDescriptor); MTL::RenderCommandEncoder* GetRenderCommandEncoder(bool forceRecreate = false); MTL::ComputeCommandEncoder* GetComputeCommandEncoder(); @@ -397,11 +395,15 @@ public: uint32 queryIndex = m_occlusionQuery.m_availableIndices.back(); m_occlusionQuery.m_availableIndices.pop_back(); - m_occlusionQuery.m_crntCmdBuffIndices.push_back(queryIndex); return queryIndex; } + void ReleaseOcclusionQueryIndex(uint32 queryIndex) + { + m_occlusionQuery.m_availableIndices.push_back(queryIndex); + } + void SetActiveOcclusionQueryIndex(uint32 queryIndex) { m_occlusionQuery.m_activeIndex = queryIndex; @@ -460,7 +462,6 @@ private: MTL::Buffer* m_resultBuffer; uint64* m_resultsPtr; std::vector m_availableIndices; - std::vector m_crntCmdBuffIndices; uint32 m_activeIndex = INVALID_UINT32; } m_occlusionQuery; From 934b1f8b55d85450fa6e156157f786b845486e91 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 13 Sep 2024 15:28:51 +0200 Subject: [PATCH 4/5] handle occlusion queries with no draws --- src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp | 17 ++++++++++++----- src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h | 2 +- .../HW/Latte/Renderer/Metal/MetalRenderer.h | 5 +++++ 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp index 0119209b..895bab22 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp @@ -4,12 +4,16 @@ bool LatteQueryObjectMtl::getResult(uint64& numSamplesPassed) { + if (!m_commandBuffer) + { + numSamplesPassed = 0; + return true; + } + if (!CommandBufferCompleted(m_commandBuffer)) return false; numSamplesPassed = m_mtlr->GetOcclusionQueryResultsPtr()[m_queryIndex]; - printf("Num samples: %llu\n", numSamplesPassed); - return true; } @@ -28,7 +32,10 @@ void LatteQueryObjectMtl::begin() void LatteQueryObjectMtl::end() { m_mtlr->SetActiveOcclusionQueryIndex(INVALID_UINT32); - m_commandBuffer = m_mtlr->GetCurrentCommandBuffer(); - // TODO: request soon submit instead? - m_mtlr->CommitCommandBuffer(); + if (m_mtlr->IsCommandBufferActive()) + { + m_commandBuffer = m_mtlr->GetCurrentCommandBuffer(); + // TODO: request soon submit instead? + m_mtlr->CommitCommandBuffer(); + } } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h index 58b4e266..554cdace 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h @@ -19,5 +19,5 @@ private: uint32 m_queryIndex = INVALID_UINT32; // TODO: make this a list of command buffers - MTL::CommandBuffer* m_commandBuffer; + MTL::CommandBuffer* m_commandBuffer = nullptr; }; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index f46a4be3..0c7c78f5 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -279,6 +279,11 @@ public: // Helpers MetalPerformanceMonitor& GetPerformanceMonitor() { return m_performanceMonitor; } + bool IsCommandBufferActive() const + { + return (m_commandBuffers.size() != 0); + } + MTL::CommandBuffer* GetCurrentCommandBuffer() { cemu_assert_debug(m_commandBuffers.size() != 0); From e89efed7434fe4029861a589d57f3521cd61100c Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 13 Sep 2024 16:12:47 +0200 Subject: [PATCH 5/5] request soon commit instead of committing directly --- .../Latte/Renderer/Metal/LatteTextureReadbackMtl.cpp | 1 + src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp | 4 ++-- src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp | 12 +++++++----- src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h | 8 +++++++- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.cpp index d7adc25c..dd2d8aeb 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.cpp @@ -25,6 +25,7 @@ void LatteTextureReadbackInfoMtl::StartTransfer() blitCommandEncoder->copyFromTexture(baseTexture->GetTexture(), 0, 0, MTL::Origin{0, 0, 0}, MTL::Size{(uint32)baseTexture->width, (uint32)baseTexture->height, 1}, m_mtlr->GetTextureReadbackBuffer(), m_bufferOffset, bytesPerRow, bytesPerImage); m_commandBuffer = m_mtlr->GetCurrentCommandBuffer(); + m_mtlr->RequestSoonCommit(); } bool LatteTextureReadbackInfoMtl::IsFinished() diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp index 895bab22..ab24b4db 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp @@ -14,6 +14,7 @@ bool LatteQueryObjectMtl::getResult(uint64& numSamplesPassed) return false; numSamplesPassed = m_mtlr->GetOcclusionQueryResultsPtr()[m_queryIndex]; + return true; } @@ -35,7 +36,6 @@ void LatteQueryObjectMtl::end() if (m_mtlr->IsCommandBufferActive()) { m_commandBuffer = m_mtlr->GetCurrentCommandBuffer(); - // TODO: request soon submit instead? - m_mtlr->CommitCommandBuffer(); + m_mtlr->RequestSoonCommit(); } } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index e4b25d55..a063a0b7 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -27,7 +27,7 @@ #include "imgui/imgui_extension.h" #include "imgui/imgui_impl_metal.h" -#define COMMIT_TRESHOLD 256 +#define DEFAULT_COMMIT_TRESHOLD 256 #define OCCLUSION_QUERY_POOL_SIZE 1024 extern bool hasValidFramebufferAttached; @@ -1270,7 +1270,8 @@ void MetalRenderer::draw_endSequence() bool hasReadback = LatteTextureReadback_Update(); m_recordedDrawcalls++; // The number of draw calls needs to twice as big, since we are interrupting the render pass - if (m_recordedDrawcalls >= COMMIT_TRESHOLD * 2 || hasReadback) + // TODO: ucomment? + if (m_recordedDrawcalls >= m_commitTreshold * 2/* || hasReadback*/) { CommitCommandBuffer(); @@ -1409,6 +1410,9 @@ MTL::CommandBuffer* MetalRenderer::GetCommandBuffer() MTL::CommandBuffer* mtlCommandBuffer = m_commandQueue->commandBuffer(); m_commandBuffers.push_back({mtlCommandBuffer}); + m_recordedDrawcalls = 0; + m_commitTreshold = DEFAULT_COMMIT_TRESHOLD; + // Notify memory manager about the new command buffer m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(mtlCommandBuffer); @@ -1557,15 +1561,13 @@ void MetalRenderer::EndEncoding() m_encoderType = MetalEncoderType::None; // Commit the command buffer if enough draw calls have been recorded - if (m_recordedDrawcalls >= COMMIT_TRESHOLD) + if (m_recordedDrawcalls >= m_commitTreshold) CommitCommandBuffer(); } } void MetalRenderer::CommitCommandBuffer() { - m_recordedDrawcalls = 0; - if (m_commandBuffers.size() != 0) { EndEncoding(); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 0c7c78f5..3217d09d 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -291,6 +291,11 @@ public: return m_commandBuffers[m_commandBuffers.size() - 1].m_commandBuffer; } + void RequestSoonCommit() + { + m_commitTreshold = m_recordedDrawcalls + 8; + } + MTL::CommandEncoder* GetCommandEncoder() { return m_commandEncoder; @@ -475,7 +480,8 @@ private: MetalEncoderType m_encoderType = MetalEncoderType::None; MTL::CommandEncoder* m_commandEncoder = nullptr; - uint32 m_recordedDrawcalls = 0; + uint32 m_recordedDrawcalls; + uint32 m_commitTreshold; // State MetalState m_state;