diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index 634014b7..10c85270 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -567,6 +567,8 @@ if(ENABLE_METAL) HW/Latte/Renderer/Metal/MetalSamplerCache.h HW/Latte/Renderer/Metal/MetalHybridComputePipeline.cpp HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h + HW/Latte/Renderer/Metal/MetalQuery.cpp + HW/Latte/Renderer/Metal/MetalQuery.h HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h HW/Latte/Renderer/Metal/UtilityShaderSource.h ) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.cpp index a9e673f6..85adbfb9 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.cpp @@ -1,9 +1,9 @@ #include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" -#include "HW/Latte/Renderer/Metal/LatteToMtl.h" -#include "Metal/MTLRenderPass.hpp" +#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" +#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" -void CachedFBOMtl::CreateRenderPass() +CachedFBOMtl::CachedFBOMtl(class MetalRenderer* metalRenderer, uint64 key) : LatteCachedFBO(key) { m_renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); @@ -39,6 +39,9 @@ void CachedFBOMtl::CreateRenderPass() stencilAttachment->setStoreAction(MTL::StoreActionStore); } } + + // Visibility buffer + m_renderPassDescriptor->setVisibilityResultBuffer(metalRenderer->GetOcclusionQueryResultBuffer()); } CachedFBOMtl::~CachedFBOMtl() diff --git a/src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h index 0d926e7e..f1221eb2 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h @@ -8,10 +8,7 @@ class CachedFBOMtl : public LatteCachedFBO { public: - CachedFBOMtl(uint64 key) : LatteCachedFBO(key) - { - CreateRenderPass(); - } + 
CachedFBOMtl(class MetalRenderer* metalRenderer, uint64 key); ~CachedFBOMtl(); @@ -22,6 +19,4 @@ public: private: MTL::RenderPassDescriptor* m_renderPassDescriptor = nullptr; - - void CreateRenderPass(); }; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.cpp index f2c03709..dd2d8aeb 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.cpp @@ -25,6 +25,7 @@ void LatteTextureReadbackInfoMtl::StartTransfer() blitCommandEncoder->copyFromTexture(baseTexture->GetTexture(), 0, 0, MTL::Origin{0, 0, 0}, MTL::Size{(uint32)baseTexture->width, (uint32)baseTexture->height, 1}, m_mtlr->GetTextureReadbackBuffer(), m_bufferOffset, bytesPerRow, bytesPerImage); m_commandBuffer = m_mtlr->GetCurrentCommandBuffer(); + m_mtlr->RequestSoonCommit(); } bool LatteTextureReadbackInfoMtl::IsFinished() @@ -38,12 +39,12 @@ bool LatteTextureReadbackInfoMtl::IsFinished() if (m_mtlr->GetCurrentCommandBuffer() == m_commandBuffer) m_mtlr->CommitCommandBuffer(); - return m_mtlr->CommandBufferCompleted(m_commandBuffer); + return CommandBufferCompleted(m_commandBuffer); } void LatteTextureReadbackInfoMtl::ForceFinish() { - m_mtlr->WaitForCommandBufferCompletion(m_commandBuffer); + m_commandBuffer->waitUntilCompleted(); } uint8* LatteTextureReadbackInfoMtl::GetData() diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h index b8a3c760..198d9978 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h @@ -255,7 +255,7 @@ public: auto& buffer = m_buffers[i]; for (uint32_t j = 0; j < buffer.m_data.m_commandBuffers.size(); j++) { - if (m_mtlr->CommandBufferCompleted(buffer.m_data.m_commandBuffers[j])) + if (CommandBufferCompleted(buffer.m_data.m_commandBuffers[j])) { if 
(buffer.m_data.m_commandBuffers.size() == 1) { diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h index 8a6daa92..a1fe7f82 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h @@ -69,3 +69,9 @@ inline bool IsValidDepthTextureType(Latte::E_DIM dim) { return (dim == Latte::E_DIM::DIM_2D || dim == Latte::E_DIM::DIM_2D_MSAA || dim == Latte::E_DIM::DIM_2D_ARRAY || dim == Latte::E_DIM::DIM_2D_ARRAY_MSAA || dim == Latte::E_DIM::DIM_CUBEMAP); } + +inline bool CommandBufferCompleted(MTL::CommandBuffer* commandBuffer) +{ + auto status = commandBuffer->status(); + return (status == MTL::CommandBufferStatusCompleted || status == MTL::CommandBufferStatusError); +} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp new file mode 100644 index 00000000..ab24b4db --- /dev/null +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.cpp @@ -0,0 +1,41 @@ +#include "Cafe/HW/Latte/Renderer/Metal/MetalQuery.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" + +bool LatteQueryObjectMtl::getResult(uint64& numSamplesPassed) +{ + if (!m_commandBuffer || m_queryIndex == INVALID_UINT32) /* no command buffer recorded or no result slot acquired: report zero samples instead of indexing the result buffer */ + { + numSamplesPassed = 0; + return true; + } + + if (!CommandBufferCompleted(m_commandBuffer)) + return false; + + numSamplesPassed = m_mtlr->GetOcclusionQueryResultsPtr()[m_queryIndex]; + + return true; +} + +LatteQueryObjectMtl::~LatteQueryObjectMtl() +{ + if (m_queryIndex != INVALID_UINT32) + m_mtlr->ReleaseOcclusionQueryIndex(m_queryIndex); +} + +void LatteQueryObjectMtl::begin() +{ + if (m_queryIndex == INVALID_UINT32) /* keep an already acquired slot: the destructor releases only the current index */ m_queryIndex = m_mtlr->GetAvailableOcclusionQueryIndex(); + m_mtlr->SetActiveOcclusionQueryIndex(m_queryIndex); +} + +void LatteQueryObjectMtl::end() +{ + m_mtlr->SetActiveOcclusionQueryIndex(INVALID_UINT32); + if (m_mtlr->IsCommandBufferActive()) + { + m_commandBuffer = m_mtlr->GetCurrentCommandBuffer(); + 
m_mtlr->RequestSoonCommit(); + } +} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h new file mode 100644 index 00000000..554cdace --- /dev/null +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalQuery.h @@ -0,0 +1,23 @@ +#pragma once + +#include "Cafe/HW/Latte/Core/LatteQueryObject.h" + +#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" + +class LatteQueryObjectMtl : public LatteQueryObject +{ +public: + LatteQueryObjectMtl(class MetalRenderer* mtlRenderer) : m_mtlr{mtlRenderer} {} + ~LatteQueryObjectMtl(); + + bool getResult(uint64& numSamplesPassed) override; + void begin() override; + void end() override; + +private: + class MetalRenderer* m_mtlr; + + uint32 m_queryIndex = INVALID_UINT32; + // TODO: make this a list of command buffers + MTL::CommandBuffer* m_commandBuffer = nullptr; +}; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 4ff1a3b0..a063a0b7 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -9,6 +9,7 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalSamplerCache.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalQuery.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h" @@ -20,12 +21,14 @@ #include "HW/Latte/Renderer/Metal/MetalCommon.h" #include "HW/Latte/Renderer/Metal/MetalLayerHandle.h" #include "HW/Latte/Renderer/Renderer.h" +#include "Metal/MTLRenderCommandEncoder.hpp" #define IMGUI_IMPL_METAL_CPP #include "imgui/imgui_extension.h" #include "imgui/imgui_impl_metal.h" -#define COMMIT_TRESHOLD 256 +#define DEFAULT_COMMIT_TRESHOLD 256 +#define OCCLUSION_QUERY_POOL_SIZE 1024 extern bool hasValidFramebufferAttached; @@ -93,6 +96,17 @@ 
MetalRenderer::MetalRenderer() m_xfbRingBuffer->setLabel(GetLabel("Transform feedback buffer", m_xfbRingBuffer)); #endif + // Occlusion queries + m_occlusionQuery.m_resultBuffer = m_device->newBuffer(OCCLUSION_QUERY_POOL_SIZE * sizeof(uint64), MTL::ResourceStorageModeShared); +#ifdef CEMU_DEBUG_ASSERT + m_occlusionQuery.m_resultBuffer->setLabel(GetLabel("Occlusion query result buffer", m_occlusionQuery.m_resultBuffer)); +#endif + m_occlusionQuery.m_resultsPtr = (uint64*)m_occlusionQuery.m_resultBuffer->contents(); + + m_occlusionQuery.m_availableIndices.reserve(OCCLUSION_QUERY_POOL_SIZE); + for (uint32 i = 0; i < OCCLUSION_QUERY_POOL_SIZE; i++) + m_occlusionQuery.m_availableIndices.push_back(i); + // Initialize state for (uint32 i = 0; i < METAL_SHADER_TYPE_TOTAL; i++) { @@ -320,7 +334,7 @@ void MetalRenderer::Flush(bool waitIdle) { cemu_assert_debug(commandBuffer.m_commited); - WaitForCommandBufferCompletion(commandBuffer.m_commandBuffer); + commandBuffer.m_commandBuffer->waitUntilCompleted(); } } } @@ -466,7 +480,7 @@ void MetalRenderer::renderTarget_setScissor(sint32 scissorX, sint32 scissorY, si LatteCachedFBO* MetalRenderer::rendertarget_createCachedFBO(uint64 key) { - return new CachedFBOMtl(key); + return new CachedFBOMtl(this, key); } void MetalRenderer::rendertarget_deleteCachedFBO(LatteCachedFBO* cfbo) @@ -1041,6 +1055,14 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 encoderState.m_depthClipEnable = zClipEnable; } + // Visibility result mode + if (m_occlusionQuery.m_activeIndex != encoderState.m_visibilityResultOffset) + { + auto mode = (m_occlusionQuery.m_activeIndex == INVALID_UINT32 ? MTL::VisibilityResultModeDisabled : MTL::VisibilityResultModeCounting); + renderCommandEncoder->setVisibilityResultMode(mode, m_occlusionQuery.m_activeIndex * sizeof(uint64)); + encoderState.m_visibilityResultOffset = m_occlusionQuery.m_activeIndex; + } + // todo - how does culling behave with rects? 
// right now we just assume that their winding is always CW if (isPrimitiveRect) @@ -1248,7 +1270,8 @@ void MetalRenderer::draw_endSequence() bool hasReadback = LatteTextureReadback_Update(); m_recordedDrawcalls++; // The number of draw calls needs to twice as big, since we are interrupting the render pass - if (m_recordedDrawcalls >= COMMIT_TRESHOLD * 2 || hasReadback) + // TODO: uncomment? + if (m_recordedDrawcalls >= m_commitTreshold * 2/* || hasReadback*/) { CommitCommandBuffer(); @@ -1282,6 +1305,23 @@ void MetalRenderer::indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offse */ } +LatteQueryObject* MetalRenderer::occlusionQuery_create() { + return new LatteQueryObjectMtl(this); +} + +void MetalRenderer::occlusionQuery_destroy(LatteQueryObject* queryObj) { + auto queryObjMtl = static_cast<LatteQueryObjectMtl*>(queryObj); + delete queryObjMtl; +} + +void MetalRenderer::occlusionQuery_flush() { + // TODO: implement +} + +void MetalRenderer::occlusionQuery_updateState() { + // TODO: implement +} + void MetalRenderer::SetBuffer(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Buffer* buffer, size_t offset, uint32 index) { auto& boundBuffer = m_state.m_encoderState.m_buffers[shaderType][index]; @@ -1370,6 +1410,9 @@ MTL::CommandBuffer* MetalRenderer::GetCommandBuffer() MTL::CommandBuffer* mtlCommandBuffer = m_commandQueue->commandBuffer(); m_commandBuffers.push_back({mtlCommandBuffer}); + m_recordedDrawcalls = 0; + m_commitTreshold = DEFAULT_COMMIT_TRESHOLD; + // Notify memory manager about the new command buffer m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(mtlCommandBuffer); @@ -1381,17 +1424,6 @@ MTL::CommandBuffer* MetalRenderer::GetCommandBuffer() } } -bool MetalRenderer::CommandBufferCompleted(MTL::CommandBuffer* commandBuffer) -{ - auto status = commandBuffer->status(); - return (status == MTL::CommandBufferStatusCompleted || status == MTL::CommandBufferStatusError); -} - -void 
MetalRenderer::WaitForCommandBufferCompletion(MTL::CommandBuffer* commandBuffer) -{ - commandBuffer->waitUntilCompleted(); -} - MTL::RenderCommandEncoder* MetalRenderer::GetTemporaryRenderCommandEncoder(MTL::RenderPassDescriptor* renderPassDescriptor) { EndEncoding(); @@ -1529,15 +1561,13 @@ void MetalRenderer::EndEncoding() m_encoderType = MetalEncoderType::None; // Commit the command buffer if enough draw calls have been recorded - if (m_recordedDrawcalls >= COMMIT_TRESHOLD) + if (m_recordedDrawcalls >= m_commitTreshold) CommitCommandBuffer(); } } void MetalRenderer::CommitCommandBuffer() { - m_recordedDrawcalls = 0; - if (m_commandBuffers.size() != 0) { EndEncoding(); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 25051a97..3217d09d 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -99,6 +99,7 @@ struct MetalEncoderState uint32 m_depthSlope = 0; uint32 m_depthClamp = 0; bool m_depthClipEnable = true; + uint32 m_visibilityResultOffset = INVALID_UINT32; struct { MTL::Buffer* m_buffer; size_t m_offset; @@ -155,32 +156,6 @@ enum class MetalEncoderType Blit, }; -// HACK: Dummy occlusion query object for Metal -class LatteQueryObjectMtl : public LatteQueryObject -{ -public: - LatteQueryObjectMtl(class MetalRenderer* mtlRenderer) : m_mtlr{mtlRenderer} {} - - bool getResult(uint64& numSamplesPassed) override - { - cemuLog_log(LogType::MetalLogging, "LatteQueryObjectMtl::getResult: occlusion queries are not yet supported on Metal"); - return true; - } - - void begin() override - { - cemuLog_log(LogType::MetalLogging, "LatteQueryObjectMtl::begin: occlusion queries are not yet supported on Metal"); - } - - void end() override - { - cemuLog_log(LogType::MetalLogging, "LatteQueryObjectMtl::end: occlusion queries are not yet supported on Metal"); - } - -private: - class MetalRenderer* m_mtlr; -}; - class MetalRenderer : public Renderer 
{ public: @@ -296,27 +271,19 @@ public: void indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) override; // occlusion queries - LatteQueryObject* occlusionQuery_create() override { - cemuLog_log(LogType::MetalLogging, "MetalRenderer::occlusionQuery_create: Occlusion queries are not yet supported on Metal"); - - return new LatteQueryObjectMtl(this); - } - - void occlusionQuery_destroy(LatteQueryObject* queryObj) override { - cemuLog_log(LogType::MetalLogging, "MetalRenderer::occlusionQuery_destroy: occlusion queries are not yet supported on Metal"); - } - - void occlusionQuery_flush() override { - cemuLog_log(LogType::MetalLogging, "MetalRenderer::occlusionQuery_flush: occlusion queries are not yet supported on Metal"); - } - - void occlusionQuery_updateState() override { - cemuLog_log(LogType::MetalLogging, "MetalRenderer::occlusionQuery_updateState: occlusion queries are not yet supported on Metal"); - } + LatteQueryObject* occlusionQuery_create() override; + void occlusionQuery_destroy(LatteQueryObject* queryObj) override; + void occlusionQuery_flush() override; + void occlusionQuery_updateState() override; // Helpers MetalPerformanceMonitor& GetPerformanceMonitor() { return m_performanceMonitor; } + bool IsCommandBufferActive() const + { + return (m_commandBuffers.size() != 0); + } + MTL::CommandBuffer* GetCurrentCommandBuffer() { cemu_assert_debug(m_commandBuffers.size() != 0); @@ -324,6 +291,11 @@ public: return m_commandBuffers[m_commandBuffers.size() - 1].m_commandBuffer; } + void RequestSoonCommit() + { + m_commitTreshold = m_recordedDrawcalls + 8; + } + MTL::CommandEncoder* GetCommandEncoder() { return m_commandEncoder; @@ -361,8 +333,6 @@ public: void SetSamplerState(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::SamplerState* samplerState, uint32 index); MTL::CommandBuffer* GetCommandBuffer(); - bool CommandBufferCompleted(MTL::CommandBuffer* commandBuffer); - void 
WaitForCommandBufferCompletion(MTL::CommandBuffer* commandBuffer); MTL::RenderCommandEncoder* GetTemporaryRenderCommandEncoder(MTL::RenderPassDescriptor* renderPassDescriptor); MTL::RenderCommandEncoder* GetRenderCommandEncoder(bool forceRecreate = false); MTL::ComputeCommandEncoder* GetComputeCommandEncoder(); @@ -415,6 +385,40 @@ public: return m_readbackBuffer; } + MTL::Buffer* GetOcclusionQueryResultBuffer() const + { + return m_occlusionQuery.m_resultBuffer; + } + + uint64* GetOcclusionQueryResultsPtr() + { + return m_occlusionQuery.m_resultsPtr; + } + + uint32 GetAvailableOcclusionQueryIndex() + { + if (m_occlusionQuery.m_availableIndices.empty()) + { + cemuLog_log(LogType::Force, "No occlusion query index available"); + return 0; + } + + uint32 queryIndex = m_occlusionQuery.m_availableIndices.back(); + m_occlusionQuery.m_availableIndices.pop_back(); + + return queryIndex; + } + + void ReleaseOcclusionQueryIndex(uint32 queryIndex) + { + m_occlusionQuery.m_availableIndices.push_back(queryIndex); + } + + void SetActiveOcclusionQueryIndex(uint32 queryIndex) + { + m_occlusionQuery.m_activeIndex = queryIndex; + } + private: MetalLayerHandle m_mainLayer; MetalLayerHandle m_padLayer; @@ -462,12 +466,22 @@ private: // Transform feedback MTL::Buffer* m_xfbRingBuffer; + // Occlusion queries + struct + { + MTL::Buffer* m_resultBuffer = nullptr; + uint64* m_resultsPtr = nullptr; + std::vector<uint32> m_availableIndices; + uint32 m_activeIndex = INVALID_UINT32; + } m_occlusionQuery; + // Active objects std::vector m_commandBuffers; MetalEncoderType m_encoderType = MetalEncoderType::None; MTL::CommandEncoder* m_commandEncoder = nullptr; - uint32 m_recordedDrawcalls = 0; + uint32 m_recordedDrawcalls = 0; /* reset in GetCommandBuffer() */ + uint32 m_commitTreshold = 0; /* set in GetCommandBuffer(); initialized so it is never read indeterminate */ // State MetalState m_state;