From a693bf564f21af42fdcca1b5f867dafa4349a30c Mon Sep 17 00:00:00 2001 From: Samo Z Date: Sun, 25 Aug 2024 20:32:46 +0200 Subject: [PATCH 1/6] fix: issues on Intel Macs --- .../Renderer/Metal/MetalBufferAllocator.h | 2 +- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h index 3ec0acbd..d3b0cc5e 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h @@ -39,7 +39,7 @@ public: MetalBufferAllocation GetBufferAllocation(size_t size) { // Align the size - size = Align(size, 16); + size = Align(size, 128); // First, try to find a free range for (uint32 i = 0; i < m_freeBufferRanges.size(); i++) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index eb85f1ea..b01e4148 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -126,7 +126,7 @@ MetalRenderer::MetalRenderer() presentFragmentFunction->release(); error = nullptr; - renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatRGBA8Unorm); + renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatBGRA8Unorm); #ifdef CEMU_DEBUG_ASSERT renderPipelineDescriptor->setLabel(GetLabel("Present pipeline linear", renderPipelineDescriptor)); #endif @@ -138,7 +138,7 @@ MetalRenderer::MetalRenderer() } error = nullptr; - renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatRGBA8Unorm_sRGB); + renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatBGRA8Unorm_sRGB); #ifdef CEMU_DEBUG_ASSERT renderPipelineDescriptor->setLabel(GetLabel("Present pipeline sRGB", renderPipelineDescriptor)); #endif @@ -185,7 +185,9 @@ MetalRenderer::~MetalRenderer() void MetalRenderer::InitializeLayer(const Vector2i& size, bool mainWindow) { - GetLayer(mainWindow) = MetalLayerHandle(m_device, size); + auto& layer = GetLayer(mainWindow); + layer = MetalLayerHandle(m_device, size); + layer.GetLayer()->setPixelFormat(MTL::PixelFormatBGRA8Unorm); } void MetalRenderer::ResizeLayer(const Vector2i& size, bool mainWindow) @@ -381,7 +383,7 @@ ImTextureID MetalRenderer::GenerateTexture(const std::vector& data, const desc->setPixelFormat(MTL::PixelFormatRGBA8Unorm); desc->setWidth(size.x); desc->setHeight(size.y); - desc->setStorageMode(MTL::StorageModeShared); + desc->setStorageMode(m_isAppleGPU ? MTL::StorageModeShared : MTL::StorageModeManaged); desc->setUsage(MTL::TextureUsageShaderRead); MTL::Texture* texture = m_device->newTexture(desc); @@ -507,11 +509,14 @@ void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, s auto blitCommandEncoder = GetBlitCommandEncoder(); // Allocate a temporary buffer - auto allocation = m_memoryManager->GetTemporaryBufferAllocator().GetBufferAllocation(compressedImageSize); - auto buffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(allocation.bufferIndex); + // HACK: use the persistent buffer allocator so as to avoid any issues + auto& bufferAllocator = m_memoryManager->GetBufferAllocator(); + auto allocation = bufferAllocator.GetBufferAllocation(compressedImageSize); + auto buffer = bufferAllocator.GetBuffer(allocation.bufferIndex); // Copy the data to the temporary buffer memcpy(allocation.data, pixelData, compressedImageSize); + buffer->didModifyRange(NS::Range(allocation.offset, allocation.size)); // Copy the data from the temporary buffer to the texture blitCommandEncoder->copyFromBuffer(buffer, allocation.offset, bytesPerRow, 0, MTL::Size(width, height, 1), textureMtl->GetTexture(), sliceIndex, mipIndex, MTL::Origin(0, 0, offsetZ)); @@ -1474,7 +1479,7 @@ bool MetalRenderer::AcquireDrawable(bool mainWindow) const bool latteBufferUsesSRGB = mainWindow ? LatteGPUState.tvBufferUsesSRGB : LatteGPUState.drcBufferUsesSRGB; if (latteBufferUsesSRGB != m_state.m_usesSRGB) { - layer.GetLayer()->setPixelFormat(latteBufferUsesSRGB ? MTL::PixelFormatRGBA8Unorm_sRGB : MTL::PixelFormatRGBA8Unorm); + layer.GetLayer()->setPixelFormat(latteBufferUsesSRGB ? MTL::PixelFormatBGRA8Unorm_sRGB : MTL::PixelFormatBGRA8Unorm); m_state.m_usesSRGB = latteBufferUsesSRGB; } From 6c8947d0e5f95941cecc7456d087ab3d976b2761 Mon Sep 17 00:00:00 2001 From: Samo Z Date: Mon, 26 Aug 2024 08:07:49 +0200 Subject: [PATCH 2/6] fix: temporary buffer allocator --- .../HW/Latte/Renderer/Metal/MetalBufferAllocator.h | 12 +++++++++++- src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp | 9 +++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h index d3b0cc5e..96724e88 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h @@ -152,7 +152,7 @@ struct MetalSyncedBuffer class MetalTemporaryBufferAllocator : public MetalBufferAllocator { public: - MetalTemporaryBufferAllocator(class MetalRenderer* metalRenderer) : MetalBufferAllocator(metalRenderer, metalRenderer->GetOptimalBufferStorageMode()) {} + MetalTemporaryBufferAllocator(class MetalRenderer* metalRenderer) : MetalBufferAllocator(metalRenderer, MTL::ResourceStorageModeShared) {} void SetActiveCommandBuffer(MTL::CommandBuffer* commandBuffer) { @@ -170,6 +170,16 @@ public: { if (buffer.m_commandBuffers.size() == 1) { + // First remove any free ranges that use this buffer + for (uint32 k = 0; k < m_freeBufferRanges.size(); k++) + { + if (m_freeBufferRanges[k].bufferIndex == i) + { + m_freeBufferRanges.erase(m_freeBufferRanges.begin() + k); + k--; + } + } + // All command buffers using it have finished execution, we can use it again m_freeBufferRanges.push_back({i, 0, buffer.m_buffer->length()}); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index b01e4148..854a6e00 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -509,8 +509,7 @@ void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, s auto blitCommandEncoder = GetBlitCommandEncoder(); // Allocate a temporary buffer - // HACK: use the persistent buffer allocator so as to avoid any issues - auto& bufferAllocator = m_memoryManager->GetBufferAllocator(); + auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); auto allocation = bufferAllocator.GetBufferAllocation(compressedImageSize); auto buffer = bufferAllocator.GetBuffer(allocation.bufferIndex); @@ -1192,9 +1191,11 @@ void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, u void MetalRenderer::indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) { - auto buffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(bufferIndex); if (!HasUnifiedMemory()) + { + auto buffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(bufferIndex); buffer->didModifyRange(NS::Range(offset, size)); + } } void MetalRenderer::SetBuffer(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Buffer* buffer, size_t offset, uint32 index) @@ -1459,7 +1460,7 @@ void MetalRenderer::CommitCommandBuffer() auto& commandBuffer = m_commandBuffers.back(); if (!commandBuffer.m_commited) { - commandBuffer.m_commandBuffer->addCompletedHandler(^(MTL::CommandBuffer* cmd) { + commandBuffer.m_commandBuffer->addCompletedHandler(^(MTL::CommandBuffer*) { m_memoryManager->GetTemporaryBufferAllocator().CommandBufferFinished(commandBuffer.m_commandBuffer); }); From b7f88d093a5aa5c4bc8ac206fad9d921d094030a Mon Sep 17 00:00:00 2001 From: Samo Z Date: Mon, 26 Aug 2024 12:22:33 +0200 Subject: [PATCH 3/6] implement buffer locking system --- .../Renderer/Metal/MetalBufferAllocator.h | 113 +++++++++++++++--- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 40 +++++-- .../HW/Latte/Renderer/Metal/MetalRenderer.h | 5 + 3 files changed, 134 insertions(+), 24 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h index 96724e88..51c119d9 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h @@ -1,6 +1,7 @@ #pragma once #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" +#include "Common/precompiled.h" #include "Metal/MTLResource.hpp" struct MetalBufferRange @@ -146,20 +147,86 @@ typedef MetalBufferAllocator MetalDefaultBufferAllocator; struct MetalSyncedBuffer { MTL::Buffer* m_buffer; - std::vector m_commandBuffers; + std::vector m_commandBuffers; + uint32 m_lock = 0; + + bool IsLocked() const + { + return (m_lock != 0); + } }; +//constexpr uint16 MAX_COMMAND_BUFFER_FRAMES = 1024; + class MetalTemporaryBufferAllocator : public MetalBufferAllocator { public: MetalTemporaryBufferAllocator(class MetalRenderer* metalRenderer) : MetalBufferAllocator(metalRenderer, MTL::ResourceStorageModeShared) {} - void SetActiveCommandBuffer(MTL::CommandBuffer* commandBuffer) + void LockBuffer(uint32 bufferIndex) { - m_activeCommandBuffer = commandBuffer; + m_buffers[bufferIndex].m_lock++; } - void CommandBufferFinished(MTL::CommandBuffer* commandBuffer) + void UnlockBuffer(uint32 bufferIndex) + { + auto& buffer = m_buffers[bufferIndex]; + + buffer.m_lock--; + + // TODO: is this really necessary? + // Release the buffer if it wasn't released due to the lock + if (!buffer.IsLocked() && buffer.m_commandBuffers.empty()) + m_freeBufferRanges.push_back({bufferIndex, 0, buffer.m_buffer->length()}); + } + + void UnlockAllBuffers() + { + for (uint32_t i = 0; i < m_buffers.size(); i++) + { + auto& buffer = m_buffers[i]; + + if (buffer.m_lock != 0) + { + if (buffer.m_commandBuffers.empty()) + m_freeBufferRanges.push_back({i, 0, buffer.m_buffer->length()}); + + buffer.m_lock = 0; + } + } + + /* + auto it = m_commandBuffersFrames.begin(); + while (it != m_commandBuffersFrames.end()) + { + it->second++; + + if (it->second > MAX_COMMAND_BUFFER_FRAMES) + { + debug_printf("command buffer %u remained unfinished for more than %u frames\n", it->first, MAX_COMMAND_BUFFER_FRAMES); + + // Pretend like the command buffer has finished + CommandBufferFinished(it->first, false); + + it = m_commandBuffersFrames.erase(it); + } + else + { + it++; + } + } + */ + } + + void SetActiveCommandBuffer(uint32 commandBuffer) + { + m_activeCommandBuffer = commandBuffer; + + //if (commandBuffer != INVALID_COMMAND_BUFFER_ID) + // m_commandBuffersFrames[commandBuffer] = 0; + } + + void CommandBufferFinished(uint32 commandBuffer/*, bool erase = true*/) { for (uint32_t i = 0; i < m_buffers.size(); i++) { @@ -170,18 +237,21 @@ public: { if (buffer.m_commandBuffers.size() == 1) { - // First remove any free ranges that use this buffer - for (uint32 k = 0; k < m_freeBufferRanges.size(); k++) + if (!buffer.IsLocked()) { - if (m_freeBufferRanges[k].bufferIndex == i) + // First remove any free ranges that use this buffer + for (uint32 k = 0; k < m_freeBufferRanges.size(); k++) { - m_freeBufferRanges.erase(m_freeBufferRanges.begin() + k); - k--; + if (m_freeBufferRanges[k].bufferIndex == i) + { + m_freeBufferRanges.erase(m_freeBufferRanges.begin() + k); + k--; + } } - } - // All command buffers using it have finished execution, we can use it again - m_freeBufferRanges.push_back({i, 0, buffer.m_buffer->length()}); + // All command buffers using it have finished execution, we can use it again + m_freeBufferRanges.push_back({i, 0, buffer.m_buffer->length()}); + } buffer.m_commandBuffers.clear(); } @@ -193,18 +263,28 @@ public: } } } + + //if (erase) + // m_commandBuffersFrames.erase(commandBuffer); } - // TODO: should this be here? It's just to ensure safety MTL::Buffer* GetBuffer(uint32 bufferIndex) { + cemu_assert_debug(m_activeCommandBuffer != INVALID_COMMAND_BUFFER_ID); + auto& buffer = m_buffers[bufferIndex]; - if (buffer.m_commandBuffers.back() != m_activeCommandBuffer) + if (buffer.m_commandBuffers.empty() || buffer.m_commandBuffers.back() != m_activeCommandBuffer) buffer.m_commandBuffers.push_back(m_activeCommandBuffer); return buffer.m_buffer; } + MTL::Buffer* GetBufferOutsideOfCommandBuffer(uint32 bufferIndex) + { + return m_buffers[bufferIndex].m_buffer; + } + + /* MetalBufferAllocation GetBufferAllocation(size_t size) { // TODO: remove this @@ -219,7 +299,10 @@ public: return allocation; } + */ private: - MTL::CommandBuffer* m_activeCommandBuffer = nullptr; + uint32 m_activeCommandBuffer = INVALID_COMMAND_BUFFER_ID; + + //std::map m_commandBuffersFrames; }; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 854a6e00..273b4c62 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -247,12 +247,16 @@ void MetalRenderer::SwapBuffers(bool swapTV, bool swapDRC) // Release all the command buffers CommitCommandBuffer(); + // TODO: should this be released here? for (uint32 i = 0; i < m_commandBuffers.size(); i++) m_commandBuffers[i].m_commandBuffer->release(); m_commandBuffers.clear(); // Release frame persistent buffers m_memoryManager->GetFramePersistentBufferAllocator().ResetAllocations(); + + // Unlock all temporary buffers + m_memoryManager->GetTemporaryBufferAllocator().UnlockAllBuffers(); } // TODO: use `shader` for drawing @@ -515,7 +519,7 @@ void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, s // Copy the data to the temporary buffer memcpy(allocation.data, pixelData, compressedImageSize); - buffer->didModifyRange(NS::Range(allocation.offset, allocation.size)); + //buffer->didModifyRange(NS::Range(allocation.offset, allocation.size)); // Copy the data from the temporary buffer to the texture blitCommandEncoder->copyFromBuffer(buffer, allocation.offset, bytesPerRow, 0, MTL::Size(width, height, 1), textureMtl->GetTexture(), sliceIndex, mipIndex, MTL::Origin(0, 0, offsetZ)); @@ -1116,7 +1120,13 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 // Draw MTL::Buffer* indexBuffer = nullptr; if (hostIndexType != INDEX_TYPE::NONE) - indexBuffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(indexBufferIndex); + { + auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); + indexBuffer = bufferAllocator.GetBuffer(indexBufferIndex); + + // We have already retrieved the buffer, no need for it to be locked anymore + bufferAllocator.UnlockBuffer(indexBufferIndex); + } if (usesGeometryShader) { if (indexBuffer) @@ -1182,20 +1192,27 @@ void MetalRenderer::draw_endSequence() void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) { - auto allocation = m_memoryManager->GetTemporaryBufferAllocator().GetBufferAllocation(size); + auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); + auto allocation = bufferAllocator.GetBufferAllocation(size); offset = allocation.offset; bufferIndex = allocation.bufferIndex; + // Lock the buffer so that it doesn't get released + bufferAllocator.LockBuffer(allocation.bufferIndex); + return allocation.data; } void MetalRenderer::indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) { + // Do nothing + /* if (!HasUnifiedMemory()) { - auto buffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(bufferIndex); + auto buffer = m_memoryManager->GetTemporaryBufferAllocator().GetBufferOutsideOfCommandBuffer(bufferIndex); buffer->didModifyRange(NS::Range(offset, size)); } + */ } void MetalRenderer::SetBuffer(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Buffer* buffer, size_t offset, uint32 index) @@ -1284,10 +1301,13 @@ MTL::CommandBuffer* MetalRenderer::GetCommandBuffer() //m_commandQueue->insertDebugCaptureBoundary(); MTL::CommandBuffer* mtlCommandBuffer = m_commandQueue->commandBuffer(); - m_commandBuffers.push_back({mtlCommandBuffer}); + MetalCommandBuffer commandBuffer = {mtlCommandBuffer, m_commandBufferID}; + m_commandBuffers.push_back(commandBuffer); + + m_commandBufferID = (m_commandBufferID + 1) % 65536; // Notify memory manager about the new command buffer - m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(mtlCommandBuffer); + m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(commandBuffer.m_id); return mtlCommandBuffer; } @@ -1461,12 +1481,14 @@ void MetalRenderer::CommitCommandBuffer() if (!commandBuffer.m_commited) { commandBuffer.m_commandBuffer->addCompletedHandler(^(MTL::CommandBuffer*) { - m_memoryManager->GetTemporaryBufferAllocator().CommandBufferFinished(commandBuffer.m_commandBuffer); + m_memoryManager->GetTemporaryBufferAllocator().CommandBufferFinished(commandBuffer.m_id); }); commandBuffer.m_commandBuffer->commit(); commandBuffer.m_commited = true; + m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(INVALID_COMMAND_BUFFER_ID); + // Debug //m_commandQueue->insertDebugCaptureBoundary(); } @@ -1702,8 +1724,8 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE auto supportBuffer = bufferAllocator.GetBufferAllocation(size); memcpy(supportBuffer.data, supportBufferData, size); auto buffer = bufferAllocator.GetBuffer(supportBuffer.bufferIndex); - if (!HasUnifiedMemory()) - buffer->didModifyRange(NS::Range(supportBuffer.offset, size)); + //if (!HasUnifiedMemory()) + // buffer->didModifyRange(NS::Range(supportBuffer.offset, size)); SetBuffer(renderCommandEncoder, mtlShaderType, buffer, supportBuffer.offset, shader->resourceMapping.uniformVarsBufferBindingPoint); } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 7a9b41e4..3d494cbe 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -143,9 +143,12 @@ struct MetalState struct MetalCommandBuffer { MTL::CommandBuffer* m_commandBuffer; + uint32 m_id; bool m_commited = false; }; +constexpr uint32 INVALID_COMMAND_BUFFER_ID = std::numeric_limits::max(); + enum class MetalEncoderType { None, @@ -417,6 +420,8 @@ private: MetalPerformanceMonitor m_performanceMonitor; + uint32 m_commandBufferID = 0; + // Metal objects MTL::Device* m_device; MTL::CommandQueue* m_commandQueue; From cd8b74ba32ac569e81a3a497d78c284641e1e3f0 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Mon, 26 Aug 2024 18:31:22 +0200 Subject: [PATCH 4/6] fix: buffer allocator leaks --- .../Renderer/Metal/MetalBufferAllocator.h | 90 +++++++++++++------ .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 23 ++--- .../HW/Latte/Renderer/Metal/MetalRenderer.h | 5 -- 3 files changed, 75 insertions(+), 43 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h index 51c119d9..445fb823 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h @@ -147,7 +147,7 @@ typedef MetalBufferAllocator MetalDefaultBufferAllocator; struct MetalSyncedBuffer { MTL::Buffer* m_buffer; - std::vector m_commandBuffers; + std::vector m_commandBuffers; uint32 m_lock = 0; bool IsLocked() const @@ -156,7 +156,7 @@ struct MetalSyncedBuffer } }; -//constexpr uint16 MAX_COMMAND_BUFFER_FRAMES = 1024; +constexpr uint16 MAX_COMMAND_BUFFER_FRAMES = 8; class MetalTemporaryBufferAllocator : public MetalBufferAllocator { @@ -177,7 +177,7 @@ public: // TODO: is this really necessary? // Release the buffer if it wasn't released due to the lock if (!buffer.IsLocked() && buffer.m_commandBuffers.empty()) - m_freeBufferRanges.push_back({bufferIndex, 0, buffer.m_buffer->length()}); + FreeBuffer(bufferIndex); } void UnlockAllBuffers() @@ -189,7 +189,7 @@ public: if (buffer.m_lock != 0) { if (buffer.m_commandBuffers.empty()) - m_freeBufferRanges.push_back({i, 0, buffer.m_buffer->length()}); + FreeBuffer(i); buffer.m_lock = 0; } @@ -203,7 +203,7 @@ public: if (it->second > MAX_COMMAND_BUFFER_FRAMES) { - debug_printf("command buffer %u remained unfinished for more than %u frames\n", it->first, MAX_COMMAND_BUFFER_FRAMES); + debug_printf("command buffer %p remained unfinished for more than %u frames\n", it->first, MAX_COMMAND_BUFFER_FRAMES); // Pretend like the command buffer has finished CommandBufferFinished(it->first, false); @@ -218,48 +218,39 @@ public: */ } - void SetActiveCommandBuffer(uint32 commandBuffer) + void SetActiveCommandBuffer(MTL::CommandBuffer* commandBuffer) { m_activeCommandBuffer = commandBuffer; - //if (commandBuffer != INVALID_COMMAND_BUFFER_ID) + //if (commandBuffer) // m_commandBuffersFrames[commandBuffer] = 0; } - void CommandBufferFinished(uint32 commandBuffer/*, bool erase = true*/) + void CheckForCompletedCommandBuffers(/*MTL::CommandBuffer* commandBuffer, bool erase = true*/) { for (uint32_t i = 0; i < m_buffers.size(); i++) { auto& buffer = m_buffers[i]; for (uint32_t j = 0; j < buffer.m_commandBuffers.size(); j++) { - if (commandBuffer == buffer.m_commandBuffers[j]) + if (m_mtlr->CommandBufferCompleted(buffer.m_commandBuffers[j])) { if (buffer.m_commandBuffers.size() == 1) { if (!buffer.IsLocked()) { - // First remove any free ranges that use this buffer - for (uint32 k = 0; k < m_freeBufferRanges.size(); k++) - { - if (m_freeBufferRanges[k].bufferIndex == i) - { - m_freeBufferRanges.erase(m_freeBufferRanges.begin() + k); - k--; - } - } - // All command buffers using it have finished execution, we can use it again - m_freeBufferRanges.push_back({i, 0, buffer.m_buffer->length()}); + FreeBuffer(i); } buffer.m_commandBuffers.clear(); + break; } else { buffer.m_commandBuffers.erase(buffer.m_commandBuffers.begin() + j); + j--; } - break; } } } @@ -270,10 +261,10 @@ public: MTL::Buffer* GetBuffer(uint32 bufferIndex) { - cemu_assert_debug(m_activeCommandBuffer != INVALID_COMMAND_BUFFER_ID); + cemu_assert_debug(m_activeCommandBuffer); auto& buffer = m_buffers[bufferIndex]; - if (buffer.m_commandBuffers.empty() || buffer.m_commandBuffers.back() != m_activeCommandBuffer) + if (buffer.m_commandBuffers.empty() || buffer.m_commandBuffers.back() != m_activeCommandBuffer/*std::find(buffer.m_commandBuffers.begin(), buffer.m_commandBuffers.end(), m_activeCommandBuffer) == buffer.m_commandBuffers.end()*/) buffer.m_commandBuffers.push_back(m_activeCommandBuffer); return buffer.m_buffer; @@ -287,7 +278,6 @@ public: /* MetalBufferAllocation GetBufferAllocation(size_t size) { - // TODO: remove this if (!m_activeCommandBuffer) throw std::runtime_error("No active command buffer when allocating a buffer!"); @@ -301,8 +291,54 @@ public: } */ -private: - uint32 m_activeCommandBuffer = INVALID_COMMAND_BUFFER_ID; + /* + void LogInfo() + { + debug_printf("BUFFERS:\n"); + for (auto& buffer : m_buffers) + { + debug_printf(" %p -> size: %lu, command buffers: %zu\n", buffer.m_buffer, buffer.m_buffer->length(), buffer.m_commandBuffers.size()); + uint32 same = 0; + uint32 completed = 0; + for (uint32 i = 0; i < buffer.m_commandBuffers.size(); i++) + { + if (m_mtlr->CommandBufferCompleted(buffer.m_commandBuffers[i])) + completed++; + for (uint32 j = 0; j < buffer.m_commandBuffers.size(); j++) + { + if (i != j && buffer.m_commandBuffers[i] == buffer.m_commandBuffers[j]) + same++; + } + } + debug_printf(" same: %u\n", same); + debug_printf(" completed: %u\n", completed); + } - //std::map m_commandBuffersFrames; + debug_printf("FREE RANGES:\n"); + for (auto& range : m_freeBufferRanges) + { + debug_printf(" %u -> offset: %zu, size: %zu\n", range.bufferIndex, range.offset, range.size); + } + } + */ + +private: + MTL::CommandBuffer* m_activeCommandBuffer = nullptr; + + //std::map m_commandBuffersFrames; + + void FreeBuffer(uint32 bufferIndex) + { + // First remove any free ranges that use this buffer + for (uint32 k = 0; k < m_freeBufferRanges.size(); k++) + { + if (m_freeBufferRanges[k].bufferIndex == bufferIndex) + { + m_freeBufferRanges.erase(m_freeBufferRanges.begin() + k); + k--; + } + } + + m_freeBufferRanges.push_back({bufferIndex, 0, m_buffers[bufferIndex].m_buffer->length()}); + } }; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 273b4c62..5bd5040c 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -248,8 +248,8 @@ void MetalRenderer::SwapBuffers(bool swapTV, bool swapDRC) // Release all the command buffers CommitCommandBuffer(); // TODO: should this be released here? - for (uint32 i = 0; i < m_commandBuffers.size(); i++) - m_commandBuffers[i].m_commandBuffer->release(); + //for (uint32 i = 0; i < m_commandBuffers.size(); i++) + // m_commandBuffers[i].m_commandBuffer->release(); m_commandBuffers.clear(); // Release frame persistent buffers @@ -257,6 +257,9 @@ void MetalRenderer::SwapBuffers(bool swapTV, bool swapDRC) // Unlock all temporary buffers m_memoryManager->GetTemporaryBufferAllocator().UnlockAllBuffers(); + + // Check for completed command buffers + m_memoryManager->GetTemporaryBufferAllocator().CheckForCompletedCommandBuffers(); } // TODO: use `shader` for drawing @@ -1301,13 +1304,10 @@ MTL::CommandBuffer* MetalRenderer::GetCommandBuffer() //m_commandQueue->insertDebugCaptureBoundary(); MTL::CommandBuffer* mtlCommandBuffer = m_commandQueue->commandBuffer(); - MetalCommandBuffer commandBuffer = {mtlCommandBuffer, m_commandBufferID}; - m_commandBuffers.push_back(commandBuffer); - - m_commandBufferID = (m_commandBufferID + 1) % 65536; + m_commandBuffers.push_back({mtlCommandBuffer}); // Notify memory manager about the new command buffer - m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(commandBuffer.m_id); + m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(mtlCommandBuffer); return mtlCommandBuffer; } @@ -1480,14 +1480,15 @@ void MetalRenderer::CommitCommandBuffer() auto& commandBuffer = m_commandBuffers.back(); if (!commandBuffer.m_commited) { - commandBuffer.m_commandBuffer->addCompletedHandler(^(MTL::CommandBuffer*) { - m_memoryManager->GetTemporaryBufferAllocator().CommandBufferFinished(commandBuffer.m_id); - }); + // Handled differently, since it seems like Metal doesn't always call the completion handler + //commandBuffer.m_commandBuffer->addCompletedHandler(^(MTL::CommandBuffer*) { + // m_memoryManager->GetTemporaryBufferAllocator().CommandBufferFinished(commandBuffer.m_commandBuffer); + //}); commandBuffer.m_commandBuffer->commit(); commandBuffer.m_commited = true; - m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(INVALID_COMMAND_BUFFER_ID); + m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(nullptr); // Debug //m_commandQueue->insertDebugCaptureBoundary(); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 3d494cbe..7a9b41e4 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -143,12 +143,9 @@ struct MetalState struct MetalCommandBuffer { MTL::CommandBuffer* m_commandBuffer; - uint32 m_id; bool m_commited = false; }; -constexpr uint32 INVALID_COMMAND_BUFFER_ID = std::numeric_limits::max(); - enum class MetalEncoderType { None, @@ -420,8 +417,6 @@ private: MetalPerformanceMonitor m_performanceMonitor; - uint32 m_commandBufferID = 0; - // Metal objects MTL::Device* m_device; MTL::CommandQueue* m_commandQueue; From 3439b3259e481743ff47c3376deaa827a75a1013 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 27 Aug 2024 08:18:17 +0200 Subject: [PATCH 5/6] uncomment drc swap buffer --- src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 5bd5040c..5fac079f 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -242,12 +242,12 @@ void MetalRenderer::SwapBuffers(bool swapTV, bool swapDRC) { if (swapTV) SwapBuffer(true); - //if (swapDRC) - // SwapBuffer(false); + if (swapDRC) + SwapBuffer(false); // Release all the command buffers CommitCommandBuffer(); - // TODO: should this be released here? + // TODO: release //for (uint32 i = 0; i < m_commandBuffers.size(); i++) // m_commandBuffers[i].m_commandBuffer->release(); m_commandBuffers.clear(); From b7a1adec91f4c4c7b5526282883b3127c90e1a83 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 27 Aug 2024 14:39:52 +0200 Subject: [PATCH 6/6] clip mode --- src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp | 14 ++++++++++---- src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h | 1 + 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 5fac079f..7b6be6ce 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -947,10 +947,6 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 uint32 cullBack = polygonControlReg.get_CULL_BACK(); uint32 polyOffsetFrontEnable = polygonControlReg.get_OFFSET_FRONT_ENABLED(); - // TODO - //cemu_assert_debug(LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_NEAR_DISABLE() == LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_FAR_DISABLE()); // near or far clipping can be disabled individually - //bool zClipEnable = LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_FAR_DISABLE() == false; - if (polyOffsetFrontEnable) { uint32 frontScaleU32 = LatteGPUState.contextNew.PA_SU_POLY_OFFSET_FRONT_SCALE.getRawValue(); @@ -984,6 +980,16 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 } } + // Depth clip mode + cemu_assert_debug(LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_NEAR_DISABLE() == LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_FAR_DISABLE()); // near or far clipping can be disabled individually + bool zClipEnable = LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_FAR_DISABLE() == false; + + if (zClipEnable != encoderState.m_depthClipEnable) + { + renderCommandEncoder->setDepthClipMode(zClipEnable ? MTL::DepthClipModeClip : MTL::DepthClipModeClamp); + encoderState.m_depthClipEnable = zClipEnable; + } + // todo - how does culling behave with rects? // right now we just assume that their winding is always CW if (isPrimitiveRect) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 7a9b41e4..8fe3a8d9 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -98,6 +98,7 @@ struct MetalEncoderState uint32 m_depthBias = 0; uint32 m_depthSlope = 0; uint32 m_depthClamp = 0; + bool m_depthClipEnable = true; struct { MTL::Buffer* m_buffer; size_t m_offset;