From b7f88d093a5aa5c4bc8ac206fad9d921d094030a Mon Sep 17 00:00:00 2001
From: Samo Z
Date: Mon, 26 Aug 2024 12:22:33 +0200
Subject: [PATCH] implement buffer locking system

---
 .../Renderer/Metal/MetalBufferAllocator.h     | 113 +++++++++++++++---
 .../HW/Latte/Renderer/Metal/MetalRenderer.cpp |  40 +++++--
 .../HW/Latte/Renderer/Metal/MetalRenderer.h   |   5 +
 3 files changed, 134 insertions(+), 24 deletions(-)

diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h
index 96724e88..51c119d9 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h
@@ -1,6 +1,7 @@
 #pragma once

 #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
+#include "Common/precompiled.h"
 #include "Metal/MTLResource.hpp"

 struct MetalBufferRange
@@ -146,20 +147,86 @@ typedef MetalBufferAllocator MetalDefaultBufferAllocator;
 struct MetalSyncedBuffer
 {
     MTL::Buffer* m_buffer;
-    std::vector<MTL::CommandBuffer*> m_commandBuffers;
+    std::vector<uint32> m_commandBuffers;
+    uint32 m_lock = 0;
+
+    bool IsLocked() const
+    {
+        return (m_lock != 0);
+    }
 };

+//constexpr uint16 MAX_COMMAND_BUFFER_FRAMES = 1024;
+
 class MetalTemporaryBufferAllocator : public MetalBufferAllocator<MetalSyncedBuffer>
 {
 public:
     MetalTemporaryBufferAllocator(class MetalRenderer* metalRenderer) : MetalBufferAllocator(metalRenderer, MTL::ResourceStorageModeShared) {}

-    void SetActiveCommandBuffer(MTL::CommandBuffer* commandBuffer)
+    void LockBuffer(uint32 bufferIndex)
     {
-        m_activeCommandBuffer = commandBuffer;
+        m_buffers[bufferIndex].m_lock++;
     }

-    void CommandBufferFinished(MTL::CommandBuffer* commandBuffer)
+    void UnlockBuffer(uint32 bufferIndex)
+    {
+        auto& buffer = m_buffers[bufferIndex];
+
+        buffer.m_lock--;
+
+        // TODO: is this really necessary?
+        // Release the buffer if it wasn't released due to the lock
+        if (!buffer.IsLocked() && buffer.m_commandBuffers.empty())
+            m_freeBufferRanges.push_back({bufferIndex, 0, buffer.m_buffer->length()});
+    }
+
+    void UnlockAllBuffers()
+    {
+        for (uint32_t i = 0; i < m_buffers.size(); i++)
+        {
+            auto& buffer = m_buffers[i];
+
+            if (buffer.m_lock != 0)
+            {
+                if (buffer.m_commandBuffers.empty())
+                    m_freeBufferRanges.push_back({i, 0, buffer.m_buffer->length()});
+
+                buffer.m_lock = 0;
+            }
+        }
+
+        /*
+        auto it = m_commandBuffersFrames.begin();
+        while (it != m_commandBuffersFrames.end())
+        {
+            it->second++;
+
+            if (it->second > MAX_COMMAND_BUFFER_FRAMES)
+            {
+                debug_printf("command buffer %u remained unfinished for more than %u frames\n", it->first, MAX_COMMAND_BUFFER_FRAMES);
+
+                // Pretend like the command buffer has finished
+                CommandBufferFinished(it->first, false);
+
+                it = m_commandBuffersFrames.erase(it);
+            }
+            else
+            {
+                it++;
+            }
+        }
+        */
+    }
+
+    void SetActiveCommandBuffer(uint32 commandBuffer)
+    {
+        m_activeCommandBuffer = commandBuffer;
+
+        //if (commandBuffer != INVALID_COMMAND_BUFFER_ID)
+        //    m_commandBuffersFrames[commandBuffer] = 0;
+    }
+
+    void CommandBufferFinished(uint32 commandBuffer/*, bool erase = true*/)
     {
         for (uint32_t i = 0; i < m_buffers.size(); i++)
         {
@@ -170,18 +237,21 @@ public:
                 {
                     if (buffer.m_commandBuffers.size() == 1)
                     {
-                        // First remove any free ranges that use this buffer
-                        for (uint32 k = 0; k < m_freeBufferRanges.size(); k++)
+                        if (!buffer.IsLocked())
                         {
-                            if (m_freeBufferRanges[k].bufferIndex == i)
+                            // First remove any free ranges that use this buffer
+                            for (uint32 k = 0; k < m_freeBufferRanges.size(); k++)
                             {
-                                m_freeBufferRanges.erase(m_freeBufferRanges.begin() + k);
-                                k--;
+                                if (m_freeBufferRanges[k].bufferIndex == i)
+                                {
+                                    m_freeBufferRanges.erase(m_freeBufferRanges.begin() + k);
+                                    k--;
+                                }
                             }
-                        }

-                        // All command buffers using it have finished execution, we can use it again
-                        m_freeBufferRanges.push_back({i, 0, buffer.m_buffer->length()});
+                            // All command buffers using it have finished execution, we can use it again
+                            m_freeBufferRanges.push_back({i, 0, buffer.m_buffer->length()});
+                        }

                         buffer.m_commandBuffers.clear();
                     }
@@ -193,18 +263,28 @@ public:
                 }
             }
         }
+
+        //if (erase)
+        //    m_commandBuffersFrames.erase(commandBuffer);
     }

-    // TODO: should this be here? It's just to ensure safety
     MTL::Buffer* GetBuffer(uint32 bufferIndex)
     {
+        cemu_assert_debug(m_activeCommandBuffer != INVALID_COMMAND_BUFFER_ID);
+
         auto& buffer = m_buffers[bufferIndex];
-        if (buffer.m_commandBuffers.back() != m_activeCommandBuffer)
+        if (buffer.m_commandBuffers.empty() || buffer.m_commandBuffers.back() != m_activeCommandBuffer)
             buffer.m_commandBuffers.push_back(m_activeCommandBuffer);

         return buffer.m_buffer;
     }

+    MTL::Buffer* GetBufferOutsideOfCommandBuffer(uint32 bufferIndex)
+    {
+        return m_buffers[bufferIndex].m_buffer;
+    }
+
+    /*
     MetalBufferAllocation GetBufferAllocation(size_t size)
     {
         // TODO: remove this
@@ -219,7 +299,10 @@ public:

         return allocation;
     }
+    */

 private:
-    MTL::CommandBuffer* m_activeCommandBuffer = nullptr;
+    uint32 m_activeCommandBuffer = INVALID_COMMAND_BUFFER_ID;
+
+    //std::map m_commandBuffersFrames;
 };
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
index 854a6e00..273b4c62 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
@@ -247,12 +247,16 @@ void MetalRenderer::SwapBuffers(bool swapTV, bool swapDRC)

     // Release all the command buffers
     CommitCommandBuffer();
+    // TODO: should this be released here?
     for (uint32 i = 0; i < m_commandBuffers.size(); i++)
         m_commandBuffers[i].m_commandBuffer->release();
     m_commandBuffers.clear();

     // Release frame persistent buffers
     m_memoryManager->GetFramePersistentBufferAllocator().ResetAllocations();
+
+    // Unlock all temporary buffers
+    m_memoryManager->GetTemporaryBufferAllocator().UnlockAllBuffers();
 }

 // TODO: use `shader` for drawing
@@ -515,7 +519,7 @@ void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, s

     // Copy the data to the temporary buffer
     memcpy(allocation.data, pixelData, compressedImageSize);
-    buffer->didModifyRange(NS::Range(allocation.offset, allocation.size));
+    //buffer->didModifyRange(NS::Range(allocation.offset, allocation.size));

     // Copy the data from the temporary buffer to the texture
     blitCommandEncoder->copyFromBuffer(buffer, allocation.offset, bytesPerRow, 0, MTL::Size(width, height, 1), textureMtl->GetTexture(), sliceIndex, mipIndex, MTL::Origin(0, 0, offsetZ));
@@ -1116,7 +1120,13 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
     // Draw
     MTL::Buffer* indexBuffer = nullptr;
     if (hostIndexType != INDEX_TYPE::NONE)
-        indexBuffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(indexBufferIndex);
+    {
+        auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator();
+        indexBuffer = bufferAllocator.GetBuffer(indexBufferIndex);
+
+        // We have already retrieved the buffer, no need for it to be locked anymore
+        bufferAllocator.UnlockBuffer(indexBufferIndex);
+    }
     if (usesGeometryShader)
     {
         if (indexBuffer)
@@ -1182,20 +1192,27 @@ void MetalRenderer::draw_endSequence()

 void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex)
 {
-    auto allocation = m_memoryManager->GetTemporaryBufferAllocator().GetBufferAllocation(size);
+    auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator();
+    auto allocation = bufferAllocator.GetBufferAllocation(size);
     offset = allocation.offset;
     bufferIndex = allocation.bufferIndex;

+    // Lock the buffer so that it doesn't get released
+    bufferAllocator.LockBuffer(allocation.bufferIndex);
+
     return allocation.data;
 }

 void MetalRenderer::indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size)
 {
+    // Do nothing
+    /*
     if (!HasUnifiedMemory())
     {
-        auto buffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(bufferIndex);
+        auto buffer = m_memoryManager->GetTemporaryBufferAllocator().GetBufferOutsideOfCommandBuffer(bufferIndex);
         buffer->didModifyRange(NS::Range(offset, size));
     }
+    */
 }

 void MetalRenderer::SetBuffer(MTL::RenderCommandEncoder* renderCommandEncoder, MetalShaderType shaderType, MTL::Buffer* buffer, size_t offset, uint32 index)
@@ -1284,10 +1301,13 @@ MTL::CommandBuffer* MetalRenderer::GetCommandBuffer()
         //m_commandQueue->insertDebugCaptureBoundary();

         MTL::CommandBuffer* mtlCommandBuffer = m_commandQueue->commandBuffer();
-        m_commandBuffers.push_back({mtlCommandBuffer});
+        MetalCommandBuffer commandBuffer = {mtlCommandBuffer, m_commandBufferID};
+        m_commandBuffers.push_back(commandBuffer);
+
+        m_commandBufferID = (m_commandBufferID + 1) % 65536;

         // Notify memory manager about the new command buffer
-        m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(mtlCommandBuffer);
+        m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(commandBuffer.m_id);

         return mtlCommandBuffer;
     }
@@ -1461,12 +1481,14 @@ void MetalRenderer::CommitCommandBuffer()
         if (!commandBuffer.m_commited)
         {
             commandBuffer.m_commandBuffer->addCompletedHandler(^(MTL::CommandBuffer*) {
-                m_memoryManager->GetTemporaryBufferAllocator().CommandBufferFinished(commandBuffer.m_commandBuffer);
+                m_memoryManager->GetTemporaryBufferAllocator().CommandBufferFinished(commandBuffer.m_id);
             });

             commandBuffer.m_commandBuffer->commit();
             commandBuffer.m_commited = true;

+            m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(INVALID_COMMAND_BUFFER_ID);
+
             // Debug
             //m_commandQueue->insertDebugCaptureBoundary();
         }
@@ -1702,8 +1724,8 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
         auto supportBuffer = bufferAllocator.GetBufferAllocation(size);
         memcpy(supportBuffer.data, supportBufferData, size);
         auto buffer = bufferAllocator.GetBuffer(supportBuffer.bufferIndex);
-        if (!HasUnifiedMemory())
-            buffer->didModifyRange(NS::Range(supportBuffer.offset, size));
+        //if (!HasUnifiedMemory())
+        //    buffer->didModifyRange(NS::Range(supportBuffer.offset, size));

         SetBuffer(renderCommandEncoder, mtlShaderType, buffer, supportBuffer.offset, shader->resourceMapping.uniformVarsBufferBindingPoint);
     }
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h
index 7a9b41e4..3d494cbe 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h
@@ -143,9 +143,12 @@ struct MetalState
 struct MetalCommandBuffer
 {
     MTL::CommandBuffer* m_commandBuffer;
+    uint32 m_id;
     bool m_commited = false;
 };

+constexpr uint32 INVALID_COMMAND_BUFFER_ID = std::numeric_limits<uint32>::max();
+
 enum class MetalEncoderType
 {
     None,
@@ -417,6 +420,8 @@ private:

     MetalPerformanceMonitor m_performanceMonitor;

+    uint32 m_commandBufferID = 0;
+
     // Metal objects
     MTL::Device* m_device;
     MTL::CommandQueue* m_commandQueue;