From 265785772aa07da9c1bdbca3fa7c1ba5910b12ed Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sun, 18 Aug 2024 11:13:49 +0200 Subject: [PATCH] rework buffer allocator --- .../Renderer/Metal/MetalMemoryManager.cpp | 83 +----- .../Latte/Renderer/Metal/MetalMemoryManager.h | 245 ++++++++++++++---- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 32 ++- .../HW/Latte/Renderer/Metal/MetalRenderer.h | 23 +- 4 files changed, 244 insertions(+), 139 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp index fb9419d6..c102bcf2 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp @@ -1,77 +1,8 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h" -#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h" #include "Common/precompiled.h" -MetalBufferAllocator::~MetalBufferAllocator() -{ - for (auto buffer : m_buffers) - { - buffer->release(); - } -} - -MetalBufferAllocation MetalBufferAllocator::GetBufferAllocation(size_t size) -{ - // Align the size - size = Align(size, 16); - - // First, try to find a free range - for (uint32 i = 0; i < m_freeBufferRanges.size(); i++) - { - auto& range = m_freeBufferRanges[i]; - if (size <= range.size) - { - MetalBufferAllocation allocation; - allocation.bufferIndex = range.bufferIndex; - allocation.bufferOffset = range.offset; - allocation.data = (uint8*)m_buffers[range.bufferIndex]->contents() + range.offset; - - range.offset += size; - range.size -= size; - - if (range.size == 0) - { - m_freeBufferRanges.erase(m_freeBufferRanges.begin() + i); - } - - return allocation; - } - } - - // If no free range was found, allocate a new buffer - m_allocationSize = std::max(m_allocationSize, size); - MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(m_allocationSize, MTL::ResourceStorageModeShared); -#ifdef CEMU_DEBUG_ASSERT - buffer->setLabel(GetLabel("Buffer from buffer allocator", buffer)); -#endif - - MetalBufferAllocation allocation; - allocation.bufferIndex = m_buffers.size(); - allocation.bufferOffset = 0; - allocation.data = buffer->contents(); - - m_buffers.push_back(buffer); - - // If the buffer is larger than the requested size, add the remaining space to the free buffer ranges - if (size < m_allocationSize) - { - MetalBufferRange range; - range.bufferIndex = allocation.bufferIndex; - range.offset = size; - range.size = m_allocationSize - size; - - m_freeBufferRanges.push_back(range); - } - - // Increase the allocation size for the next buffer - if (m_allocationSize < 128 * 1024 * 1024) - m_allocationSize *= 2; - - return allocation; -} - MetalVertexBufferCache::~MetalVertexBufferCache() { } @@ -87,13 +18,13 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu return {bufferCache, vertexBufferRange.offset}; } - auto buffer = m_bufferAllocator->GetBuffer(restrideInfo.allocation.bufferIndex); + MTL::Buffer* buffer; if (restrideInfo.memoryInvalidated || stride != restrideInfo.lastStride) { size_t newStride = Align(stride, 4); size_t newSize = vertexBufferRange.size / stride * newStride; - restrideInfo.allocation = m_bufferAllocator->GetBufferAllocation(newSize); - buffer = m_bufferAllocator->GetBuffer(restrideInfo.allocation.bufferIndex); + restrideInfo.allocation = m_bufferAllocator.GetBufferAllocation(newSize); + buffer = m_bufferAllocator.GetBuffer(restrideInfo.allocation.bufferIndex); //uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset; //uint8* newPtr = (uint8*)buffer->contents() + restrideInfo.allocation.bufferOffset; @@ -112,7 +43,7 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu m_mtlr->GetEncoderState().m_renderPipelineState = m_restrideBufferPipeline->GetRenderPipelineState(); MTL::Buffer* buffers[] = {bufferCache, buffer}; - size_t offsets[] = {vertexBufferRange.offset, restrideInfo.allocation.bufferOffset}; + size_t offsets[] = {vertexBufferRange.offset, restrideInfo.allocation.offset}; renderCommandEncoder->setVertexBuffers(buffers, offsets, NS::Range(GET_HELPER_BUFFER_BINDING(0), 2)); m_mtlr->GetEncoderState().m_uniformBufferOffsets[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(0)] = INVALID_OFFSET; m_mtlr->GetEncoderState().m_uniformBufferOffsets[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(1)] = INVALID_OFFSET; @@ -149,8 +80,12 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu restrideInfo.memoryInvalidated = false; restrideInfo.lastStride = newStride; } + else + { + buffer = m_bufferAllocator.GetBuffer(restrideInfo.allocation.bufferIndex); + } - return {buffer, restrideInfo.allocation.bufferOffset}; + return {buffer, restrideInfo.allocation.offset}; } void MetalVertexBufferCache::MemoryRangeChanged(size_t offset, size_t size) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h index 9bffe9f2..ea8b7554 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h @@ -3,20 +3,7 @@ #include "Cafe/HW/Latte/ISA/LatteReg.h" #include "Cafe/HW/Latte/Core/LatteConst.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" - -//const uint32 bufferAllocatorIndexShift = 24; - -struct MetalBufferAllocation -{ - void* data; - uint32 bufferIndex; - size_t bufferOffset = INVALID_OFFSET; - - bool IsValid() const - { - return bufferOffset != INVALID_OFFSET; - } -}; +#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" struct MetalBufferRange { @@ -25,53 +12,212 @@ struct MetalBufferRange size_t size; }; +template class MetalBufferAllocator { public: MetalBufferAllocator(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {} - ~MetalBufferAllocator(); - void ResetTemporaryBuffers() + ~MetalBufferAllocator() + { + for (auto buffer : m_buffers) + { + buffer.m_buffer->release(); + } + } + + void ResetAllocations() { m_freeBufferRanges.clear(); - - // Register the free ranges - for (uint32 i = 0; i < m_buffers.size(); i++) - { - m_freeBufferRanges.push_back({i, 0, m_buffers[i]->length()}); - } + for (uint32_t i = 0; i < m_buffers.size(); i++) + m_freeBufferRanges.push_back({i, 0, m_buffers[i].m_buffer->length()}); } MTL::Buffer* GetBuffer(uint32 bufferIndex) { - return m_buffers[bufferIndex]; + return m_buffers[bufferIndex].m_buffer; } - MetalBufferAllocation GetBufferAllocation(size_t size); + MetalBufferAllocation GetBufferAllocation(size_t size) + { + // Align the size + size = Align(size, 16); -private: + // First, try to find a free range + for (uint32 i = 0; i < m_freeBufferRanges.size(); i++) + { + auto& range = m_freeBufferRanges[i]; + if (size <= range.size) + { + auto& buffer = m_buffers[range.bufferIndex]; + + MetalBufferAllocation allocation; + allocation.bufferIndex = range.bufferIndex; + allocation.offset = range.offset; + allocation.size = size; + allocation.data = (uint8*)buffer.m_buffer->contents() + range.offset; + + range.offset += size; + range.size -= size; + + if (range.size == 0) + { + m_freeBufferRanges.erase(m_freeBufferRanges.begin() + i); + } + + return allocation; + } + } + + // If no free range was found, allocate a new buffer + m_allocationSize = std::max(m_allocationSize, size); + MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(m_allocationSize, MTL::ResourceStorageModeShared); + #ifdef CEMU_DEBUG_ASSERT + buffer->setLabel(GetLabel("Buffer from buffer allocator", buffer)); + #endif + + MetalBufferAllocation allocation; + allocation.bufferIndex = m_buffers.size(); + allocation.offset = 0; + allocation.size = size; + allocation.data = buffer->contents(); + + m_buffers.push_back({buffer}); + + // If the buffer is larger than the requested size, add the remaining space to the free buffer ranges + if (size < m_allocationSize) + { + MetalBufferRange range; + range.bufferIndex = allocation.bufferIndex; + range.offset = size; + range.size = m_allocationSize - size; + + m_freeBufferRanges.push_back(range); + } + + // Increase the allocation size for the next buffer + if (m_allocationSize < 128 * 1024 * 1024) + m_allocationSize *= 2; + + return allocation; + } + + void FreeAllocation(MetalBufferAllocation& allocation) + { + MetalBufferRange range; + range.bufferIndex = allocation.bufferIndex; + range.offset = allocation.offset; + range.size = allocation.size; + + allocation.offset = INVALID_OFFSET; + + // Find the correct position to insert the free range + for (uint32 i = 0; i < m_freeBufferRanges.size(); i++) + { + auto& freeRange = m_freeBufferRanges[i]; + if (freeRange.bufferIndex == range.bufferIndex && freeRange.offset + freeRange.size == range.offset) + { + freeRange.size += range.size; + return; + } + } + + m_freeBufferRanges.push_back(range); + } + +protected: class MetalRenderer* m_mtlr; size_t m_allocationSize = 8 * 1024 * 1024; - std::vector m_buffers; + std::vector m_buffers; std::vector m_freeBufferRanges; }; +struct MetalBuffer +{ + MTL::Buffer* m_buffer; +}; + +typedef MetalBufferAllocator MetalDefaultBufferAllocator; + +struct MetalSyncedBuffer +{ + MTL::Buffer* m_buffer; + std::vector m_commandBuffers; +}; + +class MetalTemporaryBufferAllocator : public MetalBufferAllocator +{ +public: + MetalTemporaryBufferAllocator(class MetalRenderer* metalRenderer) : MetalBufferAllocator(metalRenderer) {} + + void SetActiveCommandBuffer(MTL::CommandBuffer* commandBuffer) + { + m_activeCommandBuffer = commandBuffer; + } + + void CommandBufferFinished(MTL::CommandBuffer* commandBuffer) + { + for (uint32_t i = 0; i < m_buffers.size(); i++) + { + auto& buffer = m_buffers[i]; + for (uint32_t j = 0; j < buffer.m_commandBuffers.size(); j++) + { + if (commandBuffer == buffer.m_commandBuffers[j]) + { + if (buffer.m_commandBuffers.size() == 1) + { + // All command buffers using it have finished execution, we can use it again + m_freeBufferRanges.push_back({i, 0, buffer.m_buffer->length()}); + + buffer.m_commandBuffers.clear(); + } + else + { + buffer.m_commandBuffers.erase(buffer.m_commandBuffers.begin() + j); + } + break; + } + } + } + } + + // TODO: should this be here? It's just to ensure safety + MTL::Buffer* GetBuffer(uint32 bufferIndex) + { + auto& buffer = m_buffers[bufferIndex]; + if (buffer.m_commandBuffers.back() != m_activeCommandBuffer) + buffer.m_commandBuffers.push_back(m_activeCommandBuffer); + + return buffer.m_buffer; + } + + MetalBufferAllocation GetBufferAllocation(size_t size) + { + // TODO: remove this + if (!m_activeCommandBuffer) + throw std::runtime_error("No active command buffer when allocating a buffer!"); + + auto allocation = MetalBufferAllocator::GetBufferAllocation(size); + + auto& buffer = m_buffers[allocation.bufferIndex]; + if (buffer.m_commandBuffers.empty() || buffer.m_commandBuffers.back() != m_activeCommandBuffer) + buffer.m_commandBuffers.push_back(m_activeCommandBuffer); + + return allocation; + } + +private: + MTL::CommandBuffer* m_activeCommandBuffer = nullptr; +}; + struct MetalRestridedBufferRange { MTL::Buffer* buffer; size_t offset; }; -// TODO: use one big buffer for all the restrided vertex buffers? -struct MetalRestrideInfo -{ - bool memoryInvalidated = true; - size_t lastStride = 0; - MetalBufferAllocation allocation{}; -}; - struct MetalVertexBufferRange { size_t offset = INVALID_OFFSET; @@ -84,7 +230,7 @@ class MetalVertexBufferCache public: friend class MetalMemoryManager; - MetalVertexBufferCache(class MetalRenderer* metalRenderer, MetalBufferAllocator* bufferAllocator) : m_mtlr{metalRenderer}, m_bufferAllocator{bufferAllocator} {} + MetalVertexBufferCache(class MetalRenderer* metalRenderer, MetalDefaultBufferAllocator& bufferAllocator) : m_mtlr{metalRenderer}, m_bufferAllocator{bufferAllocator} {} ~MetalVertexBufferCache(); void SetRestrideBufferPipeline(class MetalHybridComputePipeline* restrideBufferPipeline) @@ -100,6 +246,8 @@ public: void UntrackVertexBuffer(uint32 bufferIndex) { auto& range = m_bufferRanges[bufferIndex]; + //if (range.restrideInfo->allocation.offset != INVALID_OFFSET) + // m_bufferAllocator.FreeAllocation(range.restrideInfo->allocation); range.offset = INVALID_OFFSET; } @@ -107,7 +255,7 @@ public: private: class MetalRenderer* m_mtlr; - MetalBufferAllocator* m_bufferAllocator; + MetalDefaultBufferAllocator& m_bufferAllocator; class MetalHybridComputePipeline* m_restrideBufferPipeline = nullptr; @@ -119,7 +267,7 @@ private: class MetalMemoryManager { public: - MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer), m_vertexBufferCache(metalRenderer, &m_bufferAllocator) {} + MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer), m_framePersistentBufferAllocator(metalRenderer), m_tempBufferAllocator(metalRenderer), m_vertexBufferCache(metalRenderer, m_framePersistentBufferAllocator) {} ~MetalMemoryManager(); // Pipelines @@ -128,25 +276,19 @@ public: m_vertexBufferCache.SetRestrideBufferPipeline(restrideBufferPipeline); } - void ResetTemporaryBuffers() + MetalDefaultBufferAllocator& GetBufferAllocator() { - m_bufferAllocator/*s[m_bufferAllocatorIndex]*/.ResetTemporaryBuffers(); - //m_bufferAllocatorIndex = (m_bufferAllocatorIndex + 1) % 2; + return m_bufferAllocator; } - MTL::Buffer* GetBuffer(uint32 bufferIndex) + MetalDefaultBufferAllocator& GetFramePersistentBufferAllocator() { - //uint32 bufferAllocatorIndex = (bufferIndex >> bufferAllocatorIndexShift); - - return m_bufferAllocator/*s[bufferAllocatorIndex]*/.GetBuffer(bufferIndex); + return m_framePersistentBufferAllocator; } - MetalBufferAllocation GetBufferAllocation(size_t size) + MetalTemporaryBufferAllocator& GetTemporaryBufferAllocator() { - auto allocation = m_bufferAllocator/*s[m_bufferAllocatorIndex]*/.GetBufferAllocation(size); - //allocation.bufferIndex |= (m_bufferAllocatorIndex << bufferAllocatorIndexShift); - - return allocation; + return m_tempBufferAllocator; } MTL::Buffer* GetBufferCache() @@ -182,8 +324,9 @@ private: std::vector m_textureUploadBuffer; - MetalBufferAllocator m_bufferAllocator;//s[2]; - //uint8 m_bufferAllocatorIndex = 0; + MetalDefaultBufferAllocator m_bufferAllocator; + MetalDefaultBufferAllocator m_framePersistentBufferAllocator; + MetalTemporaryBufferAllocator m_tempBufferAllocator; MetalVertexBufferCache m_vertexBufferCache; MTL::Buffer* m_bufferCache = nullptr; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 94d7d916..20123f0d 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -1,5 +1,6 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalLayer.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h" @@ -253,8 +254,8 @@ void MetalRenderer::SwapBuffers(bool swapTV, bool swapDRC) m_commandBuffers[i].m_commandBuffer->release(); m_commandBuffers.clear(); - // Reset temporary buffers - m_memoryManager->ResetTemporaryBuffers(); + // Release frame persistent buffers + m_memoryManager->GetFramePersistentBufferAllocator().ResetAllocations(); } // TODO: use `shader` for drawing @@ -953,7 +954,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 if (hostIndexType != INDEX_TYPE::NONE) { auto mtlIndexType = GetMtlIndexType(hostIndexType); - MTL::Buffer* indexBuffer = m_memoryManager->GetBuffer(indexBufferIndex); + MTL::Buffer* indexBuffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(indexBufferIndex); renderCommandEncoder->drawIndexedPrimitives(mtlPrimitiveType, hostIndexCount, mtlIndexType, indexBuffer, indexBufferOffset, instanceCount, baseVertex, baseInstance); } else { @@ -983,8 +984,8 @@ void MetalRenderer::draw_endSequence() void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) { - auto allocation = m_memoryManager->GetBufferAllocation(size); - offset = allocation.bufferOffset; + auto allocation = m_memoryManager->GetTemporaryBufferAllocator().GetBufferAllocation(size); + offset = allocation.offset; bufferIndex = allocation.bufferIndex; return allocation.data; @@ -1006,6 +1007,9 @@ MTL::CommandBuffer* MetalRenderer::GetCommandBuffer() MTL::CommandBuffer* mtlCommandBuffer = m_commandQueue->commandBuffer(); m_commandBuffers.push_back({mtlCommandBuffer}); + // Notify memory manager about the new command buffer + m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(mtlCommandBuffer); + return mtlCommandBuffer; } else @@ -1176,6 +1180,10 @@ void MetalRenderer::CommitCommandBuffer() auto& commandBuffer = m_commandBuffers.back(); if (!commandBuffer.m_commited) { + commandBuffer.m_commandBuffer->addCompletedHandler(^(MTL::CommandBuffer* cmd) { + m_memoryManager->GetTemporaryBufferAllocator().CommandBufferFinished(commandBuffer.m_commandBuffer); + }); + commandBuffer.m_commandBuffer->commit(); commandBuffer.m_commited = true; @@ -1483,22 +1491,22 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE } */ - // TODO: uncomment - //auto supportBuffer = m_memoryManager->GetBufferAllocation(sizeof(supportBufferData)); - //memcpy(supportBuffer.data, supportBufferData, sizeof(supportBufferData)); + auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); + auto supportBuffer = bufferAllocator.GetBufferAllocation(sizeof(supportBufferData)); + memcpy(supportBuffer.data, supportBufferData, sizeof(supportBufferData)); switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: { - //renderCommandEncoder->setVertexBuffer(m_memoryManager->GetBuffer(supportBuffer.bufferIndex), supportBuffer.bufferOffset, MTL_SUPPORT_BUFFER_BINDING); - renderCommandEncoder->setVertexBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING); + renderCommandEncoder->setVertexBuffer(bufferAllocator.GetBuffer(supportBuffer.bufferIndex), supportBuffer.offset, MTL_SUPPORT_BUFFER_BINDING); + //renderCommandEncoder->setVertexBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING); break; } case LatteConst::ShaderType::Pixel: { - //renderCommandEncoder->setFragmentBuffer(m_memoryManager->GetBuffer(supportBuffer.bufferIndex), supportBuffer.bufferOffset, MTL_SUPPORT_BUFFER_BINDING); - renderCommandEncoder->setFragmentBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING); + renderCommandEncoder->setFragmentBuffer(bufferAllocator.GetBuffer(supportBuffer.bufferIndex), supportBuffer.offset, MTL_SUPPORT_BUFFER_BINDING); + //renderCommandEncoder->setFragmentBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING); break; } default: diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 664e8815..8d63f6da 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -6,8 +6,27 @@ #include "Cafe/HW/Latte/Renderer/Renderer.h" -#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h" -#include "Metal/MTLRenderCommandEncoder.hpp" +#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" + +struct MetalBufferAllocation +{ + void* data; + uint32 bufferIndex; + size_t offset = INVALID_OFFSET; + size_t size; + + bool IsValid() const + { + return offset != INVALID_OFFSET; + } +}; + +struct MetalRestrideInfo +{ + bool memoryInvalidated = true; + size_t lastStride = 0; + MetalBufferAllocation allocation{}; +}; struct MetalBoundBuffer {