From 269e0721394cc9b87b08e301c0c5016b87d0aa8f Mon Sep 17 00:00:00 2001
From: Samuliak
Date: Sun, 18 Aug 2024 11:32:24 +0200
Subject: [PATCH] move buffer allocators to separate file & fix: high memory usage

---
Review note: the hunk that creates MetalBufferAllocator.h was amended after the
commit was recorded (by-reference iteration in the destructor, an empty() guard
in MetalTemporaryBufferAllocator::GetBuffer, removal of stale free ranges in
CommandBufferFinished, added comments). The blob id on that file's "index" line
therefore refers to the unamended content.

 src/Cafe/CMakeLists.txt | 1 +
 .../Renderer/Metal/MetalBufferAllocator.h | 230 +++++++++++++++++
 .../Latte/Renderer/Metal/MetalMemoryManager.h | 212 +-----------------
 .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 5 +-
 4 files changed, 235 insertions(+), 213 deletions(-)
 create mode 100644 src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h

diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt
index fb4672d2..37bef0e8 100644
--- a/src/Cafe/CMakeLists.txt
+++ b/src/Cafe/CMakeLists.txt
@@ -554,6 +554,7 @@ if(ENABLE_METAL)
         HW/Latte/Renderer/Metal/RendererShaderMtl.h
         HW/Latte/Renderer/Metal/CachedFBOMtl.cpp
         HW/Latte/Renderer/Metal/CachedFBOMtl.h
+        HW/Latte/Renderer/Metal/MetalBufferAllocator.h
         HW/Latte/Renderer/Metal/MetalMemoryManager.cpp
         HW/Latte/Renderer/Metal/MetalMemoryManager.h
         HW/Latte/Renderer/Metal/MetalPipelineCache.cpp
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h
new file mode 100644
index 00000000..20467e65
--- /dev/null
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h
@@ -0,0 +1,230 @@
+#pragma once
+
+#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
+
+// A byte range (offset, size) inside the buffer with index bufferIndex; used for the free list.
+struct MetalBufferRange
+{
+    uint32 bufferIndex;
+    size_t offset;
+    size_t size;
+};
+
+// Sub-allocates ranges out of a growing list of shared-storage MTL::Buffer objects.
+// BufferT is the per-buffer record; it has to expose the Metal buffer as m_buffer.
+template<typename BufferT>
+class MetalBufferAllocator
+{
+public:
+    MetalBufferAllocator(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {}
+
+    ~MetalBufferAllocator()
+    {
+        // By reference: a BufferT may hold a vector, so avoid copying each element
+        for (auto& buffer : m_buffers)
+        {
+            buffer.m_buffer->release();
+        }
+    }
+
+    // Forgets all allocations and marks every buffer as free over its full length.
+    void ResetAllocations()
+    {
+        m_freeBufferRanges.clear();
+        for (uint32_t i = 0; i < m_buffers.size(); i++)
+            m_freeBufferRanges.push_back({i, 0, m_buffers[i].m_buffer->length()});
+    }
+
+    MTL::Buffer* GetBuffer(uint32 bufferIndex)
+    {
+        return m_buffers[bufferIndex].m_buffer;
+    }
+
+    // Returns a sub-allocation of Align(size, 16) bytes, taken from the first free
+    // range that is large enough or, failing that, from a newly created buffer.
+    MetalBufferAllocation GetBufferAllocation(size_t size)
+    {
+        // Align the size
+        size = Align(size, 16);
+
+        // First, try to find a free range
+        for (uint32 i = 0; i < m_freeBufferRanges.size(); i++)
+        {
+            auto& range = m_freeBufferRanges[i];
+            if (size <= range.size)
+            {
+                auto& buffer = m_buffers[range.bufferIndex];
+
+                MetalBufferAllocation allocation;
+                allocation.bufferIndex = range.bufferIndex;
+                allocation.offset = range.offset;
+                allocation.size = size;
+                allocation.data = (uint8*)buffer.m_buffer->contents() + range.offset;
+
+                range.offset += size;
+                range.size -= size;
+
+                if (range.size == 0)
+                {
+                    m_freeBufferRanges.erase(m_freeBufferRanges.begin() + i);
+                }
+
+                return allocation;
+            }
+        }
+
+        // If no free range was found, allocate a new buffer
+        m_allocationSize = std::max(m_allocationSize, size);
+        MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(m_allocationSize, MTL::ResourceStorageModeShared);
+    #ifdef CEMU_DEBUG_ASSERT
+        buffer->setLabel(GetLabel("Buffer from buffer allocator", buffer));
+    #endif
+
+        MetalBufferAllocation allocation;
+        allocation.bufferIndex = m_buffers.size();
+        allocation.offset = 0;
+        allocation.size = size;
+        allocation.data = buffer->contents();
+
+        m_buffers.push_back({buffer});
+
+        // If the buffer is larger than the requested size, add the remaining space to the free buffer ranges
+        if (size < m_allocationSize)
+        {
+            MetalBufferRange range;
+            range.bufferIndex = allocation.bufferIndex;
+            range.offset = size;
+            range.size = m_allocationSize - size;
+
+            m_freeBufferRanges.push_back(range);
+        }
+
+        // Increase the allocation size for the next buffer
+        if (m_allocationSize < 128 * 1024 * 1024)
+            m_allocationSize *= 2;
+
+        return allocation;
+    }
+
+    // Hands the allocation's range back to the free list and sets its offset to INVALID_OFFSET.
+    void FreeAllocation(MetalBufferAllocation& allocation)
+    {
+        MetalBufferRange range;
+        range.bufferIndex = allocation.bufferIndex;
+        range.offset = allocation.offset;
+        range.size = allocation.size;
+
+        allocation.offset = INVALID_OFFSET;
+
+        // Grow an existing free range of the same buffer if it ends right where this one starts
+        for (uint32 i = 0; i < m_freeBufferRanges.size(); i++)
+        {
+            auto& freeRange = m_freeBufferRanges[i];
+            if (freeRange.bufferIndex == range.bufferIndex && freeRange.offset + freeRange.size == range.offset)
+            {
+                freeRange.size += range.size;
+                return;
+            }
+        }
+
+        m_freeBufferRanges.push_back(range);
+    }
+
+protected:
+    class MetalRenderer* m_mtlr;
+
+    size_t m_allocationSize = 8 * 1024 * 1024;
+
+    std::vector<BufferT> m_buffers;
+    std::vector<MetalBufferRange> m_freeBufferRanges;
+};
+
+// Buffer record holding only the Metal buffer.
+struct MetalBuffer
+{
+    MTL::Buffer* m_buffer;
+};
+
+typedef MetalBufferAllocator<MetalBuffer> MetalDefaultBufferAllocator;
+
+// Buffer record that also lists the command buffers recorded against the buffer.
+struct MetalSyncedBuffer
+{
+    MTL::Buffer* m_buffer;
+    std::vector<MTL::CommandBuffer*> m_commandBuffers;
+};
+
+// Allocator whose buffers are recycled as a whole once every command buffer
+// recorded against them has been reported through CommandBufferFinished().
+class MetalTemporaryBufferAllocator : public MetalBufferAllocator<MetalSyncedBuffer>
+{
+public:
+    MetalTemporaryBufferAllocator(class MetalRenderer* metalRenderer) : MetalBufferAllocator<MetalSyncedBuffer>(metalRenderer) {}
+
+    void SetActiveCommandBuffer(MTL::CommandBuffer* commandBuffer)
+    {
+        m_activeCommandBuffer = commandBuffer;
+    }
+
+    // Removes one entry of commandBuffer from each buffer that lists it; a buffer left
+    // without any command buffer becomes free again over its full length.
+    void CommandBufferFinished(MTL::CommandBuffer* commandBuffer)
+    {
+        for (uint32_t i = 0; i < m_buffers.size(); i++)
+        {
+            auto& buffer = m_buffers[i];
+            for (uint32_t j = 0; j < buffer.m_commandBuffers.size(); j++)
+            {
+                if (commandBuffer == buffer.m_commandBuffers[j])
+                {
+                    if (buffer.m_commandBuffers.size() == 1)
+                    {
+                        // Drop the partial free ranges still listed for this buffer, otherwise
+                        // the full range added below would overlap them
+                        m_freeBufferRanges.erase(std::remove_if(m_freeBufferRanges.begin(), m_freeBufferRanges.end(),
+                            [i](const MetalBufferRange& range) { return range.bufferIndex == i; }), m_freeBufferRanges.end());
+
+                        // All command buffers using it have finished execution, we can use it again
+                        m_freeBufferRanges.push_back({i, 0, buffer.m_buffer->length()});
+
+                        buffer.m_commandBuffers.clear();
+                    }
+                    else
+                    {
+                        buffer.m_commandBuffers.erase(buffer.m_commandBuffers.begin() + j);
+                    }
+                    break;
+                }
+            }
+        }
+    }
+
+    // TODO: should this be here? It's just to ensure safety
+    MTL::Buffer* GetBuffer(uint32 bufferIndex)
+    {
+        auto& buffer = m_buffers[bufferIndex];
+        // The list is empty after the last command buffer finished, so check before calling back()
+        if (buffer.m_commandBuffers.empty() || buffer.m_commandBuffers.back() != m_activeCommandBuffer)
+            buffer.m_commandBuffers.push_back(m_activeCommandBuffer);
+
+        return buffer.m_buffer;
+    }
+
+    MetalBufferAllocation GetBufferAllocation(size_t size)
+    {
+        // TODO: remove this
+        if (!m_activeCommandBuffer)
+            throw std::runtime_error("No active command buffer when allocating a buffer!");
+
+        auto allocation = MetalBufferAllocator<MetalSyncedBuffer>::GetBufferAllocation(size);
+
+        auto& buffer = m_buffers[allocation.bufferIndex];
+        if (buffer.m_commandBuffers.empty() || buffer.m_commandBuffers.back() != m_activeCommandBuffer)
+            buffer.m_commandBuffers.push_back(m_activeCommandBuffer);
+
+        return allocation;
+    }
+
+private:
+    MTL::CommandBuffer* m_activeCommandBuffer = nullptr;
+};
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h
index ea8b7554..cc89f5ce 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h
@@ -1,216 +1,6 @@
 #pragma once
 
-#include "Cafe/HW/Latte/ISA/LatteReg.h"
-#include "Cafe/HW/Latte/Core/LatteConst.h"
-#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
-#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
-
-struct MetalBufferRange
-{
-    uint32 bufferIndex;
-    size_t offset;
-    size_t size;
-};
-
-template<typename BufferT>
-class MetalBufferAllocator
-{
-public:
-    MetalBufferAllocator(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {}
-
-    ~MetalBufferAllocator()
-    {
-        for (auto buffer : m_buffers)
-        {
-            buffer.m_buffer->release();
-        }
-    }
-
-    void ResetAllocations()
-    {
-        m_freeBufferRanges.clear();
-        for (uint32_t i = 0; i < m_buffers.size(); i++)
-            m_freeBufferRanges.push_back({i, 0, m_buffers[i].m_buffer->length()});
-    }
-
-    MTL::Buffer* GetBuffer(uint32 bufferIndex)
-    {
-        return m_buffers[bufferIndex].m_buffer;
-    }
-
-    MetalBufferAllocation GetBufferAllocation(size_t size)
-    {
-        // Align the size
-        size = Align(size, 16);
-
-        // First, try to find a free range
-        for (uint32 i = 0; i < m_freeBufferRanges.size(); i++)
-        {
-            auto& range = m_freeBufferRanges[i];
-            if (size <= range.size)
-            {
-                auto& buffer = m_buffers[range.bufferIndex];
-
-                MetalBufferAllocation allocation;
-                allocation.bufferIndex = range.bufferIndex;
-                allocation.offset = range.offset;
-                allocation.size = size;
-                allocation.data = (uint8*)buffer.m_buffer->contents() + range.offset;
-
-                range.offset += size;
-                range.size -= size;
-
-                if (range.size == 0)
-                {
-                    m_freeBufferRanges.erase(m_freeBufferRanges.begin() + i);
-                }
-
-                return allocation;
-            }
-        }
-
-        // If no free range was found, allocate a new buffer
-        m_allocationSize = std::max(m_allocationSize, size);
-        MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(m_allocationSize, MTL::ResourceStorageModeShared);
-    #ifdef CEMU_DEBUG_ASSERT
-        buffer->setLabel(GetLabel("Buffer from buffer allocator", buffer));
-    #endif
-
-        MetalBufferAllocation allocation;
-        allocation.bufferIndex = m_buffers.size();
-        allocation.offset = 0;
-        allocation.size = size;
-        allocation.data = buffer->contents();
-
-        m_buffers.push_back({buffer});
-
-        // If the buffer is larger than the requested size, add the remaining space to the free buffer ranges
-        if (size < m_allocationSize)
-        {
-            MetalBufferRange range;
-            range.bufferIndex = allocation.bufferIndex;
-            range.offset = size;
-            range.size = m_allocationSize - size;
-
-            m_freeBufferRanges.push_back(range);
-        }
-
-        // Increase the allocation size for the next buffer
-        if (m_allocationSize < 128 * 1024 * 1024)
-            m_allocationSize *= 2;
-
-        return allocation;
-    }
-
-    void FreeAllocation(MetalBufferAllocation& allocation)
-    {
-        MetalBufferRange range;
-        range.bufferIndex = allocation.bufferIndex;
-        range.offset = allocation.offset;
-        range.size = allocation.size;
-
-        allocation.offset = INVALID_OFFSET;
-
-        // Find the correct position to insert the free range
-        for (uint32 i = 0; i < m_freeBufferRanges.size(); i++)
-        {
-            auto& freeRange = m_freeBufferRanges[i];
-            if (freeRange.bufferIndex == range.bufferIndex && freeRange.offset + freeRange.size == range.offset)
-            {
-                freeRange.size += range.size;
-                return;
-            }
-        }
-
-        m_freeBufferRanges.push_back(range);
-    }
-
-protected:
-    class MetalRenderer* m_mtlr;
-
-    size_t m_allocationSize = 8 * 1024 * 1024;
-
-    std::vector<BufferT> m_buffers;
-    std::vector<MetalBufferRange> m_freeBufferRanges;
-};
-
-struct MetalBuffer
-{
-    MTL::Buffer* m_buffer;
-};
-
-typedef MetalBufferAllocator<MetalBuffer> MetalDefaultBufferAllocator;
-
-struct MetalSyncedBuffer
-{
-    MTL::Buffer* m_buffer;
-    std::vector<MTL::CommandBuffer*> m_commandBuffers;
-};
-
-class MetalTemporaryBufferAllocator : public MetalBufferAllocator<MetalSyncedBuffer>
-{
-public:
-    MetalTemporaryBufferAllocator(class MetalRenderer* metalRenderer) : MetalBufferAllocator<MetalSyncedBuffer>(metalRenderer) {}
-
-    void SetActiveCommandBuffer(MTL::CommandBuffer* commandBuffer)
-    {
-        m_activeCommandBuffer = commandBuffer;
-    }
-
-    void CommandBufferFinished(MTL::CommandBuffer* commandBuffer)
-    {
-        for (uint32_t i = 0; i < m_buffers.size(); i++)
-        {
-            auto& buffer = m_buffers[i];
-            for (uint32_t j = 0; j < buffer.m_commandBuffers.size(); j++)
-            {
-                if (commandBuffer == buffer.m_commandBuffers[j])
-                {
-                    if (buffer.m_commandBuffers.size() == 1)
-                    {
-                        // All command buffers using it have finished execution, we can use it again
-                        m_freeBufferRanges.push_back({i, 0, buffer.m_buffer->length()});
-
-                        buffer.m_commandBuffers.clear();
-                    }
-                    else
-                    {
-                        buffer.m_commandBuffers.erase(buffer.m_commandBuffers.begin() + j);
-                    }
-                    break;
-                }
-            }
-        }
-    }
-
-    // TODO: should this be here? It's just to ensure safety
-    MTL::Buffer* GetBuffer(uint32 bufferIndex)
-    {
-        auto& buffer = m_buffers[bufferIndex];
-        if (buffer.m_commandBuffers.back() != m_activeCommandBuffer)
-            buffer.m_commandBuffers.push_back(m_activeCommandBuffer);
-
-        return buffer.m_buffer;
-    }
-
-    MetalBufferAllocation GetBufferAllocation(size_t size)
-    {
-        // TODO: remove this
-        if (!m_activeCommandBuffer)
-            throw std::runtime_error("No active command buffer when allocating a buffer!");
-
-        auto allocation = MetalBufferAllocator<MetalSyncedBuffer>::GetBufferAllocation(size);
-
-        auto& buffer = m_buffers[allocation.bufferIndex];
-        if (buffer.m_commandBuffers.empty() || buffer.m_commandBuffers.back() != m_activeCommandBuffer)
-            buffer.m_commandBuffers.push_back(m_activeCommandBuffer);
-
-        return allocation;
-    }
-
-private:
-    MTL::CommandBuffer* m_activeCommandBuffer = nullptr;
-};
+#include "Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h"
 
 struct MetalRestridedBufferRange
 {
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
index 20123f0d..46b5cfd0 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
@@ -1492,8 +1492,9 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
     */
 
     auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator();
-    auto supportBuffer = bufferAllocator.GetBufferAllocation(sizeof(supportBufferData));
-    memcpy(supportBuffer.data, supportBufferData, sizeof(supportBufferData));
+    size_t size = shader->uniform.uniformRangeSize;
+    auto supportBuffer = bufferAllocator.GetBufferAllocation(size);
+    memcpy(supportBuffer.data, supportBufferData, size);
 
     switch (shader->shaderType)
     {