rework buffer allocator

This commit is contained in:
Samuliak 2024-08-18 11:13:49 +02:00
parent 07989d828c
commit 265785772a
4 changed files with 244 additions and 139 deletions

View File

@ -1,77 +1,8 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h"
#include "Common/precompiled.h"
// Releases every Metal buffer that this allocator created.
MetalBufferAllocator::~MetalBufferAllocator()
{
	for (size_t idx = 0; idx < m_buffers.size(); idx++)
		m_buffers[idx]->release();
}
// Hands out `size` bytes (after aligning the size with Align(size, 16)) from the allocator's buffers.
// The first free range that is large enough is reused; when none fits, a new shared-storage
// buffer is created and the part of it that this allocation does not use is recorded as free.
// Returns the buffer index, the offset inside that buffer and a pointer to the bytes.
MetalBufferAllocation MetalBufferAllocator::GetBufferAllocation(size_t size)
{
	size = Align(size, 16);

	// First fit over the free list
	for (auto it = m_freeBufferRanges.begin(); it != m_freeBufferRanges.end(); ++it)
	{
		if (it->size < size)
			continue;

		MetalBufferAllocation result;
		result.bufferIndex = it->bufferIndex;
		result.bufferOffset = it->offset;
		result.data = (uint8*)m_buffers[it->bufferIndex]->contents() + it->offset;

		// Carve the allocation off the front of the range; drop the range once it is used up
		it->offset += size;
		it->size -= size;
		if (it->size == 0)
			m_freeBufferRanges.erase(it);

		return result;
	}

	// Nothing fits: create a new buffer that is at least as large as the request
	m_allocationSize = std::max(m_allocationSize, size);
	MTL::Buffer* newBuffer = m_mtlr->GetDevice()->newBuffer(m_allocationSize, MTL::ResourceStorageModeShared);
#ifdef CEMU_DEBUG_ASSERT
	newBuffer->setLabel(GetLabel("Buffer from buffer allocator", newBuffer));
#endif

	MetalBufferAllocation result;
	result.bufferIndex = m_buffers.size();
	result.bufferOffset = 0;
	result.data = newBuffer->contents();
	m_buffers.push_back(newBuffer);

	// Whatever lies beyond this allocation in the new buffer stays available
	if (m_allocationSize > size)
	{
		MetalBufferRange tail;
		tail.bufferIndex = result.bufferIndex;
		tail.offset = size;
		tail.size = m_allocationSize - size;
		m_freeBufferRanges.push_back(tail);
	}

	// The next buffer is created twice as large, until the 128 MiB threshold is reached
	if (m_allocationSize < 128 * 1024 * 1024)
		m_allocationSize *= 2;

	return result;
}
// Destructor; the body is intentionally empty in this view, nothing is released here.
MetalVertexBufferCache::~MetalVertexBufferCache()
{
}
@ -87,13 +18,13 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
return {bufferCache, vertexBufferRange.offset};
}
auto buffer = m_bufferAllocator->GetBuffer(restrideInfo.allocation.bufferIndex);
MTL::Buffer* buffer;
if (restrideInfo.memoryInvalidated || stride != restrideInfo.lastStride)
{
size_t newStride = Align(stride, 4);
size_t newSize = vertexBufferRange.size / stride * newStride;
restrideInfo.allocation = m_bufferAllocator->GetBufferAllocation(newSize);
buffer = m_bufferAllocator->GetBuffer(restrideInfo.allocation.bufferIndex);
restrideInfo.allocation = m_bufferAllocator.GetBufferAllocation(newSize);
buffer = m_bufferAllocator.GetBuffer(restrideInfo.allocation.bufferIndex);
//uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset;
//uint8* newPtr = (uint8*)buffer->contents() + restrideInfo.allocation.bufferOffset;
@ -112,7 +43,7 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
m_mtlr->GetEncoderState().m_renderPipelineState = m_restrideBufferPipeline->GetRenderPipelineState();
MTL::Buffer* buffers[] = {bufferCache, buffer};
size_t offsets[] = {vertexBufferRange.offset, restrideInfo.allocation.bufferOffset};
size_t offsets[] = {vertexBufferRange.offset, restrideInfo.allocation.offset};
renderCommandEncoder->setVertexBuffers(buffers, offsets, NS::Range(GET_HELPER_BUFFER_BINDING(0), 2));
m_mtlr->GetEncoderState().m_uniformBufferOffsets[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(0)] = INVALID_OFFSET;
m_mtlr->GetEncoderState().m_uniformBufferOffsets[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(1)] = INVALID_OFFSET;
@ -149,8 +80,12 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
restrideInfo.memoryInvalidated = false;
restrideInfo.lastStride = newStride;
}
else
{
buffer = m_bufferAllocator.GetBuffer(restrideInfo.allocation.bufferIndex);
}
return {buffer, restrideInfo.allocation.bufferOffset};
return {buffer, restrideInfo.allocation.offset};
}
void MetalVertexBufferCache::MemoryRangeChanged(size_t offset, size_t size)

View File

@ -3,20 +3,7 @@
#include "Cafe/HW/Latte/ISA/LatteReg.h"
#include "Cafe/HW/Latte/Core/LatteConst.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
//const uint32 bufferAllocatorIndexShift = 24;
// Describes one sub-allocation handed out by the buffer allocator.
struct MetalBufferAllocation
{
// Pointer to the allocation's bytes (the buffer's contents() plus the offset)
void* data;
// Index of the backing buffer inside the allocator
uint32 bufferIndex;
// Byte offset inside the backing buffer; INVALID_OFFSET until one is assigned
size_t bufferOffset = INVALID_OFFSET;
// True when bufferOffset holds something other than INVALID_OFFSET
bool IsValid() const
{
return bufferOffset != INVALID_OFFSET;
}
};
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
struct MetalBufferRange
{
@ -25,53 +12,212 @@ struct MetalBufferRange
size_t size;
};
template<typename BufferT>
class MetalBufferAllocator
{
public:
// Stores the renderer pointer; nothing else is initialised by the constructor itself.
MetalBufferAllocator(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {}
~MetalBufferAllocator();
void ResetTemporaryBuffers()
// Releases every Metal buffer held by this allocator.
~MetalBufferAllocator()
{
	// Iterate by reference: BufferT can carry more than the buffer pointer
	// (MetalSyncedBuffer also holds a vector), so copying each element is wasted work.
	for (auto& buffer : m_buffers)
	{
		buffer.m_buffer->release();
	}
}
// Forgets all free-range bookkeeping and then registers every buffer as free over its full length,
// i.e. all previously handed out allocations are treated as released.
// NOTE(review): two loops register the ranges below (one through operator->, one through .m_buffer);
// this looks like the pre-change and post-change version of the same loop - confirm which one applies.
void ResetAllocations()
{
m_freeBufferRanges.clear();
// Register the free ranges
for (uint32 i = 0; i < m_buffers.size(); i++)
{
m_freeBufferRanges.push_back({i, 0, m_buffers[i]->length()});
}
for (uint32_t i = 0; i < m_buffers.size(); i++)
m_freeBufferRanges.push_back({i, 0, m_buffers[i].m_buffer->length()});
}
// Returns the Metal buffer stored at bufferIndex; the index is not range-checked here.
// NOTE(review): two return statements follow, so the second one is unreachable as written;
// presumably the first is the pre-change line and the second the post-change line - confirm.
MTL::Buffer* GetBuffer(uint32 bufferIndex)
{
return m_buffers[bufferIndex];
return m_buffers[bufferIndex].m_buffer;
}
MetalBufferAllocation GetBufferAllocation(size_t size);
// Hands out `size` bytes (after Align(size, 16)) from the allocator's buffers.
// The first free range that is large enough is reused; otherwise a new shared-storage buffer of
// m_allocationSize bytes (raised to `size` if the request is larger) is created.
// Returns an allocation carrying the buffer index, offset, aligned size and a pointer to the bytes.
MetalBufferAllocation GetBufferAllocation(size_t size)
{
// Align the size
size = Align(size, 16);
// NOTE(review): the access specifier on the next line sits inside the function body;
// it appears to be a leftover line from the pre-change class layout - confirm.
private:
// First, try to find a free range
for (uint32 i = 0; i < m_freeBufferRanges.size(); i++)
{
auto& range = m_freeBufferRanges[i];
if (size <= range.size)
{
auto& buffer = m_buffers[range.bufferIndex];
MetalBufferAllocation allocation;
allocation.bufferIndex = range.bufferIndex;
allocation.offset = range.offset;
allocation.size = size;
allocation.data = (uint8*)buffer.m_buffer->contents() + range.offset;
// Take the allocation from the front of the range
range.offset += size;
range.size -= size;
// A fully consumed range is removed from the free list
if (range.size == 0)
{
m_freeBufferRanges.erase(m_freeBufferRanges.begin() + i);
}
return allocation;
}
}
// If no free range was found, allocate a new buffer
m_allocationSize = std::max(m_allocationSize, size);
MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(m_allocationSize, MTL::ResourceStorageModeShared);
#ifdef CEMU_DEBUG_ASSERT
buffer->setLabel(GetLabel("Buffer from buffer allocator", buffer));
#endif
// The new allocation starts at the beginning of the new buffer
MetalBufferAllocation allocation;
allocation.bufferIndex = m_buffers.size();
allocation.offset = 0;
allocation.size = size;
allocation.data = buffer->contents();
m_buffers.push_back({buffer});
// If the buffer is larger than the requested size, add the remaining space to the free buffer ranges
if (size < m_allocationSize)
{
MetalBufferRange range;
range.bufferIndex = allocation.bufferIndex;
range.offset = size;
range.size = m_allocationSize - size;
m_freeBufferRanges.push_back(range);
}
// Increase the allocation size for the next buffer
// (doubling stops once m_allocationSize has reached 128 MiB)
if (m_allocationSize < 128 * 1024 * 1024)
m_allocationSize *= 2;
return allocation;
}
// Returns the bytes of `allocation` to the free list and invalidates the allocation.
//
// allocation: the allocation to release. Its offset is set to INVALID_OFFSET on return, so a
//             second call with the same object is a no-op instead of registering a bogus range.
//
// The freed block is merged with a free range of the same buffer that ends exactly where the block
// starts and/or one that starts exactly where the block ends, which keeps the free list from
// fragmenting into many small adjacent ranges.
void FreeAllocation(MetalBufferAllocation& allocation)
{
	// Ignore allocations that were never assigned or have already been freed
	if (allocation.offset == INVALID_OFFSET)
		return;

	MetalBufferRange range;
	range.bufferIndex = allocation.bufferIndex;
	range.offset = allocation.offset;
	range.size = allocation.size;

	allocation.offset = INVALID_OFFSET;

	// Look for free neighbours on both sides of the freed block
	const size_t notFound = m_freeBufferRanges.size();
	size_t before = notFound;
	size_t after = notFound;
	for (size_t i = 0; i < m_freeBufferRanges.size(); i++)
	{
		const auto& freeRange = m_freeBufferRanges[i];
		if (freeRange.bufferIndex != range.bufferIndex)
			continue;

		if (freeRange.offset + freeRange.size == range.offset)
			before = i;
		else if (range.offset + range.size == freeRange.offset)
			after = i;
	}

	if (before != notFound)
	{
		// Extend the preceding range over the freed block (and over the following range, if any)
		m_freeBufferRanges[before].size += range.size;
		if (after != notFound)
		{
			m_freeBufferRanges[before].size += m_freeBufferRanges[after].size;
			m_freeBufferRanges.erase(m_freeBufferRanges.begin() + after);
		}
		return;
	}

	if (after != notFound)
	{
		// Grow the following range backwards so that it starts at the freed block
		m_freeBufferRanges[after].offset = range.offset;
		m_freeBufferRanges[after].size += range.size;
		return;
	}

	// No adjacent free range: register the block on its own
	m_freeBufferRanges.push_back(range);
}
protected:
class MetalRenderer* m_mtlr;
size_t m_allocationSize = 8 * 1024 * 1024;
std::vector<MTL::Buffer*> m_buffers;
std::vector<BufferT> m_buffers;
std::vector<MetalBufferRange> m_freeBufferRanges;
};
// Plain buffer entry stored by MetalBufferAllocator: just the Metal buffer pointer.
struct MetalBuffer
{
	// Initialised to nullptr so a default-constructed entry never carries an indeterminate pointer
	MTL::Buffer* m_buffer = nullptr;
};
typedef MetalBufferAllocator<MetalBuffer> MetalDefaultBufferAllocator;
// Buffer entry that additionally records the command buffers which referenced the buffer.
struct MetalSyncedBuffer
{
	// Initialised to nullptr so a default-constructed entry never carries an indeterminate pointer
	MTL::Buffer* m_buffer = nullptr;
	// Command buffers recorded for this buffer; entries are removed again in CommandBufferFinished
	std::vector<MTL::CommandBuffer*> m_commandBuffers;
};
// Buffer allocator whose buffers are tied to command buffers.
// Every buffer records the command buffers it was handed out for; once the last of them is reported
// as finished, the whole buffer is put back on the free list.
class MetalTemporaryBufferAllocator : public MetalBufferAllocator<MetalSyncedBuffer>
{
public:
	MetalTemporaryBufferAllocator(class MetalRenderer* metalRenderer) : MetalBufferAllocator<MetalSyncedBuffer>(metalRenderer) {}

	// Sets the command buffer that later GetBuffer / GetBufferAllocation calls are attributed to.
	void SetActiveCommandBuffer(MTL::CommandBuffer* commandBuffer)
	{
		m_activeCommandBuffer = commandBuffer;
	}

	// Removes `commandBuffer` from every buffer that recorded it. A buffer whose last recorded
	// command buffer finishes becomes free over its full length.
	// NOTE(review): the visible caller invokes this from a command buffer completion handler; if that
	// handler runs on a different thread than the allocating code, access to m_buffers and
	// m_freeBufferRanges needs synchronisation - confirm.
	void CommandBufferFinished(MTL::CommandBuffer* commandBuffer)
	{
		for (uint32_t i = 0; i < m_buffers.size(); i++)
		{
			auto& buffer = m_buffers[i];
			for (uint32_t j = 0; j < buffer.m_commandBuffers.size(); j++)
			{
				if (commandBuffer != buffer.m_commandBuffers[j])
					continue;

				if (buffer.m_commandBuffers.size() == 1)
				{
					// All command buffers using it have finished execution, we can use it again.
					// Drop the partial free ranges still registered for this buffer first, otherwise
					// they would overlap the full-length range and two allocations could alias.
					for (size_t r = m_freeBufferRanges.size(); r-- > 0;)
					{
						if (m_freeBufferRanges[r].bufferIndex == i)
							m_freeBufferRanges.erase(m_freeBufferRanges.begin() + r);
					}
					m_freeBufferRanges.push_back({i, 0, buffer.m_buffer->length()});
					buffer.m_commandBuffers.clear();
				}
				else
				{
					buffer.m_commandBuffers.erase(buffer.m_commandBuffers.begin() + j);
				}
				break;
			}
		}
	}

	// Returns the Metal buffer at bufferIndex and records the active command buffer for it.
	// TODO: should this be here? It's just to ensure safety
	MTL::Buffer* GetBuffer(uint32 bufferIndex)
	{
		auto& buffer = m_buffers[bufferIndex];
		// The list is empty after CommandBufferFinished reclaimed the buffer, so back() must be guarded
		if (buffer.m_commandBuffers.empty() || buffer.m_commandBuffers.back() != m_activeCommandBuffer)
			buffer.m_commandBuffers.push_back(m_activeCommandBuffer);

		return buffer.m_buffer;
	}

	// Allocates `size` bytes through the base allocator and records the active command buffer for
	// the buffer the allocation landed in.
	// Throws std::runtime_error when no active command buffer has been set.
	MetalBufferAllocation GetBufferAllocation(size_t size)
	{
		// TODO: remove this
		if (!m_activeCommandBuffer)
			throw std::runtime_error("No active command buffer when allocating a buffer!");

		auto allocation = MetalBufferAllocator<MetalSyncedBuffer>::GetBufferAllocation(size);

		auto& buffer = m_buffers[allocation.bufferIndex];
		if (buffer.m_commandBuffers.empty() || buffer.m_commandBuffers.back() != m_activeCommandBuffer)
			buffer.m_commandBuffers.push_back(m_activeCommandBuffer);

		return allocation;
	}

private:
	// Command buffer that new allocations and buffer lookups are attributed to; nullptr until set
	MTL::CommandBuffer* m_activeCommandBuffer = nullptr;
};
// Result of RestrideBufferIfNeeded: the buffer to bind and the offset to bind it at.
struct MetalRestridedBufferRange
{
// Buffer that holds the vertex data to use
MTL::Buffer* buffer;
// Byte offset of the data inside `buffer`
size_t offset;
};
// TODO: use one big buffer for all the restrided vertex buffers?
// Per vertex buffer state used by RestrideBufferIfNeeded to decide whether the data must be restrided again.
struct MetalRestrideInfo
{
// Starts out true; RestrideBufferIfNeeded clears it after restriding
bool memoryInvalidated = true;
// Stride used for the last restride; a different requested stride triggers a new restride
size_t lastStride = 0;
// Allocation that holds the restrided copy
MetalBufferAllocation allocation{};
};
struct MetalVertexBufferRange
{
size_t offset = INVALID_OFFSET;
@ -84,7 +230,7 @@ class MetalVertexBufferCache
public:
friend class MetalMemoryManager;
MetalVertexBufferCache(class MetalRenderer* metalRenderer, MetalBufferAllocator* bufferAllocator) : m_mtlr{metalRenderer}, m_bufferAllocator{bufferAllocator} {}
MetalVertexBufferCache(class MetalRenderer* metalRenderer, MetalDefaultBufferAllocator& bufferAllocator) : m_mtlr{metalRenderer}, m_bufferAllocator{bufferAllocator} {}
~MetalVertexBufferCache();
void SetRestrideBufferPipeline(class MetalHybridComputePipeline* restrideBufferPipeline)
@ -100,6 +246,8 @@ public:
// Stops tracking the vertex buffer range at `bufferIndex` by resetting its offset to INVALID_OFFSET.
void UntrackVertexBuffer(uint32 bufferIndex)
{
	auto& trackedRange = m_bufferRanges[bufferIndex];

	// Disabled for now: give the restrided copy back to the allocator as well
	//if (trackedRange.restrideInfo->allocation.offset != INVALID_OFFSET)
	//	m_bufferAllocator.FreeAllocation(trackedRange.restrideInfo->allocation);

	trackedRange.offset = INVALID_OFFSET;
}
@ -107,7 +255,7 @@ public:
private:
class MetalRenderer* m_mtlr;
MetalBufferAllocator* m_bufferAllocator;
MetalDefaultBufferAllocator& m_bufferAllocator;
class MetalHybridComputePipeline* m_restrideBufferPipeline = nullptr;
@ -119,7 +267,7 @@ private:
class MetalMemoryManager
{
public:
MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer), m_vertexBufferCache(metalRenderer, &m_bufferAllocator) {}
MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer), m_framePersistentBufferAllocator(metalRenderer), m_tempBufferAllocator(metalRenderer), m_vertexBufferCache(metalRenderer, m_framePersistentBufferAllocator) {}
~MetalMemoryManager();
// Pipelines
@ -128,25 +276,19 @@ public:
m_vertexBufferCache.SetRestrideBufferPipeline(restrideBufferPipeline);
}
void ResetTemporaryBuffers()
MetalDefaultBufferAllocator& GetBufferAllocator()
{
m_bufferAllocator/*s[m_bufferAllocatorIndex]*/.ResetTemporaryBuffers();
//m_bufferAllocatorIndex = (m_bufferAllocatorIndex + 1) % 2;
return m_bufferAllocator;
}
MTL::Buffer* GetBuffer(uint32 bufferIndex)
MetalDefaultBufferAllocator& GetFramePersistentBufferAllocator()
{
//uint32 bufferAllocatorIndex = (bufferIndex >> bufferAllocatorIndexShift);
return m_bufferAllocator/*s[bufferAllocatorIndex]*/.GetBuffer(bufferIndex);
return m_framePersistentBufferAllocator;
}
MetalBufferAllocation GetBufferAllocation(size_t size)
MetalTemporaryBufferAllocator& GetTemporaryBufferAllocator()
{
auto allocation = m_bufferAllocator/*s[m_bufferAllocatorIndex]*/.GetBufferAllocation(size);
//allocation.bufferIndex |= (m_bufferAllocatorIndex << bufferAllocatorIndexShift);
return allocation;
return m_tempBufferAllocator;
}
MTL::Buffer* GetBufferCache()
@ -182,8 +324,9 @@ private:
std::vector<uint8> m_textureUploadBuffer;
MetalBufferAllocator m_bufferAllocator;//s[2];
//uint8 m_bufferAllocatorIndex = 0;
MetalDefaultBufferAllocator m_bufferAllocator;
MetalDefaultBufferAllocator m_framePersistentBufferAllocator;
MetalTemporaryBufferAllocator m_tempBufferAllocator;
MetalVertexBufferCache m_vertexBufferCache;
MTL::Buffer* m_bufferCache = nullptr;

View File

@ -1,5 +1,6 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalLayer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h"
@ -253,8 +254,8 @@ void MetalRenderer::SwapBuffers(bool swapTV, bool swapDRC)
m_commandBuffers[i].m_commandBuffer->release();
m_commandBuffers.clear();
// Reset temporary buffers
m_memoryManager->ResetTemporaryBuffers();
// Release frame persistent buffers
m_memoryManager->GetFramePersistentBufferAllocator().ResetAllocations();
}
// TODO: use `shader` for drawing
@ -953,7 +954,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
if (hostIndexType != INDEX_TYPE::NONE)
{
auto mtlIndexType = GetMtlIndexType(hostIndexType);
MTL::Buffer* indexBuffer = m_memoryManager->GetBuffer(indexBufferIndex);
MTL::Buffer* indexBuffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(indexBufferIndex);
renderCommandEncoder->drawIndexedPrimitives(mtlPrimitiveType, hostIndexCount, mtlIndexType, indexBuffer, indexBufferOffset, instanceCount, baseVertex, baseInstance);
} else
{
@ -983,8 +984,8 @@ void MetalRenderer::draw_endSequence()
void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex)
{
auto allocation = m_memoryManager->GetBufferAllocation(size);
offset = allocation.bufferOffset;
auto allocation = m_memoryManager->GetTemporaryBufferAllocator().GetBufferAllocation(size);
offset = allocation.offset;
bufferIndex = allocation.bufferIndex;
return allocation.data;
@ -1006,6 +1007,9 @@ MTL::CommandBuffer* MetalRenderer::GetCommandBuffer()
MTL::CommandBuffer* mtlCommandBuffer = m_commandQueue->commandBuffer();
m_commandBuffers.push_back({mtlCommandBuffer});
// Notify memory manager about the new command buffer
m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(mtlCommandBuffer);
return mtlCommandBuffer;
}
else
@ -1176,6 +1180,10 @@ void MetalRenderer::CommitCommandBuffer()
auto& commandBuffer = m_commandBuffers.back();
if (!commandBuffer.m_commited)
{
commandBuffer.m_commandBuffer->addCompletedHandler(^(MTL::CommandBuffer* cmd) {
m_memoryManager->GetTemporaryBufferAllocator().CommandBufferFinished(commandBuffer.m_commandBuffer);
});
commandBuffer.m_commandBuffer->commit();
commandBuffer.m_commited = true;
@ -1483,22 +1491,22 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
}
*/
// TODO: uncomment
//auto supportBuffer = m_memoryManager->GetBufferAllocation(sizeof(supportBufferData));
//memcpy(supportBuffer.data, supportBufferData, sizeof(supportBufferData));
auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator();
auto supportBuffer = bufferAllocator.GetBufferAllocation(sizeof(supportBufferData));
memcpy(supportBuffer.data, supportBufferData, sizeof(supportBufferData));
switch (shader->shaderType)
{
case LatteConst::ShaderType::Vertex:
{
//renderCommandEncoder->setVertexBuffer(m_memoryManager->GetBuffer(supportBuffer.bufferIndex), supportBuffer.bufferOffset, MTL_SUPPORT_BUFFER_BINDING);
renderCommandEncoder->setVertexBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING);
renderCommandEncoder->setVertexBuffer(bufferAllocator.GetBuffer(supportBuffer.bufferIndex), supportBuffer.offset, MTL_SUPPORT_BUFFER_BINDING);
//renderCommandEncoder->setVertexBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING);
break;
}
case LatteConst::ShaderType::Pixel:
{
//renderCommandEncoder->setFragmentBuffer(m_memoryManager->GetBuffer(supportBuffer.bufferIndex), supportBuffer.bufferOffset, MTL_SUPPORT_BUFFER_BINDING);
renderCommandEncoder->setFragmentBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING);
renderCommandEncoder->setFragmentBuffer(bufferAllocator.GetBuffer(supportBuffer.bufferIndex), supportBuffer.offset, MTL_SUPPORT_BUFFER_BINDING);
//renderCommandEncoder->setFragmentBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING);
break;
}
default:

View File

@ -6,8 +6,27 @@
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h"
#include "Metal/MTLRenderCommandEncoder.hpp"
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
// Describes one sub-allocation handed out by a buffer allocator.
// Every member has a default so that a default-constructed allocation never carries indeterminate
// values; the type stays an aggregate, so existing `{}` initialisation keeps working.
struct MetalBufferAllocation
{
	// Pointer to the allocation's bytes (the buffer's contents() plus the offset)
	void* data = nullptr;
	// Index of the backing buffer inside the allocator
	uint32 bufferIndex = 0;
	// Byte offset inside the backing buffer; INVALID_OFFSET marks an invalid allocation
	size_t offset = INVALID_OFFSET;
	// Size of the allocation in bytes
	size_t size = 0;

	// True when the allocation refers to a real range, i.e. its offset is not INVALID_OFFSET
	bool IsValid() const
	{
		return offset != INVALID_OFFSET;
	}
};
// Per vertex buffer state used by RestrideBufferIfNeeded to decide whether the data must be restrided again.
struct MetalRestrideInfo
{
// Starts out true; RestrideBufferIfNeeded clears it after restriding
bool memoryInvalidated = true;
// Stride used for the last restride; a different requested stride triggers a new restride
size_t lastStride = 0;
// Allocation that holds the restrided copy (value-initialised)
MetalBufferAllocation allocation{};
};
struct MetalBoundBuffer
{