release unused buffers

Samuliak 2024-08-28 16:42:55 +02:00
parent 35740c5c8e
commit a6c8d83436
2 changed files with 109 additions and 113 deletions
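The gist of the change in the first file: free ranges are no longer kept in one allocator-wide list but inside each buffer's own record, and every newly created buffer doubles in size (BASE_ALLOCATION_SIZE * 2^bufferCount). Below is a minimal, standalone sketch of that scheme, assuming simplified stand-in types (a plain size instead of MTL::Buffer, no renderer or performance monitor); it is an illustration of the approach, not the Cemu code itself:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

constexpr size_t BASE_ALLOCATION_SIZE = 8 * 1024 * 1024;

struct Range { size_t offset; size_t size; };

struct Buffer
{
    size_t length;                  // stands in for MTL::Buffer::length()
    std::vector<Range> freeRanges;  // free space is now tracked per buffer
};

struct Allocation { uint32_t bufferIndex; size_t offset; size_t size; };

class Allocator
{
public:
    Allocation Allocate(size_t size)
    {
        // First-fit search over every buffer's own free list
        for (uint32_t i = 0; i < m_buffers.size(); i++)
        {
            for (auto& range : m_buffers[i].freeRanges)
            {
                if (size <= range.size)
                {
                    Allocation allocation{i, range.offset, size};
                    range.offset += size;
                    range.size -= size;
                    return allocation;
                }
            }
        }

        // No space anywhere: create a new buffer; each buffer doubles in size
        // (BASE_ALLOCATION_SIZE * 2^bufferCount), raised to the request if needed
        size_t allocationSize = BASE_ALLOCATION_SIZE * (size_t(1) << m_buffers.size());
        allocationSize = std::max(allocationSize, size);

        Buffer buffer{allocationSize, {}};
        if (size < allocationSize)
            buffer.freeRanges.push_back({size, allocationSize - size});
        m_buffers.push_back(std::move(buffer));

        return {uint32_t(m_buffers.size() - 1), 0, size};
    }

    void FreeAllocation(const Allocation& allocation)
    {
        // Merge with the free range that ends where this allocation begins,
        // otherwise append a new range
        auto& buffer = m_buffers[allocation.bufferIndex];
        for (auto& range : buffer.freeRanges)
        {
            if (range.offset + range.size == allocation.offset)
            {
                range.size += allocation.size;
                return;
            }
        }
        buffer.freeRanges.push_back({allocation.offset, allocation.size});
    }

private:
    std::vector<Buffer> m_buffers;
};

int main()
{
    Allocator allocator;
    Allocation a = allocator.Allocate(1024);
    Allocation b = allocator.Allocate(64 * 1024 * 1024); // too big for buffer 0, forces a second buffer
    allocator.FreeAllocation(a);
    std::printf("a -> buffer %u, b -> buffer %u\n", a.bufferIndex, b.bufferIndex);
    return 0;
}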

View File

@@ -6,15 +6,23 @@
 struct MetalBufferRange
 {
-    uint32 bufferIndex;
     size_t offset;
     size_t size;
 };
 
+constexpr size_t BASE_ALLOCATION_SIZE = 8 * 1024 * 1024;
+
 template<typename BufferT>
 class MetalBufferAllocator
 {
 public:
+    struct Buffer
+    {
+        MTL::Buffer* m_buffer;
+        std::vector<MetalBufferRange> m_freeRanges;
+        BufferT m_data;
+    };
+
     MetalBufferAllocator(class MetalRenderer* metalRenderer, MTL::ResourceOptions storageMode) : m_mtlr{metalRenderer} {
         m_isCPUAccessible = (storageMode == MTL::ResourceStorageModeShared) || (storageMode == MTL::ResourceStorageModeManaged);
@@ -33,9 +41,8 @@ public:
     void ResetAllocations()
     {
-        m_freeBufferRanges.clear();
-        for (uint32_t i = 0; i < m_buffers.size(); i++)
-            m_freeBufferRanges.push_back({i, 0, m_buffers[i].m_buffer->length()});
+        for (uint32 i = 0; i < m_buffers.size(); i++)
+            FreeBuffer(i);
     }
 
     MTL::Buffer* GetBuffer(uint32 bufferIndex)
@@ -49,63 +56,62 @@ public:
         size = Align(size, 128);
 
         // First, try to find a free range
-        for (uint32 i = 0; i < m_freeBufferRanges.size(); i++)
-        {
-            auto& range = m_freeBufferRanges[i];
-            if (size <= range.size)
-            {
-                auto& buffer = m_buffers[range.bufferIndex];
-
-                MetalBufferAllocation allocation;
-                allocation.bufferIndex = range.bufferIndex;
-                allocation.offset = range.offset;
-                allocation.size = size;
-                allocation.data = (m_isCPUAccessible ? (uint8*)buffer.m_buffer->contents() + range.offset : nullptr);
-
-                range.offset += size;
-                range.size -= size;
-                if (range.size == 0)
-                {
-                    m_freeBufferRanges.erase(m_freeBufferRanges.begin() + i);
-                }
-
-                return allocation;
-            }
-        }
+        for (uint32 i = 0; i < m_buffers.size(); i++)
+        {
+            auto& buffer = m_buffers[i];
+            for (uint32 j = 0; j < buffer.m_freeRanges.size(); j++)
+            {
+                auto& range = buffer.m_freeRanges[j];
+                if (size <= range.size)
+                {
+                    MetalBufferAllocation allocation;
+                    allocation.bufferIndex = i;
+                    allocation.offset = range.offset;
+                    allocation.size = size;
+                    allocation.data = (m_isCPUAccessible ? (uint8*)buffer.m_buffer->contents() + range.offset : nullptr);
+
+                    range.offset += size;
+                    range.size -= size;
+                    if (range.size == 0)
+                    {
+                        buffer.m_freeRanges.erase(buffer.m_freeRanges.begin() + j);
+                    }
+
+                    return allocation;
+                }
+            }
+        }
 
         // If no free range was found, allocate a new buffer
-        m_allocationSize = std::max(m_allocationSize, size);
-        MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(m_allocationSize, m_options);
+        size_t allocationSize = BASE_ALLOCATION_SIZE * (1u << m_buffers.size());
+        allocationSize = std::max(allocationSize, size);
+        MTL::Buffer* mtlBuffer = m_mtlr->GetDevice()->newBuffer(allocationSize, m_options);
 #ifdef CEMU_DEBUG_ASSERT
-        buffer->setLabel(GetLabel("Buffer from buffer allocator", buffer));
+        mtlBuffer->setLabel(GetLabel("Buffer from buffer allocator", mtlBuffer));
 #endif
 
         MetalBufferAllocation allocation;
         allocation.bufferIndex = m_buffers.size();
         allocation.offset = 0;
         allocation.size = size;
-        allocation.data = (m_isCPUAccessible ? buffer->contents() : nullptr);
+        allocation.data = (m_isCPUAccessible ? mtlBuffer->contents() : nullptr);
 
-        m_buffers.push_back({buffer});
+        m_buffers.push_back({mtlBuffer});
+        auto& buffer = m_buffers.back();
 
         // If the buffer is larger than the requested size, add the remaining space to the free buffer ranges
-        if (size < m_allocationSize)
+        if (size < allocationSize)
         {
             MetalBufferRange range;
-            range.bufferIndex = allocation.bufferIndex;
             range.offset = size;
-            range.size = m_allocationSize - size;
+            range.size = allocationSize - size;
 
-            m_freeBufferRanges.push_back(range);
+            buffer.m_freeRanges.push_back(range);
         }
 
         // Debug
-        m_mtlr->GetPerformanceMonitor().m_bufferAllocatorMemory += m_allocationSize;
-
-        // Increase the allocation size for the next buffer
-        if (m_allocationSize < 128 * 1024 * 1024)
-            m_allocationSize *= 2;
+        m_mtlr->GetPerformanceMonitor().m_bufferAllocatorMemory += allocationSize;
 
         return allocation;
     }
@@ -113,24 +119,24 @@ public:
     void FreeAllocation(MetalBufferAllocation& allocation)
     {
         MetalBufferRange range;
-        range.bufferIndex = allocation.bufferIndex;
         range.offset = allocation.offset;
         range.size = allocation.size;
 
         allocation.offset = INVALID_OFFSET;
 
         // Find the correct position to insert the free range
-        for (uint32 i = 0; i < m_freeBufferRanges.size(); i++)
+        auto& buffer = m_buffers[allocation.bufferIndex];
+        for (uint32 i = 0; i < buffer.m_freeRanges.size(); i++)
         {
-            auto& freeRange = m_freeBufferRanges[i];
-            if (freeRange.bufferIndex == range.bufferIndex && freeRange.offset + freeRange.size == range.offset)
+            auto& freeRange = buffer.m_freeRanges[i];
+            if (freeRange.offset + freeRange.size == range.offset)
             {
                 freeRange.size += range.size;
                 return;
             }
         }
 
-        m_freeBufferRanges.push_back(range);
+        buffer.m_freeRanges.push_back(range);
     }
 
 protected:
@@ -138,22 +144,22 @@ protected:
     bool m_isCPUAccessible;
     MTL::ResourceOptions m_options;
 
-    size_t m_allocationSize = 8 * 1024 * 1024;
-
-    std::vector<BufferT> m_buffers;
-    std::vector<MetalBufferRange> m_freeBufferRanges;
+    std::vector<Buffer> m_buffers;
+
+    void FreeBuffer(uint32 bufferIndex)
+    {
+        auto& buffer = m_buffers[bufferIndex];
+        buffer.m_freeRanges.clear();
+        buffer.m_freeRanges.reserve(1);
+        buffer.m_freeRanges.push_back({0, m_buffers[bufferIndex].m_buffer->length()});
+    }
 };
 
-struct MetalBuffer
-{
-    MTL::Buffer* m_buffer;
-};
-
-typedef MetalBufferAllocator<MetalBuffer> MetalDefaultBufferAllocator;
+struct Empty {};
+typedef MetalBufferAllocator<Empty> MetalDefaultBufferAllocator;
 
 struct MetalSyncedBuffer
 {
-    MTL::Buffer* m_buffer;
     std::vector<MTL::CommandBuffer*> m_commandBuffers;
     uint32 m_lock = 0;
@@ -163,7 +169,7 @@ struct MetalSyncedBuffer
     }
 };
 
-constexpr uint16 MAX_COMMAND_BUFFER_FRAMES = 8;
+constexpr uint16 BUFFER_RELEASE_FRAME_TRESHOLD = 1024;
 
 class MetalTemporaryBufferAllocator : public MetalBufferAllocator<MetalSyncedBuffer>
 {
@@ -172,65 +178,72 @@ public:
     void LockBuffer(uint32 bufferIndex)
     {
-        m_buffers[bufferIndex].m_lock++;
+        m_buffers[bufferIndex].m_data.m_lock++;
     }
 
     void UnlockBuffer(uint32 bufferIndex)
     {
         auto& buffer = m_buffers[bufferIndex];
-        buffer.m_lock--;
+        buffer.m_data.m_lock--;
 
         // TODO: is this really necessary?
         // Release the buffer if it wasn't released due to the lock
-        if (!buffer.IsLocked() && buffer.m_commandBuffers.empty())
+        if (!buffer.m_data.IsLocked() && buffer.m_data.m_commandBuffers.empty())
             FreeBuffer(bufferIndex);
     }
 
-    void UnlockAllBuffers()
+    void EndFrame()
     {
+        CheckForCompletedCommandBuffers();
+
+        // Unlock all buffers
         for (uint32_t i = 0; i < m_buffers.size(); i++)
         {
             auto& buffer = m_buffers[i];
-            if (buffer.m_lock != 0)
+            if (buffer.m_data.m_lock != 0)
             {
-                if (buffer.m_commandBuffers.empty())
+                if (buffer.m_data.m_commandBuffers.empty())
                     FreeBuffer(i);
 
-                buffer.m_lock = 0;
+                buffer.m_data.m_lock = 0;
             }
         }
 
-        /*
-        auto it = m_commandBuffersFrames.begin();
-        while (it != m_commandBuffersFrames.end())
-        {
-            it->second++;
-
-            if (it->second > MAX_COMMAND_BUFFER_FRAMES)
-            {
-                debug_printf("command buffer %p remained unfinished for more than %u frames\n", it->first, MAX_COMMAND_BUFFER_FRAMES);
-
-                // Pretend like the command buffer has finished
-                CommandBufferFinished(it->first, false);
-                it = m_commandBuffersFrames.erase(it);
-            }
-            else
-            {
-                it++;
-            }
-        }
-        */
+        // TODO: do this for other buffer allocators as well?
+        // Track how many frames have passed since the last access to the back buffer
+        if (!m_buffers.empty())
+        {
+            auto& backBuffer = m_buffers.back();
+            if (backBuffer.m_data.m_commandBuffers.empty())
+            {
+                // Release the back buffer if it hasn't been accessed for a while
+                if (m_framesSinceBackBufferAccess >= BUFFER_RELEASE_FRAME_TRESHOLD)
+                {
+                    // Debug
+                    m_mtlr->GetPerformanceMonitor().m_bufferAllocatorMemory -= backBuffer.m_buffer->length();
+
+                    backBuffer.m_buffer->release();
+                    m_buffers.pop_back();
+
+                    m_framesSinceBackBufferAccess = 0;
+                }
+                else
+                {
+                    m_framesSinceBackBufferAccess++;
+                }
+            }
+            else
+            {
+                m_framesSinceBackBufferAccess = 0;
+            }
+        }
     }
 
     void SetActiveCommandBuffer(MTL::CommandBuffer* commandBuffer)
     {
         m_activeCommandBuffer = commandBuffer;
-        //if (commandBuffer)
-        //    m_commandBuffersFrames[commandBuffer] = 0;
     }
 
     void CheckForCompletedCommandBuffers(/*MTL::CommandBuffer* commandBuffer, bool erase = true*/)
@@ -238,24 +251,24 @@ public:
         for (uint32_t i = 0; i < m_buffers.size(); i++)
         {
             auto& buffer = m_buffers[i];
-            for (uint32_t j = 0; j < buffer.m_commandBuffers.size(); j++)
+            for (uint32_t j = 0; j < buffer.m_data.m_commandBuffers.size(); j++)
             {
-                if (m_mtlr->CommandBufferCompleted(buffer.m_commandBuffers[j]))
+                if (m_mtlr->CommandBufferCompleted(buffer.m_data.m_commandBuffers[j]))
                 {
-                    if (buffer.m_commandBuffers.size() == 1)
+                    if (buffer.m_data.m_commandBuffers.size() == 1)
                     {
-                        if (!buffer.IsLocked())
+                        if (!buffer.m_data.IsLocked())
                         {
                             // All command buffers using it have finished execution, we can use it again
                             FreeBuffer(i);
                         }
 
-                        buffer.m_commandBuffers.clear();
+                        buffer.m_data.m_commandBuffers.clear();
                         break;
                     }
                     else
                     {
-                        buffer.m_commandBuffers.erase(buffer.m_commandBuffers.begin() + j);
+                        buffer.m_data.m_commandBuffers.erase(buffer.m_data.m_commandBuffers.begin() + j);
                         j--;
                     }
                 }
@@ -271,8 +284,8 @@ public:
         cemu_assert_debug(m_activeCommandBuffer);
 
         auto& buffer = m_buffers[bufferIndex];
-        if (buffer.m_commandBuffers.empty() || buffer.m_commandBuffers.back() != m_activeCommandBuffer/*std::find(buffer.m_commandBuffers.begin(), buffer.m_commandBuffers.end(), m_activeCommandBuffer) == buffer.m_commandBuffers.end()*/)
-            buffer.m_commandBuffers.push_back(m_activeCommandBuffer);
+        if (buffer.m_data.m_commandBuffers.empty() || buffer.m_data.m_commandBuffers.back() != m_activeCommandBuffer/*std::find(buffer.m_commandBuffers.begin(), buffer.m_commandBuffers.end(), m_activeCommandBuffer) == buffer.m_commandBuffers.end()*/)
+            buffer.m_data.m_commandBuffers.push_back(m_activeCommandBuffer);
 
         return buffer.m_buffer;
     }
@@ -298,33 +311,34 @@ public:
     }
     */
 
+    // For debugging
     /*
     void LogInfo()
     {
         debug_printf("BUFFERS:\n");
         for (auto& buffer : m_buffers)
         {
-            debug_printf(" %p -> size: %lu, command buffers: %zu\n", buffer.m_buffer, buffer.m_buffer->length(), buffer.m_commandBuffers.size());
+            debug_printf(" %p -> size: %lu, command buffers: %zu\n", buffer.m_buffer, buffer.m_buffer->length(), buffer.m_data.m_commandBuffers.size());
             uint32 same = 0;
             uint32 completed = 0;
-            for (uint32 i = 0; i < buffer.m_commandBuffers.size(); i++)
+            for (uint32 i = 0; i < buffer.m_data.m_commandBuffers.size(); i++)
             {
-                if (m_mtlr->CommandBufferCompleted(buffer.m_commandBuffers[i]))
+                if (m_mtlr->CommandBufferCompleted(buffer.m_data.m_commandBuffers[i]))
                     completed++;
-                for (uint32 j = 0; j < buffer.m_commandBuffers.size(); j++)
+                for (uint32 j = 0; j < buffer.m_data.m_commandBuffers.size(); j++)
                 {
-                    if (i != j && buffer.m_commandBuffers[i] == buffer.m_commandBuffers[j])
+                    if (i != j && buffer.m_data.m_commandBuffers[i] == buffer.m_data.m_commandBuffers[j])
                         same++;
                 }
             }
             debug_printf(" same: %u\n", same);
             debug_printf(" completed: %u\n", completed);
-        }
 
-        debug_printf("FREE RANGES:\n");
-        for (auto& range : m_freeBufferRanges)
-        {
-            debug_printf(" %u -> offset: %zu, size: %zu\n", range.bufferIndex, range.offset, range.size);
-        }
+            debug_printf(" FREE RANGES:\n");
+            for (auto& range : buffer.m_freeRanges)
+            {
+                debug_printf(" offset: %zu, size: %zu\n", range.offset, range.size);
+            }
+        }
     }
     */
@@ -332,20 +346,5 @@ public:
 private:
     MTL::CommandBuffer* m_activeCommandBuffer = nullptr;
 
-    //std::map<MTL::CommandBuffer*, uint16> m_commandBuffersFrames;
-
-    void FreeBuffer(uint32 bufferIndex)
-    {
-        // First remove any free ranges that use this buffer
-        for (uint32 k = 0; k < m_freeBufferRanges.size(); k++)
-        {
-            if (m_freeBufferRanges[k].bufferIndex == bufferIndex)
-            {
-                m_freeBufferRanges.erase(m_freeBufferRanges.begin() + k);
-                k--;
-            }
-        }
-
-        m_freeBufferRanges.push_back({bufferIndex, 0, m_buffers[bufferIndex].m_buffer->length()});
-    }
+    uint16 m_framesSinceBackBufferAccess = 0;
 };
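
Most of the m_data edits above fall out of one structural decision: the allocator's own Buffer record now holds the MTL::Buffer and the free-range list, while the template parameter only contributes the per-buffer payload stored in m_data (Empty for MetalDefaultBufferAllocator, MetalSyncedBuffer for the temporary allocator). A standalone sketch of that layering, using assumed placeholder types in place of the Metal ones:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

struct FreeRange { size_t offset; size_t size; };

template<typename PayloadT>
class PoolOfBuffers
{
public:
    struct Buffer
    {
        int handle;                        // stands in for MTL::Buffer*
        std::vector<FreeRange> freeRanges; // bookkeeping owned by the allocator
        PayloadT data;                     // per-buffer payload supplied by the user
    };

    Buffer& AddBuffer(int handle)
    {
        m_buffers.push_back({handle, {}, {}});
        return m_buffers.back();
    }

protected:
    std::vector<Buffer> m_buffers;
};

struct Empty {}; // the plain allocator carries no extra state

struct SyncedPayload // the temporary allocator tracks command buffers and a lock
{
    std::vector<int> pendingCommandBuffers;
    uint32_t lock = 0;
};

int main()
{
    PoolOfBuffers<Empty> plainPool;
    plainPool.AddBuffer(1);

    PoolOfBuffers<SyncedPayload> syncedPool;
    auto& buffer = syncedPool.AddBuffer(42);
    buffer.data.pendingCommandBuffers.push_back(7); // payload is reached through .data
    std::printf("buffer %d tracks %zu command buffer(s)\n",
                buffer.handle, buffer.data.pendingCommandBuffers.size());
    return 0;
}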

View File

@@ -264,10 +264,7 @@ void MetalRenderer::SwapBuffers(bool swapTV, bool swapDRC)
         m_memoryManager->GetFramePersistentBufferAllocator().ResetAllocations();
 
         // Unlock all temporary buffers
-        m_memoryManager->GetTemporaryBufferAllocator().UnlockAllBuffers();
-
-        // Check for completed command buffers
-        m_memoryManager->GetTemporaryBufferAllocator().CheckForCompletedCommandBuffers();
+        m_memoryManager->GetTemporaryBufferAllocator().EndFrame();
     }
// TODO: use `shader` for drawing // TODO: use `shader` for drawing
@@ -504,7 +501,7 @@ void MetalRenderer::texture_clearSlice(LatteTexture* hostTexture, sint32 sliceIn
     }
 }
 
-// TODO: do a GPU blit even on unified memory? That would mean we could use private storage mode for all textures
+// TODO: do a cpu copy on Apple Silicon?
 void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, sint32 height, sint32 depth, void* pixelData, sint32 sliceIndex, sint32 mipIndex, uint32 compressedImageSize)
 {
     auto textureMtl = (LatteTextureMtl*)hostTexture;
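
On the renderer side the per-frame hand-off shrinks to a single call: SwapBuffers now invokes EndFrame(), which runs CheckForCompletedCommandBuffers() itself and then applies the heuristic the commit is named after, releasing the most recently created buffer once it has gone unused for BUFFER_RELEASE_FRAME_TRESHOLD consecutive frames. A stripped-down sketch of that counter, with the "back buffer is idle" test reduced to an assumed boolean flag and plain sizes standing in for Metal buffers:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

constexpr uint16_t BUFFER_RELEASE_FRAME_TRESHOLD = 1024; // spelling follows the diff

struct TrackedBuffer { size_t length; };

class TemporaryPool
{
public:
    void AddBuffer(size_t length) { m_buffers.push_back({length}); }

    // Called once per frame (from SwapBuffers in the diff). backBufferIdle stands in
    // for "no command buffer currently references the most recently added buffer".
    void EndFrame(bool backBufferIdle)
    {
        if (m_buffers.empty())
            return;

        if (!backBufferIdle)
        {
            m_framesSinceBackBufferAccess = 0; // still in use, restart the count
            return;
        }

        if (m_framesSinceBackBufferAccess >= BUFFER_RELEASE_FRAME_TRESHOLD)
        {
            // Idle long enough: drop the newest (and largest) buffer
            std::printf("releasing %zu-byte buffer\n", m_buffers.back().length);
            m_buffers.pop_back();
            m_framesSinceBackBufferAccess = 0;
        }
        else
        {
            m_framesSinceBackBufferAccess++;
        }
    }

private:
    std::vector<TrackedBuffer> m_buffers;
    uint16_t m_framesSinceBackBufferAccess = 0;
};

int main()
{
    TemporaryPool pool;
    pool.AddBuffer(16 * 1024 * 1024);
    for (int frame = 0; frame < 1026; frame++)
        pool.EndFrame(true); // after 1024 consecutive idle frames the buffer is released
    return 0;
}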