mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-01-07 15:48:15 +01:00
release unused buffers
This commit is contained in:
parent
35740c5c8e
commit
a6c8d83436
@ -6,15 +6,23 @@
|
||||
|
||||
struct MetalBufferRange
|
||||
{
|
||||
uint32 bufferIndex;
|
||||
size_t offset;
|
||||
size_t size;
|
||||
};
|
||||
|
||||
constexpr size_t BASE_ALLOCATION_SIZE = 8 * 1024 * 1024;
|
||||
|
||||
template<typename BufferT>
|
||||
class MetalBufferAllocator
|
||||
{
|
||||
public:
|
||||
struct Buffer
|
||||
{
|
||||
MTL::Buffer* m_buffer;
|
||||
std::vector<MetalBufferRange> m_freeRanges;
|
||||
BufferT m_data;
|
||||
};
|
||||
|
||||
MetalBufferAllocator(class MetalRenderer* metalRenderer, MTL::ResourceOptions storageMode) : m_mtlr{metalRenderer} {
|
||||
m_isCPUAccessible = (storageMode == MTL::ResourceStorageModeShared) || (storageMode == MTL::ResourceStorageModeManaged);
|
||||
|
||||
@ -33,9 +41,8 @@ public:
|
||||
|
||||
void ResetAllocations()
|
||||
{
|
||||
m_freeBufferRanges.clear();
|
||||
for (uint32_t i = 0; i < m_buffers.size(); i++)
|
||||
m_freeBufferRanges.push_back({i, 0, m_buffers[i].m_buffer->length()});
|
||||
for (uint32 i = 0; i < m_buffers.size(); i++)
|
||||
FreeBuffer(i);
|
||||
}
|
||||
|
||||
MTL::Buffer* GetBuffer(uint32 bufferIndex)
|
||||
@ -49,63 +56,62 @@ public:
|
||||
size = Align(size, 128);
|
||||
|
||||
// First, try to find a free range
|
||||
for (uint32 i = 0; i < m_freeBufferRanges.size(); i++)
|
||||
for (uint32 i = 0; i < m_buffers.size(); i++)
|
||||
{
|
||||
auto& range = m_freeBufferRanges[i];
|
||||
if (size <= range.size)
|
||||
auto& buffer = m_buffers[i];
|
||||
for (uint32 j = 0; j < buffer.m_freeRanges.size(); j++)
|
||||
{
|
||||
auto& buffer = m_buffers[range.bufferIndex];
|
||||
|
||||
MetalBufferAllocation allocation;
|
||||
allocation.bufferIndex = range.bufferIndex;
|
||||
allocation.offset = range.offset;
|
||||
allocation.size = size;
|
||||
allocation.data = (m_isCPUAccessible ? (uint8*)buffer.m_buffer->contents() + range.offset : nullptr);
|
||||
|
||||
range.offset += size;
|
||||
range.size -= size;
|
||||
|
||||
if (range.size == 0)
|
||||
auto& range = buffer.m_freeRanges[j];
|
||||
if (size <= range.size)
|
||||
{
|
||||
m_freeBufferRanges.erase(m_freeBufferRanges.begin() + i);
|
||||
}
|
||||
MetalBufferAllocation allocation;
|
||||
allocation.bufferIndex = i;
|
||||
allocation.offset = range.offset;
|
||||
allocation.size = size;
|
||||
allocation.data = (m_isCPUAccessible ? (uint8*)buffer.m_buffer->contents() + range.offset : nullptr);
|
||||
|
||||
return allocation;
|
||||
range.offset += size;
|
||||
range.size -= size;
|
||||
|
||||
if (range.size == 0)
|
||||
{
|
||||
buffer.m_freeRanges.erase(buffer.m_freeRanges.begin() + j);
|
||||
}
|
||||
|
||||
return allocation;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If no free range was found, allocate a new buffer
|
||||
m_allocationSize = std::max(m_allocationSize, size);
|
||||
MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(m_allocationSize, m_options);
|
||||
size_t allocationSize = BASE_ALLOCATION_SIZE * (1u << m_buffers.size());
|
||||
allocationSize = std::max(allocationSize, size);
|
||||
MTL::Buffer* mtlBuffer = m_mtlr->GetDevice()->newBuffer(allocationSize, m_options);
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
buffer->setLabel(GetLabel("Buffer from buffer allocator", buffer));
|
||||
mtlBuffer->setLabel(GetLabel("Buffer from buffer allocator", mtlBuffer));
|
||||
#endif
|
||||
|
||||
MetalBufferAllocation allocation;
|
||||
allocation.bufferIndex = m_buffers.size();
|
||||
allocation.offset = 0;
|
||||
allocation.size = size;
|
||||
allocation.data = (m_isCPUAccessible ? buffer->contents() : nullptr);
|
||||
allocation.data = (m_isCPUAccessible ? mtlBuffer->contents() : nullptr);
|
||||
|
||||
m_buffers.push_back({buffer});
|
||||
m_buffers.push_back({mtlBuffer});
|
||||
auto& buffer = m_buffers.back();
|
||||
|
||||
// If the buffer is larger than the requested size, add the remaining space to the free buffer ranges
|
||||
if (size < m_allocationSize)
|
||||
if (size < allocationSize)
|
||||
{
|
||||
MetalBufferRange range;
|
||||
range.bufferIndex = allocation.bufferIndex;
|
||||
range.offset = size;
|
||||
range.size = m_allocationSize - size;
|
||||
range.size = allocationSize - size;
|
||||
|
||||
m_freeBufferRanges.push_back(range);
|
||||
buffer.m_freeRanges.push_back(range);
|
||||
}
|
||||
|
||||
// Debug
|
||||
m_mtlr->GetPerformanceMonitor().m_bufferAllocatorMemory += m_allocationSize;
|
||||
|
||||
// Increase the allocation size for the next buffer
|
||||
if (m_allocationSize < 128 * 1024 * 1024)
|
||||
m_allocationSize *= 2;
|
||||
m_mtlr->GetPerformanceMonitor().m_bufferAllocatorMemory += allocationSize;
|
||||
|
||||
return allocation;
|
||||
}
|
||||
@ -113,24 +119,24 @@ public:
|
||||
void FreeAllocation(MetalBufferAllocation& allocation)
|
||||
{
|
||||
MetalBufferRange range;
|
||||
range.bufferIndex = allocation.bufferIndex;
|
||||
range.offset = allocation.offset;
|
||||
range.size = allocation.size;
|
||||
|
||||
allocation.offset = INVALID_OFFSET;
|
||||
|
||||
// Find the correct position to insert the free range
|
||||
for (uint32 i = 0; i < m_freeBufferRanges.size(); i++)
|
||||
auto& buffer = m_buffers[allocation.bufferIndex];
|
||||
for (uint32 i = 0; i < buffer.m_freeRanges.size(); i++)
|
||||
{
|
||||
auto& freeRange = m_freeBufferRanges[i];
|
||||
if (freeRange.bufferIndex == range.bufferIndex && freeRange.offset + freeRange.size == range.offset)
|
||||
auto& freeRange = buffer.m_freeRanges[i];
|
||||
if (freeRange.offset + freeRange.size == range.offset)
|
||||
{
|
||||
freeRange.size += range.size;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
m_freeBufferRanges.push_back(range);
|
||||
buffer.m_freeRanges.push_back(range);
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -138,22 +144,22 @@ protected:
|
||||
bool m_isCPUAccessible;
|
||||
MTL::ResourceOptions m_options;
|
||||
|
||||
size_t m_allocationSize = 8 * 1024 * 1024;
|
||||
std::vector<Buffer> m_buffers;
|
||||
|
||||
std::vector<BufferT> m_buffers;
|
||||
std::vector<MetalBufferRange> m_freeBufferRanges;
|
||||
void FreeBuffer(uint32 bufferIndex)
|
||||
{
|
||||
auto& buffer = m_buffers[bufferIndex];
|
||||
buffer.m_freeRanges.clear();
|
||||
buffer.m_freeRanges.reserve(1);
|
||||
buffer.m_freeRanges.push_back({0, m_buffers[bufferIndex].m_buffer->length()});
|
||||
}
|
||||
};
|
||||
|
||||
struct MetalBuffer
|
||||
{
|
||||
MTL::Buffer* m_buffer;
|
||||
};
|
||||
|
||||
typedef MetalBufferAllocator<MetalBuffer> MetalDefaultBufferAllocator;
|
||||
struct Empty {};
|
||||
typedef MetalBufferAllocator<Empty> MetalDefaultBufferAllocator;
|
||||
|
||||
struct MetalSyncedBuffer
|
||||
{
|
||||
MTL::Buffer* m_buffer;
|
||||
std::vector<MTL::CommandBuffer*> m_commandBuffers;
|
||||
uint32 m_lock = 0;
|
||||
|
||||
@ -163,7 +169,7 @@ struct MetalSyncedBuffer
|
||||
}
|
||||
};
|
||||
|
||||
constexpr uint16 MAX_COMMAND_BUFFER_FRAMES = 8;
|
||||
constexpr uint16 BUFFER_RELEASE_FRAME_TRESHOLD = 1024;
|
||||
|
||||
class MetalTemporaryBufferAllocator : public MetalBufferAllocator<MetalSyncedBuffer>
|
||||
{
|
||||
@ -172,65 +178,72 @@ public:
|
||||
|
||||
void LockBuffer(uint32 bufferIndex)
|
||||
{
|
||||
m_buffers[bufferIndex].m_lock++;
|
||||
m_buffers[bufferIndex].m_data.m_lock++;
|
||||
}
|
||||
|
||||
void UnlockBuffer(uint32 bufferIndex)
|
||||
{
|
||||
auto& buffer = m_buffers[bufferIndex];
|
||||
|
||||
buffer.m_lock--;
|
||||
buffer.m_data.m_lock--;
|
||||
|
||||
// TODO: is this really necessary?
|
||||
// Release the buffer if it wasn't released due to the lock
|
||||
if (!buffer.IsLocked() && buffer.m_commandBuffers.empty())
|
||||
if (!buffer.m_data.IsLocked() && buffer.m_data.m_commandBuffers.empty())
|
||||
FreeBuffer(bufferIndex);
|
||||
}
|
||||
|
||||
void UnlockAllBuffers()
|
||||
void EndFrame()
|
||||
{
|
||||
CheckForCompletedCommandBuffers();
|
||||
|
||||
// Unlock all buffers
|
||||
for (uint32_t i = 0; i < m_buffers.size(); i++)
|
||||
{
|
||||
auto& buffer = m_buffers[i];
|
||||
|
||||
if (buffer.m_lock != 0)
|
||||
if (buffer.m_data.m_lock != 0)
|
||||
{
|
||||
if (buffer.m_commandBuffers.empty())
|
||||
if (buffer.m_data.m_commandBuffers.empty())
|
||||
FreeBuffer(i);
|
||||
|
||||
buffer.m_lock = 0;
|
||||
buffer.m_data.m_lock = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
auto it = m_commandBuffersFrames.begin();
|
||||
while (it != m_commandBuffersFrames.end())
|
||||
// TODO: do this for other buffer allocators as well?
|
||||
// Track how many frames have passed since the last access to the back buffer
|
||||
if (!m_buffers.empty())
|
||||
{
|
||||
it->second++;
|
||||
|
||||
if (it->second > MAX_COMMAND_BUFFER_FRAMES)
|
||||
auto& backBuffer = m_buffers.back();
|
||||
if (backBuffer.m_data.m_commandBuffers.empty())
|
||||
{
|
||||
debug_printf("command buffer %p remained unfinished for more than %u frames\n", it->first, MAX_COMMAND_BUFFER_FRAMES);
|
||||
// Release the back buffer if it hasn't been accessed for a while
|
||||
if (m_framesSinceBackBufferAccess >= BUFFER_RELEASE_FRAME_TRESHOLD)
|
||||
{
|
||||
// Debug
|
||||
m_mtlr->GetPerformanceMonitor().m_bufferAllocatorMemory -= backBuffer.m_buffer->length();
|
||||
|
||||
// Pretend like the command buffer has finished
|
||||
CommandBufferFinished(it->first, false);
|
||||
backBuffer.m_buffer->release();
|
||||
m_buffers.pop_back();
|
||||
|
||||
it = m_commandBuffersFrames.erase(it);
|
||||
m_framesSinceBackBufferAccess = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_framesSinceBackBufferAccess++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
it++;
|
||||
m_framesSinceBackBufferAccess = 0;
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
void SetActiveCommandBuffer(MTL::CommandBuffer* commandBuffer)
|
||||
{
|
||||
m_activeCommandBuffer = commandBuffer;
|
||||
|
||||
//if (commandBuffer)
|
||||
// m_commandBuffersFrames[commandBuffer] = 0;
|
||||
}
|
||||
|
||||
void CheckForCompletedCommandBuffers(/*MTL::CommandBuffer* commandBuffer, bool erase = true*/)
|
||||
@ -238,24 +251,24 @@ public:
|
||||
for (uint32_t i = 0; i < m_buffers.size(); i++)
|
||||
{
|
||||
auto& buffer = m_buffers[i];
|
||||
for (uint32_t j = 0; j < buffer.m_commandBuffers.size(); j++)
|
||||
for (uint32_t j = 0; j < buffer.m_data.m_commandBuffers.size(); j++)
|
||||
{
|
||||
if (m_mtlr->CommandBufferCompleted(buffer.m_commandBuffers[j]))
|
||||
if (m_mtlr->CommandBufferCompleted(buffer.m_data.m_commandBuffers[j]))
|
||||
{
|
||||
if (buffer.m_commandBuffers.size() == 1)
|
||||
if (buffer.m_data.m_commandBuffers.size() == 1)
|
||||
{
|
||||
if (!buffer.IsLocked())
|
||||
if (!buffer.m_data.IsLocked())
|
||||
{
|
||||
// All command buffers using it have finished execution, we can use it again
|
||||
FreeBuffer(i);
|
||||
}
|
||||
|
||||
buffer.m_commandBuffers.clear();
|
||||
buffer.m_data.m_commandBuffers.clear();
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
buffer.m_commandBuffers.erase(buffer.m_commandBuffers.begin() + j);
|
||||
buffer.m_data.m_commandBuffers.erase(buffer.m_data.m_commandBuffers.begin() + j);
|
||||
j--;
|
||||
}
|
||||
}
|
||||
@ -271,8 +284,8 @@ public:
|
||||
cemu_assert_debug(m_activeCommandBuffer);
|
||||
|
||||
auto& buffer = m_buffers[bufferIndex];
|
||||
if (buffer.m_commandBuffers.empty() || buffer.m_commandBuffers.back() != m_activeCommandBuffer/*std::find(buffer.m_commandBuffers.begin(), buffer.m_commandBuffers.end(), m_activeCommandBuffer) == buffer.m_commandBuffers.end()*/)
|
||||
buffer.m_commandBuffers.push_back(m_activeCommandBuffer);
|
||||
if (buffer.m_data.m_commandBuffers.empty() || buffer.m_data.m_commandBuffers.back() != m_activeCommandBuffer/*std::find(buffer.m_commandBuffers.begin(), buffer.m_commandBuffers.end(), m_activeCommandBuffer) == buffer.m_commandBuffers.end()*/)
|
||||
buffer.m_data.m_commandBuffers.push_back(m_activeCommandBuffer);
|
||||
|
||||
return buffer.m_buffer;
|
||||
}
|
||||
@ -298,33 +311,34 @@ public:
|
||||
}
|
||||
*/
|
||||
|
||||
// For debugging
|
||||
/*
|
||||
void LogInfo()
|
||||
{
|
||||
debug_printf("BUFFERS:\n");
|
||||
for (auto& buffer : m_buffers)
|
||||
{
|
||||
debug_printf(" %p -> size: %lu, command buffers: %zu\n", buffer.m_buffer, buffer.m_buffer->length(), buffer.m_commandBuffers.size());
|
||||
debug_printf(" %p -> size: %lu, command buffers: %zu\n", buffer.m_buffer, buffer.m_buffer->length(), buffer.m_data.m_commandBuffers.size());
|
||||
uint32 same = 0;
|
||||
uint32 completed = 0;
|
||||
for (uint32 i = 0; i < buffer.m_commandBuffers.size(); i++)
|
||||
for (uint32 i = 0; i < buffer.m_data.m_commandBuffers.size(); i++)
|
||||
{
|
||||
if (m_mtlr->CommandBufferCompleted(buffer.m_commandBuffers[i]))
|
||||
if (m_mtlr->CommandBufferCompleted(buffer.m_data.m_commandBuffers[i]))
|
||||
completed++;
|
||||
for (uint32 j = 0; j < buffer.m_commandBuffers.size(); j++)
|
||||
for (uint32 j = 0; j < buffer.m_data.m_commandBuffers.size(); j++)
|
||||
{
|
||||
if (i != j && buffer.m_commandBuffers[i] == buffer.m_commandBuffers[j])
|
||||
if (i != j && buffer.m_data.m_commandBuffers[i] == buffer.m_data.m_commandBuffers[j])
|
||||
same++;
|
||||
}
|
||||
}
|
||||
debug_printf(" same: %u\n", same);
|
||||
debug_printf(" completed: %u\n", completed);
|
||||
}
|
||||
|
||||
debug_printf("FREE RANGES:\n");
|
||||
for (auto& range : m_freeBufferRanges)
|
||||
{
|
||||
debug_printf(" %u -> offset: %zu, size: %zu\n", range.bufferIndex, range.offset, range.size);
|
||||
debug_printf(" FREE RANGES:\n");
|
||||
for (auto& range : buffer.m_freeRanges)
|
||||
{
|
||||
debug_printf(" offset: %zu, size: %zu\n", range.offset, range.size);
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
@ -332,20 +346,5 @@ public:
|
||||
private:
|
||||
MTL::CommandBuffer* m_activeCommandBuffer = nullptr;
|
||||
|
||||
//std::map<MTL::CommandBuffer*, uint16> m_commandBuffersFrames;
|
||||
|
||||
void FreeBuffer(uint32 bufferIndex)
|
||||
{
|
||||
// First remove any free ranges that use this buffer
|
||||
for (uint32 k = 0; k < m_freeBufferRanges.size(); k++)
|
||||
{
|
||||
if (m_freeBufferRanges[k].bufferIndex == bufferIndex)
|
||||
{
|
||||
m_freeBufferRanges.erase(m_freeBufferRanges.begin() + k);
|
||||
k--;
|
||||
}
|
||||
}
|
||||
|
||||
m_freeBufferRanges.push_back({bufferIndex, 0, m_buffers[bufferIndex].m_buffer->length()});
|
||||
}
|
||||
uint16 m_framesSinceBackBufferAccess = 0;
|
||||
};
|
||||
|
@ -264,10 +264,7 @@ void MetalRenderer::SwapBuffers(bool swapTV, bool swapDRC)
|
||||
m_memoryManager->GetFramePersistentBufferAllocator().ResetAllocations();
|
||||
|
||||
// Unlock all temporary buffers
|
||||
m_memoryManager->GetTemporaryBufferAllocator().UnlockAllBuffers();
|
||||
|
||||
// Check for completed command buffers
|
||||
m_memoryManager->GetTemporaryBufferAllocator().CheckForCompletedCommandBuffers();
|
||||
m_memoryManager->GetTemporaryBufferAllocator().EndFrame();
|
||||
}
|
||||
|
||||
// TODO: use `shader` for drawing
|
||||
@ -504,7 +501,7 @@ void MetalRenderer::texture_clearSlice(LatteTexture* hostTexture, sint32 sliceIn
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: do a GPU blit even on unified memory? That would mean we could use private storage mode for all textures
|
||||
// TODO: do a cpu copy on Apple Silicon?
|
||||
void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, sint32 height, sint32 depth, void* pixelData, sint32 sliceIndex, sint32 mipIndex, uint32 compressedImageSize)
|
||||
{
|
||||
auto textureMtl = (LatteTextureMtl*)hostTexture;
|
||||
|
Loading…
Reference in New Issue
Block a user