From 82dcbd98a6487e3285ff86d8ccfced57856d595b Mon Sep 17 00:00:00 2001 From: Samuliak Date: Tue, 6 Aug 2024 19:08:22 +0200 Subject: [PATCH] prepare for vertex stride workaround --- src/Cafe/CMakeLists.txt | 1 + .../HW/Latte/Renderer/Metal/MetalCommon.h | 6 ++ .../Renderer/Metal/MetalMemoryManager.cpp | 71 +++++++++++++++- .../Latte/Renderer/Metal/MetalMemoryManager.h | 81 ++++++++++++++++++- .../Renderer/Metal/MetalPipelineCache.cpp | 13 ++- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 49 +++++++---- .../HW/Latte/Renderer/Metal/MetalRenderer.h | 5 ++ 7 files changed, 197 insertions(+), 29 deletions(-) create mode 100644 src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index 7d10788a..ea237348 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -538,6 +538,7 @@ if(ENABLE_METAL) target_sources(CemuCafe PRIVATE HW/Latte/Renderer/Metal/MetalRenderer.cpp HW/Latte/Renderer/Metal/MetalRenderer.h + HW/Latte/Renderer/Metal/MetalCommon.h HW/Latte/Renderer/Metal/MetalCppImpl.cpp HW/Latte/Renderer/Metal/MetalLayer.mm HW/Latte/Renderer/Metal/MetalLayer.h diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h new file mode 100644 index 00000000..c7011ab8 --- /dev/null +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h @@ -0,0 +1,6 @@ +#pragma once + +inline size_t align(size_t size, size_t alignment) +{ + return (size + alignment - 1) & ~(alignment - 1); +} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp index f6f064f3..897e9593 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp @@ -1,3 +1,4 @@ +#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" @@ -14,7 +15,7 @@ 
MetalBufferAllocator::~MetalBufferAllocator() MetalBufferAllocation MetalBufferAllocator::GetBufferAllocation(size_t size, size_t alignment) { // Align the size - size = (size + alignment - 1) & ~(alignment - 1); + size = align(size, alignment); // First, try to find a free range for (uint32 i = 0; i < m_freeBufferRanges.size(); i++) @@ -63,6 +64,65 @@ MetalBufferAllocation MetalBufferAllocator::GetBufferAllocation(size_t size, siz return allocation; } +MetalVertexBufferCache::~MetalVertexBufferCache() +{ + for (uint32 i = 0; i < LATTE_MAX_VERTEX_BUFFERS; i++) + { + auto vertexBufferRange = m_bufferRanges[i]; + if (!vertexBufferRange) + continue; + // Drop the restrided copy (if any), then free the range allocated by TrackVertexBuffer + if (vertexBufferRange->restrideInfo.buffer) + vertexBufferRange->restrideInfo.buffer->release(); + delete vertexBufferRange; + m_bufferRanges[i] = nullptr; + } +} + +MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(uint32 bufferIndex, size_t stride) +{ + auto vertexBufferRange = m_bufferRanges[bufferIndex]; + auto& restrideInfo = vertexBufferRange->restrideInfo; + + if (stride % 4 == 0) + { + // No restride needed + return {nullptr, vertexBufferRange->offset}; + } + + if (restrideInfo.memoryInvalidated || stride != restrideInfo.lastStride) + { + // TODO: restride + throw std::runtime_error("restride needed"); + + restrideInfo.memoryInvalidated = false; + restrideInfo.lastStride = stride; + } + + // TODO: remove + throw std::runtime_error("restride unimplemented"); + + return {restrideInfo.buffer, 0}; +} + +void MetalVertexBufferCache::MemoryRangeChanged(size_t offset, size_t size) +{ + for (uint32 i = 0; i < LATTE_MAX_VERTEX_BUFFERS; i++) + { + auto vertexBufferRange = m_bufferRanges[i]; + if (vertexBufferRange) + { + // Skip ranges that the half-open write [offset, offset + size) does not overlap + if ((offset + size) <= vertexBufferRange->offset || offset >= (vertexBufferRange->offset + vertexBufferRange->size)) + { + continue; + } + + vertexBufferRange->restrideInfo.memoryInvalidated = true; + } + } +} + 
MetalMemoryManager::~MetalMemoryManager() { if (m_bufferCache) @@ -85,7 +145,7 @@ void MetalMemoryManager::InitBufferCache(size_t size) { if (m_bufferCache) { - printf("MetalMemoryManager::InitBufferCache: buffer cache already initialized\n"); + debug_printf("MetalMemoryManager::InitBufferCache: buffer cache already initialized\n"); return; } @@ -101,18 +161,21 @@ void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, si if (!m_bufferCache) { - printf("MetalMemoryManager::UploadToBufferCache: buffer cache not initialized\n"); + debug_printf("MetalMemoryManager::UploadToBufferCache: buffer cache not initialized\n"); return; } memcpy((uint8*)m_bufferCache->contents() + offset, data, size); + + // Notify vertex buffer cache about the change + m_vertexBufferCache.MemoryRangeChanged(offset, size); } void MetalMemoryManager::CopyBufferCache(size_t srcOffset, size_t dstOffset, size_t size) { if (!m_bufferCache) { - printf("MetalMemoryManager::CopyBufferCache: buffer cache not initialized\n"); + debug_printf("MetalMemoryManager::CopyBufferCache: buffer cache not initialized\n"); return; } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h index b0be2948..5ee0b37d 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h @@ -3,6 +3,7 @@ #include #include "Cafe/HW/Latte/ISA/LatteReg.h" +#include "Cafe/HW/Latte/Core/LatteConst.h" //const uint32 bufferAllocatorIndexShift = 24; @@ -51,10 +52,65 @@ private: std::vector m_freeBufferRanges; }; +struct MetalRestridedBufferRange +{ + MTL::Buffer* buffer; + size_t offset; +}; + +// TODO: use one big buffer for all the restrided vertex buffers? 
+struct MetalRestrideInfo +{ + bool memoryInvalidated = true; + size_t lastStride = 0; + MTL::Buffer* buffer = nullptr; +}; + +struct MetalVertexBufferRange +{ + size_t offset; + size_t size; + MetalRestrideInfo& restrideInfo; +}; + +class MetalVertexBufferCache +{ +public: + friend class MetalMemoryManager; + + MetalVertexBufferCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {} + ~MetalVertexBufferCache(); + + // Vertex buffer cache + void TrackVertexBuffer(uint32 bufferIndex, size_t offset, size_t size, MetalRestrideInfo& restrideInfo) + { + m_bufferRanges[bufferIndex] = new MetalVertexBufferRange{offset, size, restrideInfo}; + } + + void UntrackVertexBuffer(uint32 bufferIndex) + { + auto& range = m_bufferRanges[bufferIndex]; + if (range->restrideInfo.buffer) + range->restrideInfo.buffer->release(); + // Free the range object allocated by TrackVertexBuffer + delete range; + range = nullptr; + } + + MetalRestridedBufferRange RestrideBufferIfNeeded(uint32 bufferIndex, size_t stride); + +private: + class MetalRenderer* m_mtlr; + + MetalVertexBufferRange* m_bufferRanges[LATTE_MAX_VERTEX_BUFFERS] = {nullptr}; + + void MemoryRangeChanged(size_t offset, size_t size); +}; + class MetalMemoryManager { public: - MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer) {} + MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer), m_vertexBufferCache(metalRenderer) {} ~MetalMemoryManager(); void ResetTemporaryBuffers() @@ -90,6 +146,28 @@ public: void UploadToBufferCache(const void* data, size_t offset, size_t size); void CopyBufferCache(size_t srcOffset, size_t dstOffset, size_t size); + // Vertex buffer cache + void TrackVertexBuffer(uint32 bufferIndex, size_t offset, size_t size, MetalRestrideInfo& restrideInfo) + { + m_vertexBufferCache.TrackVertexBuffer(bufferIndex, offset, size, restrideInfo); + } + + void UntrackVertexBuffer(uint32 bufferIndex) + { + m_vertexBufferCache.UntrackVertexBuffer(bufferIndex); 
+ } + + MetalRestridedBufferRange RestrideBufferIfNeeded(uint32 bufferIndex, size_t stride) + { + auto range = m_vertexBufferCache.RestrideBufferIfNeeded(bufferIndex, stride); + if (!range.buffer) + { + range.buffer = m_bufferCache; + } + + return range; + } + private: class MetalRenderer* m_mtlr; @@ -97,6 +175,7 @@ private: MetalBufferAllocator m_bufferAllocator;//s[2]; //uint8 m_bufferAllocatorIndex = 0; + MetalVertexBufferCache m_vertexBufferCache; MTL::Buffer* m_bufferCache = nullptr; }; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 5ced9e9f..46918f8f 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -1,12 +1,14 @@ +#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" -#include "HW/Latte/Core/FetchShader.h" -#include "HW/Latte/ISA/RegDefines.h" #include "HW/Latte/Renderer/Metal/CachedFBOMtl.h" #include "HW/Latte/Renderer/Metal/LatteToMtl.h" #include "HW/Latte/Renderer/Metal/RendererShaderMtl.h" #include "HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" +#include "HW/Latte/Core/FetchShader.h" +#include "HW/Latte/ISA/RegDefines.h" + MetalPipelineCache::~MetalPipelineCache() { for (auto& pair : m_pipelineCache) @@ -59,12 +61,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS uint32 bufferIndex = bufferGroup.attributeBufferIndex; uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; - - uint32 strideRemainder = bufferStride % 4; - if (strideRemainder != 0) - { - debug_printf("vertex stride must be a multiple of 4, remainder: %u\n", strideRemainder); - } + bufferStride = align(bufferStride, 4); auto layout = 
vertexDescriptor->layouts()->object(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)); layout->setStride(bufferStride); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 5ff58c68..e4764590 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -6,7 +6,6 @@ #include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.h" -#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/ShaderSourcePresent.h" @@ -440,11 +439,22 @@ void MetalRenderer::bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uint void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, uint32 size) { - if (m_state.vertexBuffers[bufferIndex].offset == offset) + cemu_assert_debug(bufferIndex < LATTE_MAX_VERTEX_BUFFERS); + auto& buffer = m_state.vertexBuffers[bufferIndex]; + if (buffer.offset == offset && buffer.size == size) return; - cemu_assert_debug(bufferIndex < LATTE_MAX_VERTEX_BUFFERS); - m_state.vertexBuffers[bufferIndex].needsRebind = true; - m_state.vertexBuffers[bufferIndex].offset = offset; + + if (buffer.offset != INVALID_OFFSET) + { + m_memoryManager->UntrackVertexBuffer(bufferIndex); + } + + buffer.needsRebind = true; + buffer.offset = offset; + buffer.size = size; + buffer.restrideInfo = {}; + + m_memoryManager->TrackVertexBuffer(bufferIndex, offset, size, buffer.restrideInfo); } void MetalRenderer::buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size) @@ -598,15 +608,25 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 LatteBufferCache_Sync(indexMin + baseVertex, indexMax + baseVertex, baseInstance, instanceCount); // Vertex buffers - 
for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++) - { - auto& vertexBufferRange = m_state.vertexBuffers[i]; - if (vertexBufferRange.needsRebind) + for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++) + { + auto& vertexBufferRange = m_state.vertexBuffers[i]; + if (vertexBufferRange.offset != INVALID_OFFSET) { - renderCommandEncoder->setVertexBuffer(m_memoryManager->GetBufferCache(), vertexBufferRange.offset, GET_MTL_VERTEX_BUFFER_INDEX(i)); - vertexBufferRange.needsRebind = false; + // Restride + uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + i * 7; + uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; + + auto restridedBuffer = m_memoryManager->RestrideBufferIfNeeded(i, bufferStride); + + // Bind + if (vertexBufferRange.needsRebind) + { + renderCommandEncoder->setVertexBuffer(restridedBuffer.buffer, restridedBuffer.offset, GET_MTL_VERTEX_BUFFER_INDEX(i)); + vertexBufferRange.needsRebind = false; + } } - } + } // Uniform buffers, textures and samplers BindStageResources(renderCommandEncoder, vertexShader); @@ -1186,10 +1206,7 @@ void MetalRenderer::RebindRenderState(MTL::RenderCommandEncoder* renderCommandEn { auto& vertexBufferRange = m_state.vertexBuffers[i]; if (vertexBufferRange.offset != INVALID_OFFSET) - { - renderCommandEncoder->setVertexBuffer(m_memoryManager->GetBufferCache(), vertexBufferRange.offset, GET_MTL_VERTEX_BUFFER_INDEX(i)); - vertexBufferRange.needsRebind = false; - } + vertexBufferRange.needsRebind = true; } } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index b6fb30ad..bee581d6 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -6,6 +6,8 @@ #include "Cafe/HW/Latte/Renderer/Renderer.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h" + #define MAX_MTL_BUFFERS 31 #define GET_MTL_VERTEX_BUFFER_INDEX(index) (MAX_MTL_BUFFERS - index - 2) // 
TODO: don't harcdode the support buffer binding @@ -20,6 +22,9 @@ struct MetalBoundBuffer { bool needsRebind = false; size_t offset = INVALID_OFFSET; + size_t size = 0; + // Memory manager will write restride info to this variable + MetalRestrideInfo restrideInfo; }; struct MetalState