diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp index e406abf6..c2f08532 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp @@ -9,7 +9,7 @@ MetalVertexBufferCache::~MetalVertexBufferCache() { } -MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Buffer* bufferCache, uint32 bufferIndex, size_t stride) +MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Buffer* bufferCache, uint32 bufferIndex, size_t stride, std::vector& barrierBuffers) { auto vertexBufferRange = m_bufferRanges[bufferIndex]; auto& restrideInfo = *vertexBufferRange.restrideInfo; @@ -28,14 +28,14 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu restrideInfo.allocation = m_bufferAllocator.GetBufferAllocation(newSize); buffer = m_bufferAllocator.GetBuffer(restrideInfo.allocation.bufferIndex); - // HACK: the restriding is done on the CPU, since doing it on the GPU was causing over-synchronization + /* uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset; uint8* newPtr = (uint8*)buffer->contents() + restrideInfo.allocation.offset; for (size_t elem = 0; elem < vertexBufferRange.size / stride; elem++) memcpy(newPtr + elem * newStride, oldPtr + elem * stride, stride); + */ - /* if (m_mtlr->GetEncoderType() == MetalEncoderType::Render) { auto renderCommandEncoder = static_cast(m_mtlr->GetCommandEncoder()); @@ -56,16 +56,12 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), vertexBufferRange.size / stride); - // TODO: do the barriers in one call? - MTL::Resource* barrierBuffers[] = {buffer}; - renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex); + vectorAppendUnique(barrierBuffers, static_cast(buffer)); } else { - debug_printf("vertex buffer restride needs an active render command encoder\n"); cemu_assert_suspicious(); } - */ restrideInfo.memoryInvalidated = false; restrideInfo.lastStride = newStride; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h index 87327c4c..ff74a8ee 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h @@ -42,7 +42,7 @@ public: range.offset = INVALID_OFFSET; } - MetalRestridedBufferRange RestrideBufferIfNeeded(MTL::Buffer* bufferCache, uint32 bufferIndex, size_t stride); + MetalRestridedBufferRange RestrideBufferIfNeeded(MTL::Buffer* bufferCache, uint32 bufferIndex, size_t stride, std::vector& barrierBuffers); private: class MetalRenderer* m_mtlr; @@ -105,9 +105,9 @@ public: m_vertexBufferCache.UntrackVertexBuffer(bufferIndex); } - MetalRestridedBufferRange RestrideBufferIfNeeded(uint32 bufferIndex, size_t stride) + MetalRestridedBufferRange RestrideBufferIfNeeded(uint32 bufferIndex, size_t stride, std::vector& barrierBuffers) { - return m_vertexBufferCache.RestrideBufferIfNeeded(m_bufferCache, bufferIndex, stride); + return m_vertexBufferCache.RestrideBufferIfNeeded(m_bufferCache, bufferIndex, stride, barrierBuffers); } private: diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 4d10b2b8..589fb20a 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -1141,6 +1141,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 // Resources // Vertex buffers + std::vector barrierBuffers; for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++) { auto& vertexBufferRange = m_state.m_vertexBuffers[i]; @@ -1161,7 +1162,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + i * 7; uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; - auto restridedBuffer = m_memoryManager->RestrideBufferIfNeeded(i, bufferStride); + auto restridedBuffer = m_memoryManager->RestrideBufferIfNeeded(i, bufferStride, barrierBuffers); buffer = restridedBuffer.buffer; offset = restridedBuffer.offset; @@ -1172,6 +1173,11 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 } } + if (!barrierBuffers.empty()) + { + renderCommandEncoder->memoryBarrier(barrierBuffers.data(), barrierBuffers.size(), MTL::RenderStageVertex, MTL::RenderStageVertex); + } + // Render pipeline state MTL::RenderPipelineState* renderPipelineState; if (usesGeometryShader)