diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp index f0c12217..ef55e96d 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp @@ -3,6 +3,7 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h" #include "Common/precompiled.h" #include "HW/Latte/Renderer/Metal/MetalRenderer.h" +#include "Metal/MTLResource.hpp" MetalVertexBufferCache::~MetalVertexBufferCache() { @@ -115,13 +116,9 @@ void* MetalMemoryManager::GetTextureUploadBuffer(size_t size) void MetalMemoryManager::InitBufferCache(size_t size) { - if (m_bufferCache) - { - debug_printf("MetalMemoryManager::InitBufferCache: buffer cache already initialized\n"); - return; - } + cemu_assert_debug(!m_bufferCache); - m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, m_mtlr->GetOptimalBufferStorageMode() | MTL::ResourceCPUCacheModeWriteCombined); + m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, MTL::ResourceStorageModePrivate); #ifdef CEMU_DEBUG_ASSERT m_bufferCache->setLabel(GetLabel("Buffer cache", m_bufferCache)); #endif @@ -129,20 +126,23 @@ void MetalMemoryManager::InitBufferCache(size_t size) void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, size_t size) { - if (!m_bufferCache) - { - debug_printf("MetalMemoryManager::UploadToBufferCache: buffer cache not initialized\n"); - return; - } + cemu_assert_debug(m_bufferCache); + cemu_assert_debug((offset + size) <= m_bufferCache->length()); - if ((offset + size) > m_bufferCache->length()) - { - debug_printf("MetalMemoryManager::UploadToBufferCache: out of bounds access (offset: %zu, size: %zu, buffer size: %zu)\n", offset, size, m_bufferCache->length()); - } + auto allocation = m_tempBufferAllocator.GetBufferAllocation(size); + auto buffer = m_tempBufferAllocator.GetBufferOutsideOfCommandBuffer(allocation.bufferIndex); + memcpy((uint8*)buffer->contents() + allocation.offset, data, size); - memcpy((uint8*)m_bufferCache->contents() + offset, data, size); - if (!m_mtlr->HasUnifiedMemory()) - m_bufferCache->didModifyRange(NS::Range(offset, size)); + // Lock the buffer to make sure it's not deallocated before the copy is done + m_tempBufferAllocator.LockBuffer(allocation.bufferIndex); + + m_mtlr->CopyBufferToBuffer(buffer, allocation.offset, m_bufferCache, offset, size); + + // Make sure the buffer has the right command buffer + m_tempBufferAllocator.GetBuffer(allocation.bufferIndex); // TODO: make a helper function for this + + // We can now safely unlock the buffer + m_tempBufferAllocator.UnlockBuffer(allocation.bufferIndex); // Notify vertex buffer cache about the change m_vertexBufferCache.MemoryRangeChanged(offset, size); @@ -150,11 +150,7 @@ void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, si void MetalMemoryManager::CopyBufferCache(size_t srcOffset, size_t dstOffset, size_t size) { - if (!m_bufferCache) - { - debug_printf("MetalMemoryManager::CopyBufferCache: buffer cache not initialized\n"); - return; - } + cemu_assert_debug(m_bufferCache); - memcpy((uint8*)m_bufferCache->contents() + dstOffset, (uint8*)m_bufferCache->contents() + srcOffset, size); + m_mtlr->CopyBufferToBuffer(m_bufferCache, srcOffset, m_bufferCache, dstOffset, size); } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index af251a5d..a755ba31 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -22,11 +22,6 @@ #include "HW/Latte/Renderer/Metal/MetalCommon.h" #include "HW/Latte/Renderer/Metal/MetalLayerHandle.h" #include "HW/Latte/Renderer/Renderer.h" -#include "Metal/MTLCommandBuffer.hpp" -#include "Metal/MTLDevice.hpp" -#include "Metal/MTLRenderCommandEncoder.hpp" -#include "Metal/MTLRenderPass.hpp" -#include "Metal/MTLRenderPipeline.hpp" #include "imgui.h" #include @@ -780,31 +775,7 @@ void MetalRenderer::bufferCache_copy(uint32 srcOffset, uint32 dstOffset, uint32 void MetalRenderer::bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uint32 dstOffset, uint32 size) { - // Do the copy in a vertex shader on Apple GPUs - if (m_isAppleGPU && m_encoderType == MetalEncoderType::Render) - { - auto renderCommandEncoder = static_cast(m_commandEncoder); - - MTL::Resource* barrierBuffers[] = {m_xfbRingBuffer}; - renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex | MTL::RenderStageFragment | MTL::RenderStageObject | MTL::RenderStageMesh, MTL::RenderStageVertex); - - renderCommandEncoder->setRenderPipelineState(m_copyBufferToBufferPipeline->GetRenderPipelineState()); - m_state.m_encoderState.m_renderPipelineState = m_copyBufferToBufferPipeline->GetRenderPipelineState(); - - SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, m_xfbRingBuffer, srcOffset, GET_HELPER_BUFFER_BINDING(0)); - SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, m_memoryManager->GetBufferCache(), dstOffset, GET_HELPER_BUFFER_BINDING(1)); - - renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), NS::UInteger(size)); - - barrierBuffers[0] = m_memoryManager->GetBufferCache(); - renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex | MTL::RenderStageFragment | MTL::RenderStageObject | MTL::RenderStageMesh); - } - else - { - auto blitCommandEncoder = GetBlitCommandEncoder(); - - blitCommandEncoder->copyFromBuffer(m_xfbRingBuffer, srcOffset, m_memoryManager->GetBufferCache(), dstOffset, size); - } + CopyBufferToBuffer(m_xfbRingBuffer, srcOffset, m_memoryManager->GetBufferCache(), dstOffset, size); } void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, uint32 size) @@ -945,9 +916,6 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 if (endRenderPass) EndEncoding(); - // Render pass - auto renderCommandEncoder = GetRenderCommandEncoder(); - // Primitive type const LattePrimitiveMode primitiveMode = static_cast(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]); auto mtlPrimitiveType = GetMtlPrimitiveType(primitiveMode); @@ -955,6 +923,22 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 bool usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect); + // Index buffer + Renderer::INDEX_TYPE hostIndexType; + uint32 hostIndexCount; + uint32 indexMin = 0; + uint32 indexMax = 0; + uint32 indexBufferOffset = 0; + uint32 indexBufferIndex = 0; + LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexBufferOffset, indexBufferIndex); + + // synchronize vertex and uniform cache and update buffer bindings + // We need to call this before getting the render command encoder, since it can cause buffer copies + LatteBufferCache_Sync(indexMin + baseVertex, indexMax + baseVertex, baseInstance, instanceCount); + + // Render pass + auto renderCommandEncoder = GetRenderCommandEncoder(); + // Depth stencil state // Disable depth write when there is no depth attachment @@ -1120,18 +1104,6 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 // Resources - // Index buffer - Renderer::INDEX_TYPE hostIndexType; - uint32 hostIndexCount; - uint32 indexMin = 0; - uint32 indexMax = 0; - uint32 indexBufferOffset = 0; - uint32 indexBufferIndex = 0; - LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexBufferOffset, indexBufferIndex); - - // synchronize vertex and uniform cache and update buffer bindings - LatteBufferCache_Sync(indexMin + baseVertex, indexMax + baseVertex, baseInstance, instanceCount); - // Vertex buffers for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++) { @@ -1851,6 +1823,37 @@ void MetalRenderer::ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 s EndEncoding(); } +void MetalRenderer::CopyBufferToBuffer(MTL::Buffer* src, uint32 srcOffset, MTL::Buffer* dst, uint32 dstOffset, uint32 size) +{ + // Do the copy in a vertex shader on Apple GPUs + if (m_isAppleGPU && m_encoderType == MetalEncoderType::Render) + { + auto renderCommandEncoder = static_cast(m_commandEncoder); + + MTL::Resource* barrierBuffers[] = {src}; + // TODO: let the caller choose the stages + renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex | MTL::RenderStageFragment | MTL::RenderStageObject | MTL::RenderStageMesh, MTL::RenderStageVertex); + + renderCommandEncoder->setRenderPipelineState(m_copyBufferToBufferPipeline->GetRenderPipelineState()); + m_state.m_encoderState.m_renderPipelineState = m_copyBufferToBufferPipeline->GetRenderPipelineState(); + + SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, src, srcOffset, GET_HELPER_BUFFER_BINDING(0)); + SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, dst, dstOffset, GET_HELPER_BUFFER_BINDING(1)); + + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), NS::UInteger(size)); + + barrierBuffers[0] = dst; + // TODO: let the caller choose the stages + renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex | MTL::RenderStageFragment | MTL::RenderStageObject | MTL::RenderStageMesh); + } + else + { + auto blitCommandEncoder = GetBlitCommandEncoder(); + + blitCommandEncoder->copyFromBuffer(src, srcOffset, dst, dstOffset, size); + } +} + void MetalRenderer::SwapBuffer(bool mainWindow) { auto& layer = GetLayer(mainWindow); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index f9acb781..e8c15133 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -376,6 +376,8 @@ public: void ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a); + void CopyBufferToBuffer(MTL::Buffer* src, uint32 srcOffset, MTL::Buffer* dst, uint32 dstOffset, uint32 size); + // Getters bool IsAppleGPU() const {