From 485a652c85660d830dc6ac9e360322b1139e540d Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sun, 18 Aug 2024 14:37:39 +0200 Subject: [PATCH] use managed storage mode when the device has dedicated memory --- src/Cafe/HW/Latte/Core/LatteIndices.cpp | 2 +- .../Latte/Renderer/Metal/LatteTextureMtl.cpp | 2 +- .../Renderer/Metal/MetalBufferAllocator.h | 8 +++-- .../Renderer/Metal/MetalMemoryManager.cpp | 4 ++- .../Latte/Renderer/Metal/MetalMemoryManager.h | 3 +- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 19 ++++++++--- .../HW/Latte/Renderer/Metal/MetalRenderer.h | 32 +++++++++++++++---- .../HW/Latte/Renderer/OpenGL/OpenGLRenderer.h | 6 ++-- src/Cafe/HW/Latte/Renderer/Renderer.h | 2 +- .../HW/Latte/Renderer/Vulkan/VulkanRenderer.h | 2 +- .../Renderer/Vulkan/VulkanRendererCore.cpp | 20 ++++++------ 11 files changed, 67 insertions(+), 33 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/LatteIndices.cpp b/src/Cafe/HW/Latte/Core/LatteIndices.cpp index 0f12356b..dc6408f9 100644 --- a/src/Cafe/HW/Latte/Core/LatteIndices.cpp +++ b/src/Cafe/HW/Latte/Core/LatteIndices.cpp @@ -769,7 +769,7 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 // recalculate index range but filter out primitive restart index LatteIndices_alternativeCalculateIndexMinMax(indexData, indexType, count, indexMin, indexMax); } - g_renderer->indexData_uploadIndexMemory(indexBufferOffset, indexOutputSize); + g_renderer->indexData_uploadIndexMemory(indexBufferIndex, indexBufferOffset, indexOutputSize); // update cache LatteIndexCache.lastPtr = indexData; LatteIndexCache.lastCount = count; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp index 645973df..c1508085 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp @@ -9,7 +9,7 @@ LatteTextureMtl::LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM : LatteTexture(dim, physAddress, 
physMipAddress, format, width, height, depth, pitch, mipLevels, swizzle, tileMode, isDepth), m_mtlr(mtlRenderer), m_format(format), m_isDepth(isDepth) { MTL::TextureDescriptor* desc = MTL::TextureDescriptor::alloc()->init(); - desc->setStorageMode(MTL::StorageModeShared); // TODO: use private? + desc->setStorageMode(m_mtlr->GetOptimalStorageMode()); sint32 effectiveBaseWidth = width; sint32 effectiveBaseHeight = height; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h index 20467e65..a7e54e1e 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h @@ -1,6 +1,7 @@ #pragma once #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" +#include "Metal/MTLResource.hpp" struct MetalBufferRange { @@ -13,7 +14,7 @@ template class MetalBufferAllocator { public: - MetalBufferAllocator(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {} + MetalBufferAllocator(class MetalRenderer* metalRenderer, MTL::ResourceOptions storageMode) : m_mtlr{metalRenderer}, m_storageMode{storageMode} {} ~MetalBufferAllocator() { @@ -68,7 +69,7 @@ public: // If no free range was found, allocate a new buffer m_allocationSize = std::max(m_allocationSize, size); - MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(m_allocationSize, MTL::ResourceStorageModeShared); + MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(m_allocationSize, m_storageMode); #ifdef CEMU_DEBUG_ASSERT buffer->setLabel(GetLabel("Buffer from buffer allocator", buffer)); #endif @@ -124,6 +125,7 @@ public: protected: class MetalRenderer* m_mtlr; + MTL::ResourceOptions m_storageMode; size_t m_allocationSize = 8 * 1024 * 1024; @@ -147,7 +149,7 @@ struct MetalSyncedBuffer class MetalTemporaryBufferAllocator : public MetalBufferAllocator { public: - MetalTemporaryBufferAllocator(class MetalRenderer* metalRenderer) : MetalBufferAllocator(metalRenderer) {} + 
MetalTemporaryBufferAllocator(class MetalRenderer* metalRenderer) : MetalBufferAllocator(metalRenderer, metalRenderer->GetOptimalResourceStorageMode()) {} void SetActiveCommandBuffer(MTL::CommandBuffer* commandBuffer) { diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp index c102bcf2..534b9831 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp @@ -132,7 +132,7 @@ void MetalMemoryManager::InitBufferCache(size_t size) return; } - m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, MTL::ResourceStorageModeShared); + m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, m_mtlr->GetOptimalResourceStorageMode()); #ifdef CEMU_DEBUG_ASSERT m_bufferCache->setLabel(GetLabel("Buffer cache", m_bufferCache)); #endif @@ -152,6 +152,8 @@ void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, si } memcpy((uint8*)m_bufferCache->contents() + offset, data, size); + if (!m_mtlr->HasUnifiedMemory()) + m_bufferCache->didModifyRange(NS::Range(offset, size)); // Notify vertex buffer cache about the change m_vertexBufferCache.MemoryRangeChanged(offset, size); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h index cc89f5ce..5abc7c62 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h @@ -1,6 +1,7 @@ #pragma once #include "Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h" +#include "Metal/MTLResource.hpp" struct MetalRestridedBufferRange { @@ -57,7 +58,7 @@ private: class MetalMemoryManager { public: - MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer), m_framePersistentBufferAllocator(metalRenderer), m_tempBufferAllocator(metalRenderer), m_vertexBufferCache(metalRenderer, 
m_framePersistentBufferAllocator) {} + MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer, m_mtlr->GetOptimalResourceStorageMode()), m_framePersistentBufferAllocator(metalRenderer, MTL::ResourceStorageModePrivate), m_tempBufferAllocator(metalRenderer), m_vertexBufferCache(metalRenderer, m_framePersistentBufferAllocator) {} ~MetalMemoryManager(); // Pipelines diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 46b5cfd0..7c6c6a2d 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -18,6 +18,7 @@ #include "Cafe/HW/Latte/Core/LatteIndices.h" #include "Cemu/Logging/CemuDebugLogging.h" #include "Common/precompiled.h" +#include "Metal/MTLResource.hpp" #include "gui/guiWrapper.h" #define COMMIT_TRESHOLD 256 @@ -31,6 +32,9 @@ MetalRenderer::MetalRenderer() m_device = MTL::CreateSystemDefaultDevice(); m_commandQueue = m_device->newCommandQueue(); + // Feature support + m_hasUnifiedMemory = m_device->hasUnifiedMemory(); + // Resources MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); #ifdef CEMU_DEBUG_ASSERT @@ -75,7 +79,7 @@ MetalRenderer::MetalRenderer() #endif // Transform feedback - m_xfbRingBuffer = m_device->newBuffer(LatteStreamout_GetRingBufferSize(), MTL::StorageModeShared); + m_xfbRingBuffer = m_device->newBuffer(LatteStreamout_GetRingBufferSize(), MTL::ResourceStorageModePrivate); #ifdef CEMU_DEBUG_ASSERT m_xfbRingBuffer->setLabel(GetLabel("Transform feedback buffer", m_xfbRingBuffer)); #endif @@ -991,9 +995,11 @@ void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, u return allocation.data; } -void MetalRenderer::indexData_uploadIndexMemory(uint32 offset, uint32 size) +void MetalRenderer::indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) { - // Do nothing, since the buffer has 
shared storage mode + auto buffer = m_memoryManager->GetTemporaryBufferAllocator().GetBuffer(bufferIndex); + if (!HasUnifiedMemory()) + buffer->didModifyRange(NS::Range(offset, size)); } MTL::CommandBuffer* MetalRenderer::GetCommandBuffer() @@ -1495,18 +1501,21 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE size_t size = shader->uniform.uniformRangeSize; auto supportBuffer = bufferAllocator.GetBufferAllocation(size); memcpy(supportBuffer.data, supportBufferData, size); + auto buffer = bufferAllocator.GetBuffer(supportBuffer.bufferIndex); + if (!HasUnifiedMemory()) + buffer->didModifyRange(NS::Range(supportBuffer.offset, size)); switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: { - renderCommandEncoder->setVertexBuffer(bufferAllocator.GetBuffer(supportBuffer.bufferIndex), supportBuffer.offset, MTL_SUPPORT_BUFFER_BINDING); + renderCommandEncoder->setVertexBuffer(buffer, supportBuffer.offset, MTL_SUPPORT_BUFFER_BINDING); //renderCommandEncoder->setVertexBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING); break; } case LatteConst::ShaderType::Pixel: { - renderCommandEncoder->setFragmentBuffer(bufferAllocator.GetBuffer(supportBuffer.bufferIndex), supportBuffer.offset, MTL_SUPPORT_BUFFER_BINDING); + renderCommandEncoder->setFragmentBuffer(buffer, supportBuffer.offset, MTL_SUPPORT_BUFFER_BINDING); //renderCommandEncoder->setFragmentBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING); break; } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 8d63f6da..c3eb9ab7 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -7,6 +7,7 @@ #include "Cafe/HW/Latte/Renderer/Renderer.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" +#include "Metal/MTLResource.hpp" struct MetalBufferAllocation { @@ -269,7 +270,7 @@ public: // index void* 
indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) override; - void indexData_uploadIndexMemory(uint32 offset, uint32 size) override; + void indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) override; // occlusion queries LatteQueryObject* occlusionQuery_create() override { @@ -348,7 +349,22 @@ public: void ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a); // Getters - MTL::Buffer* GetTextureReadbackBuffer() + bool HasUnifiedMemory() const + { + return m_hasUnifiedMemory; + } + + MTL::StorageMode GetOptimalStorageMode() const + { + return (m_hasUnifiedMemory ? MTL::StorageModeShared : MTL::StorageModeManaged); + } + + MTL::ResourceOptions GetOptimalResourceStorageMode() const + { + return (m_hasUnifiedMemory ? MTL::ResourceStorageModeShared : MTL::ResourceStorageModeManaged); + } + + MTL::Buffer* GetTextureReadbackBuffer() const { return m_readbackBuffer; } @@ -357,15 +373,19 @@ private: CA::MetalLayer* m_metalLayer; float m_layerScaleX, m_layerScaleY; + // Metal objects + MTL::Device* m_device; + MTL::CommandQueue* m_commandQueue; + + // Feature support + bool m_hasUnifiedMemory; + + // Managers and caches class MetalMemoryManager* m_memoryManager; class MetalPipelineCache* m_pipelineCache; class MetalDepthStencilCache* m_depthStencilCache; class MetalSamplerCache* m_samplerCache; - // Metal objects - MTL::Device* m_device; - MTL::CommandQueue* m_commandQueue; - // Pipelines MTL::RenderPipelineState* m_presentPipelineLinear; MTL::RenderPipelineState* m_presentPipelineSRGB; diff --git a/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRenderer.h b/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRenderer.h index 313ea3c0..196403b4 100644 --- a/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRenderer.h @@ -109,7 +109,7 @@ public: return nullptr; } - void indexData_uploadIndexMemory(uint32 offset, uint32 size) 
override + void indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) override { assert_dbg(); } @@ -243,11 +243,11 @@ private: // occlusion queries std::vector list_queryCacheOcclusion; // cache for unused queries - // resource garbage collection + // resource garbage collection struct BufferCacheReleaseQueueEntry { BufferCacheReleaseQueueEntry(VirtualBufferHeap_t* heap, VirtualBufferHeapEntry_t* entry) : m_heap(heap), m_entry(entry) {}; - + void free() { virtualBufferHeap_free(m_heap, m_entry); diff --git a/src/Cafe/HW/Latte/Renderer/Renderer.h b/src/Cafe/HW/Latte/Renderer/Renderer.h index 7bd143d0..1dba52c8 100644 --- a/src/Cafe/HW/Latte/Renderer/Renderer.h +++ b/src/Cafe/HW/Latte/Renderer/Renderer.h @@ -141,7 +141,7 @@ public: // index virtual void* indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) = 0; - virtual void indexData_uploadIndexMemory(uint32 offset, uint32 size) = 0; + virtual void indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) = 0; // occlusion queries virtual LatteQueryObject* occlusionQuery_create() = 0; diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h index e4b4cbf9..2b819e15 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h @@ -328,7 +328,7 @@ public: RendererShader* shader_create(RendererShader::ShaderType type, uint64 baseHash, uint64 auxHash, const std::string& source, bool isGameShader, bool isGfxPackShader) override; void* indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) override; - void indexData_uploadIndexMemory(uint32 offset, uint32 size) override; + void indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) override; // externally callable void GetTextureFormatInfoVK(Latte::E_GX2SURFFMT format, bool isDepth, Latte::E_DIM dim, sint32 width, sint32 height, FormatInfoVK* 
formatInfoOut); diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp index 6500f7d3..d41022ac 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp @@ -60,7 +60,7 @@ uint64 VulkanRenderer::draw_calculateGraphicsPipelineHash(const LatteFetchShader uint64 stateHash; stateHash = draw_calculateMinimalGraphicsPipelineHash(fetchShader, lcr); stateHash = (stateHash >> 8) + (stateHash * 0x370531ull) % 0x7F980D3BF9B4639Dull; - + uint32* ctxRegister = lcr.GetRawView(); if (vertexShader) @@ -103,7 +103,7 @@ uint64 VulkanRenderer::draw_calculateGraphicsPipelineHash(const LatteFetchShader } stateHash += renderPassObj->m_hashForPipeline; - + uint32 depthControl = ctxRegister[Latte::REGADDR::DB_DEPTH_CONTROL]; bool stencilTestEnable = depthControl & 1; if (stencilTestEnable) @@ -111,7 +111,7 @@ uint64 VulkanRenderer::draw_calculateGraphicsPipelineHash(const LatteFetchShader stateHash += ctxRegister[mmDB_STENCILREFMASK]; stateHash = std::rotl(stateHash, 17); if(depthControl & (1<<7)) // back stencil enable - { + { stateHash += ctxRegister[mmDB_STENCILREFMASK_BF]; stateHash = std::rotl(stateHash, 13); } @@ -302,7 +302,7 @@ PipelineInfo* VulkanRenderer::draw_createGraphicsPipeline(uint32 indexCount) pipelineCompiler->TrackAsCached(vsBaseHash, pipelineHash); // use heuristics based on parameter patterns to determine if the current drawcall is essential (non-skipable) - bool allowAsyncCompile = false; + bool allowAsyncCompile = false; if (GetConfig().async_compile) allowAsyncCompile = IsAsyncPipelineAllowed(indexCount); @@ -366,7 +366,7 @@ void* VulkanRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, return resv.memPtr; } -void VulkanRenderer::indexData_uploadIndexMemory(uint32 offset, uint32 size) +void VulkanRenderer::indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) { // does nothing 
since the index buffer memory is coherent } @@ -701,8 +701,8 @@ VkDescriptorSetInfo* VulkanRenderer::draw_getOrCreateDescriptorSet(PipelineInfo* LatteTexture* baseTexture = textureView->baseTexture; // get texture register word 0 uint32 word4 = LatteGPUState.contextRegister[texUnitRegIndex + 4]; - - auto imageViewObj = textureView->GetSamplerView(word4); + + auto imageViewObj = textureView->GetSamplerView(word4); info.imageView = imageViewObj->m_textureImageView; vkObjDS->addRef(imageViewObj); @@ -772,7 +772,7 @@ VkDescriptorSetInfo* VulkanRenderer::draw_getOrCreateDescriptorSet(PipelineInfo* VK_SAMPLER_ADDRESS_MODE_REPEAT, // WRAP VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT, // MIRROR VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // CLAMP_LAST_TEXEL - VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE, // MIRROR_ONCE_LAST_TEXEL + VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE, // MIRROR_ONCE_LAST_TEXEL VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // unsupported HALF_BORDER VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, // unsupported MIRROR_ONCE_HALF_BORDER VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, // CLAMP_BORDER @@ -900,7 +900,7 @@ VkDescriptorSetInfo* VulkanRenderer::draw_getOrCreateDescriptorSet(PipelineInfo* uniformVarsBufferInfo.buffer = m_uniformVarBuffer; uniformVarsBufferInfo.offset = 0; // fixed offset is always zero since we only use dynamic offsets uniformVarsBufferInfo.range = shader->uniform.uniformRangeSize; - + VkWriteDescriptorSet write_descriptor{}; write_descriptor.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; write_descriptor.dstSet = result; @@ -1211,7 +1211,7 @@ void VulkanRenderer::draw_setRenderPass() draw_endRenderPass(); if (m_state.descriptorSetsChanged) sync_inputTexturesChanged(); - + // assume that FBO changed, update self-dependency state m_state.hasRenderSelfDependency = fboVk->CheckForCollision(m_state.activeVertexDS, m_state.activeGeometryDS, m_state.activePixelDS);