From a3476c7b7c6aa92c63266c7559d0f9e83b8cf8ec Mon Sep 17 00:00:00 2001
From: goeiecool9999 <7033575+goeiecool9999@users.noreply.github.com>
Date: Wed, 30 Nov 2022 01:26:42 +0100
Subject: [PATCH] macOS: Workaround for MoltenVK stride limitation (#534)

On macOS, a vertex buffer whose stride is not a multiple of 4 is now
repacked on the CPU: every element is copied into memory taken from a new
ring allocator (BUFFER_TYPE::STRIDE) and padded up to the next multiple
of 4 bytes. The pipeline's vertex binding stride is rounded up the same
way so that it matches the repacked data.

Previously the pipeline compiler logged "MoltenVK error: vertex stride
was %d, expected multiple of 4" and forced the stride to 0.
---
Review notes (text in this section is not part of the commit):

* This copy of the patch was recovered from a dump in which all line
  breaks had been collapsed. Line breaks, tab indentation and blank
  context lines were re-derived from the hunk headers; whitespace in
  context lines may therefore differ from the target tree and should be
  verified before applying.
* Template argument lists had been stripped from the text. The return type
  of buffer_genStrideWorkaroundVertexBuffer is restored to
  std::pair<VkBuffer, uint32> (definition and declaration), matching the
  returned {vkBuffer, bufferOffset} and the .first/.second use at the call
  site. The two std::span declarations are kept as found; they rely on
  class template argument deduction.
* NOTE(review): the context line "template" in the first LatteBufferData.cpp
  hunk has lost its template parameter list, and whether it shared a line
  with the following "void rectGenerate4thVertex(...)" is unknown. Restore
  it from the target file, otherwise that hunk will not match.
* buffer_genStrideWorkaroundVertexBuffer copies size / oldStride whole
  elements; bytes after the last whole element are not copied and the
  padding bytes of each element are left unwritten.
* The size parameter of buffer_bindVertexStrideWorkaroundBuffer is unused.

 src/Cafe/HW/Latte/Core/LatteBufferData.cpp    | 14 +++++++++
 .../Renderer/Vulkan/VKRMemoryManager.cpp      |  2 ++
 .../Latte/Renderer/Vulkan/VKRMemoryManager.h  |  9 +++++-
 .../Vulkan/VulkanPipelineCompiler.cpp         |  3 +-
 .../Latte/Renderer/Vulkan/VulkanRenderer.cpp  | 30 +++++++++++++++++++
 .../HW/Latte/Renderer/Vulkan/VulkanRenderer.h |  2 ++
 6 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/src/Cafe/HW/Latte/Core/LatteBufferData.cpp b/src/Cafe/HW/Latte/Core/LatteBufferData.cpp
index d864750a..d31a8651 100644
--- a/src/Cafe/HW/Latte/Core/LatteBufferData.cpp
+++ b/src/Cafe/HW/Latte/Core/LatteBufferData.cpp
@@ -9,6 +9,7 @@
 #include "Cafe/GameProfile/GameProfile.h"
 
 #include "Cafe/HW/Latte/Core/LatteBufferCache.h"
+#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
 
 template
 void rectGenerate4thVertex(uint32be* output, uint32be* input0, uint32be* input1, uint32be* input2)
@@ -198,6 +199,19 @@ bool LatteBufferCache_Sync(uint32 minIndex, uint32 maxIndex, uint32 baseInstance
 		if (fixedBufferSize == 0 || bufferStride == 0)
 			fixedBufferSize += 128;
 
+
+#if BOOST_OS_MACOS
+		if(bufferStride % 4 != 0)
+		{
+			if (VulkanRenderer* vkRenderer = VulkanRenderer::GetInstance())
+			{
+				auto fixedBuffer = vkRenderer->buffer_genStrideWorkaroundVertexBuffer(bufferAddress, fixedBufferSize, bufferStride);
+				vkRenderer->buffer_bindVertexStrideWorkaroundBuffer(fixedBuffer.first, fixedBuffer.second, bufferIndex, fixedBufferSize);
+				continue;
+			}
+		}
+#endif
+
 		uint32 bindOffset = LatteBufferCache_retrieveDataInCache(bufferAddress, fixedBufferSize);
 		g_renderer->buffer_bindVertexBuffer(bufferIndex, bindOffset, fixedBufferSize);
 	}
diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VKRMemoryManager.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VKRMemoryManager.cpp
index 95a04315..502d84be 100644
--- a/src/Cafe/HW/Latte/Renderer/Vulkan/VKRMemoryManager.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VKRMemoryManager.cpp
@@ -27,6 +27,8 @@ void VKRSynchronizedRingAllocator::allocateAdditionalUploadBuffer(uint32 sizeReq
 		m_vkrMemMgr->CreateBuffer(bufferAllocSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, newBuffer.vk_buffer, newBuffer.vk_mem);
 	else if (m_bufferType == BUFFER_TYPE::INDEX)
 		m_vkrMemMgr->CreateBuffer(bufferAllocSize, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, newBuffer.vk_buffer, newBuffer.vk_mem);
+	else if (m_bufferType == BUFFER_TYPE::STRIDE)
+		m_vkrMemMgr->CreateBuffer(bufferAllocSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, newBuffer.vk_buffer, newBuffer.vk_mem);
 	else
 		cemu_assert_debug(false);
 
diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VKRMemoryManager.h b/src/Cafe/HW/Latte/Renderer/Vulkan/VKRMemoryManager.h
index 47561159..3da6fe4c 100644
--- a/src/Cafe/HW/Latte/Renderer/Vulkan/VKRMemoryManager.h
+++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VKRMemoryManager.h
@@ -75,6 +75,7 @@ public:
 	{
 		STAGING, // staging upload buffer
 		INDEX, // buffer for index data
+		STRIDE, // buffer for stride-adjusted vertex data
 	};
 
 	VKRSynchronizedRingAllocator(class VulkanRenderer* vkRenderer, class VKRMemoryManager* vkMemoryManager, BUFFER_TYPE bufferType, uint32 minimumBufferAllocSize) : m_vkr(vkRenderer), m_vkrMemMgr(vkMemoryManager), m_bufferType(bufferType), m_minimumBufferAllocSize(minimumBufferAllocSize) {};
@@ -138,7 +139,10 @@ class VKRMemoryManager
 {
 	friend class VKRSynchronizedRingAllocator;
 public:
-	VKRMemoryManager(class VulkanRenderer* renderer) : m_stagingBuffer(renderer, this, VKRSynchronizedRingAllocator::BUFFER_TYPE::STAGING, 32u * 1024 * 1024), m_indexBuffer(renderer, this, VKRSynchronizedRingAllocator::BUFFER_TYPE::INDEX, 4u * 1024 * 1024)
+	VKRMemoryManager(class VulkanRenderer* renderer) :
+		m_stagingBuffer(renderer, this, VKRSynchronizedRingAllocator::BUFFER_TYPE::STAGING, 32u * 1024 * 1024),
+		m_indexBuffer(renderer, this, VKRSynchronizedRingAllocator::BUFFER_TYPE::INDEX, 4u * 1024 * 1024),
+		m_vertexStrideMetalBuffer(renderer, this, VKRSynchronizedRingAllocator::BUFFER_TYPE::STRIDE, 4u * 1024 * 1024)
 	{
 		m_vkr = renderer;
 	}
@@ -164,12 +168,14 @@ public:
 
 	VKRSynchronizedRingAllocator& getStagingAllocator() { return m_stagingBuffer; }; // allocator for texture/attribute/uniform uploads
 	VKRSynchronizedRingAllocator& getIndexAllocator() { return m_indexBuffer; }; // allocator for index data
+	VKRSynchronizedRingAllocator& getMetalStrideWorkaroundAllocator() { return m_vertexStrideMetalBuffer; }; // allocator for stride-adjusted vertex data
 
 	void cleanupBuffers(uint64 latestFinishedCommandBufferId)
 	{
 		LatteIndices_invalidateAll();
 		m_stagingBuffer.CleanupBuffer(latestFinishedCommandBufferId);
 		m_indexBuffer.CleanupBuffer(latestFinishedCommandBufferId);
+		m_vertexStrideMetalBuffer.CleanupBuffer(latestFinishedCommandBufferId);
 	}
 
 	// memory helpers
@@ -197,4 +203,5 @@ public:
 	class VulkanRenderer* m_vkr;
 	VKRSynchronizedRingAllocator m_stagingBuffer;
 	VKRSynchronizedRingAllocator m_indexBuffer;
+	VKRSynchronizedRingAllocator m_vertexStrideMetalBuffer;
 };
diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp
index 6e433466..fde113bb 100644
--- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp
@@ -459,8 +459,7 @@ void PipelineCompiler::InitVertexInputState(const LatteContextRegister& latteReg
 		VkVertexInputBindingDescription entry{};
 #if BOOST_OS_MACOS
 		if (bufferStride % 4 != 0) {
-			forceLog_printf("MoltenVK error: vertex stride was %d, expected multiple of 4", bufferStride);
-			bufferStride = 0;
+			bufferStride = bufferStride + (4-(bufferStride % 4));
 		}
 #endif
 		entry.stride = bufferStride;
diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp
index f1f5afdd..d5e0718b 100644
--- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp
@@ -3458,6 +3458,36 @@ void VulkanRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset,
 	vkCmdBindVertexBuffers(m_state.currentCommandBuffer, bufferIndex, 1, &attrBuffer, &attrOffset);
 }
 
+void VulkanRenderer::buffer_bindVertexStrideWorkaroundBuffer(VkBuffer fixedBuffer, uint32 offset, uint32 bufferIndex, uint32 size)
+{
+	cemu_assert_debug(bufferIndex < LATTE_MAX_VERTEX_BUFFERS);
+	m_state.currentVertexBinding[bufferIndex].offset = 0xFFFFFFFF;
+	VkBuffer attrBuffer = fixedBuffer;
+	VkDeviceSize attrOffset = offset;
+	vkCmdBindVertexBuffers(m_state.currentCommandBuffer, bufferIndex, 1, &attrBuffer, &attrOffset);
+}
+
+std::pair<VkBuffer, uint32> VulkanRenderer::buffer_genStrideWorkaroundVertexBuffer(MPTR buffer, uint32 size, uint32 oldStride)
+{
+	cemu_assert_debug(oldStride % 4 != 0);
+
+	std::span old_buffer{memory_getPointerFromPhysicalOffset(buffer), size};
+
+	//new stride is the nearest multiple of 4
+	uint32 newStride = oldStride + (4-(oldStride % 4));
+	uint32 newSize = size / oldStride * newStride;
+
+	auto new_buffer_alloc = memoryManager->getMetalStrideWorkaroundAllocator().AllocateBufferMemory(newSize, 128);
+
+	std::span new_buffer{new_buffer_alloc.memPtr, new_buffer_alloc.size};
+
+	for(size_t elem = 0; elem < size / oldStride; elem++)
+	{
+		memcpy(&new_buffer[elem * newStride], &old_buffer[elem * oldStride], oldStride);
+	}
+	return {new_buffer_alloc.vkBuffer, new_buffer_alloc.bufferOffset};
+}
+
 void VulkanRenderer::buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size)
 {
 	cemu_assert_debug(!m_useHostMemoryForCache);
diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h
index 2dd8735a..84b74b99 100644
--- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h
+++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h
@@ -342,6 +342,8 @@ public:
 	void bufferCache_copy(uint32 srcOffset, uint32 dstOffset, uint32 size) override;
 
 	void buffer_bindVertexBuffer(uint32 bufferIndex, uint32 buffer, uint32 size) override;
+	void buffer_bindVertexStrideWorkaroundBuffer(VkBuffer fixedBuffer, uint32 offset, uint32 bufferIndex, uint32 size);
+	std::pair<VkBuffer, uint32> buffer_genStrideWorkaroundVertexBuffer(MPTR buffer, uint32 size, uint32 oldStride);
 	void buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size) override;
 
 	RendererShader* shader_create(RendererShader::ShaderType type, uint64 baseHash, uint64 auxHash, const std::string& source, bool isGameShader, bool isGfxPackShader) override;