macOS: Workaround for MoltenVK stride limitation (#534)

parent 79e731d9b4
commit a3476c7b7c
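
Note: MoltenVK translates Vulkan calls to Metal, and Metal requires a vertex binding's stride to be a multiple of 4 bytes, whereas Vulkan itself (and the emulated GPU) allows arbitrary strides. This commit works around that limitation on macOS: vertex buffers with an unaligned stride are repacked into a scratch buffer whose stride is rounded up to the next multiple of 4. A minimal sketch of that rounding (same formula as the diff; roundStrideUpTo4 is a hypothetical helper name):

#include <cstdint>
#include <cassert>

// Round an unaligned vertex stride up to the next multiple of 4,
// e.g. 6 -> 8, 9 -> 12, 15 -> 16. Only meaningful when stride % 4 != 0,
// matching the cemu_assert_debug in buffer_genStrideWorkaroundVertexBuffer.
uint32_t roundStrideUpTo4(uint32_t stride)
{
    assert(stride % 4 != 0);
    return stride + (4 - (stride % 4));
}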
@@ -9,6 +9,7 @@
 #include "Cafe/GameProfile/GameProfile.h"
 
 #include "Cafe/HW/Latte/Core/LatteBufferCache.h"
+#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
 
 template<int vectorLen>
 void rectGenerate4thVertex(uint32be* output, uint32be* input0, uint32be* input1, uint32be* input2)
@@ -198,6 +199,19 @@ bool LatteBufferCache_Sync(uint32 minIndex, uint32 maxIndex, uint32 baseInstance
         if (fixedBufferSize == 0 || bufferStride == 0)
             fixedBufferSize += 128;
 
+
+#if BOOST_OS_MACOS
+        if(bufferStride % 4 != 0)
+        {
+            if (VulkanRenderer* vkRenderer = VulkanRenderer::GetInstance())
+            {
+                auto fixedBuffer = vkRenderer->buffer_genStrideWorkaroundVertexBuffer(bufferAddress, fixedBufferSize, bufferStride);
+                vkRenderer->buffer_bindVertexStrideWorkaroundBuffer(fixedBuffer.first, fixedBuffer.second, bufferIndex, fixedBufferSize);
+                continue;
+            }
+        }
+#endif
+
         uint32 bindOffset = LatteBufferCache_retrieveDataInCache(bufferAddress, fixedBufferSize);
         g_renderer->buffer_bindVertexBuffer(bufferIndex, bindOffset, fixedBufferSize);
     }
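
Note: with this change, any vertex buffer whose stride is not a multiple of 4 bypasses the regular buffer cache on macOS: the data is repacked by buffer_genStrideWorkaroundVertexBuffer, the resulting scratch buffer is bound via buffer_bindVertexStrideWorkaroundBuffer, and the `continue` skips the normal cache lookup and bind below. Worked example of the sizing used later in the diff: a 600-byte buffer with stride 6 becomes stride 8 and 600 / 6 * 8 = 800 bytes.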
@@ -27,6 +27,8 @@ void VKRSynchronizedRingAllocator::allocateAdditionalUploadBuffer(uint32 sizeRequ
         m_vkrMemMgr->CreateBuffer(bufferAllocSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, newBuffer.vk_buffer, newBuffer.vk_mem);
     else if (m_bufferType == BUFFER_TYPE::INDEX)
         m_vkrMemMgr->CreateBuffer(bufferAllocSize, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, newBuffer.vk_buffer, newBuffer.vk_mem);
+    else if (m_bufferType == BUFFER_TYPE::STRIDE)
+        m_vkrMemMgr->CreateBuffer(bufferAllocSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, newBuffer.vk_buffer, newBuffer.vk_mem);
     else
         cemu_assert_debug(false);
 
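
Note: STRIDE buffers are created with VK_BUFFER_USAGE_VERTEX_BUFFER_BIT and HOST_VISIBLE | HOST_COHERENT memory because the CPU writes the repacked vertex data straight into the ring buffer and the GPU then reads it as a vertex buffer, with no staging copy in between; the STAGING allocator, by contrast, only needs TRANSFER_SRC usage.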
@ -75,6 +75,7 @@ public:
|
|||||||
{
|
{
|
||||||
STAGING, // staging upload buffer
|
STAGING, // staging upload buffer
|
||||||
INDEX, // buffer for index data
|
INDEX, // buffer for index data
|
||||||
|
STRIDE, // buffer for stride-adjusted vertex data
|
||||||
};
|
};
|
||||||
|
|
||||||
VKRSynchronizedRingAllocator(class VulkanRenderer* vkRenderer, class VKRMemoryManager* vkMemoryManager, BUFFER_TYPE bufferType, uint32 minimumBufferAllocSize) : m_vkr(vkRenderer), m_vkrMemMgr(vkMemoryManager), m_bufferType(bufferType), m_minimumBufferAllocSize(minimumBufferAllocSize) {};
|
VKRSynchronizedRingAllocator(class VulkanRenderer* vkRenderer, class VKRMemoryManager* vkMemoryManager, BUFFER_TYPE bufferType, uint32 minimumBufferAllocSize) : m_vkr(vkRenderer), m_vkrMemMgr(vkMemoryManager), m_bufferType(bufferType), m_minimumBufferAllocSize(minimumBufferAllocSize) {};
|
||||||
@@ -138,7 +139,10 @@ class VKRMemoryManager
 {
     friend class VKRSynchronizedRingAllocator;
 public:
-    VKRMemoryManager(class VulkanRenderer* renderer) : m_stagingBuffer(renderer, this, VKRSynchronizedRingAllocator::BUFFER_TYPE::STAGING, 32u * 1024 * 1024), m_indexBuffer(renderer, this, VKRSynchronizedRingAllocator::BUFFER_TYPE::INDEX, 4u * 1024 * 1024)
+    VKRMemoryManager(class VulkanRenderer* renderer) :
+        m_stagingBuffer(renderer, this, VKRSynchronizedRingAllocator::BUFFER_TYPE::STAGING, 32u * 1024 * 1024),
+        m_indexBuffer(renderer, this, VKRSynchronizedRingAllocator::BUFFER_TYPE::INDEX, 4u * 1024 * 1024),
+        m_vertexStrideMetalBuffer(renderer, this, VKRSynchronizedRingAllocator::BUFFER_TYPE::STRIDE, 4u * 1024 * 1024)
     {
         m_vkr = renderer;
     }
@@ -164,12 +168,14 @@ public:
 
     VKRSynchronizedRingAllocator& getStagingAllocator() { return m_stagingBuffer; }; // allocator for texture/attribute/uniform uploads
     VKRSynchronizedRingAllocator& getIndexAllocator() { return m_indexBuffer; }; // allocator for index data
+    VKRSynchronizedRingAllocator& getMetalStrideWorkaroundAllocator() { return m_vertexStrideMetalBuffer; }; // allocator for stride-adjusted vertex data
 
     void cleanupBuffers(uint64 latestFinishedCommandBufferId)
     {
         LatteIndices_invalidateAll();
         m_stagingBuffer.CleanupBuffer(latestFinishedCommandBufferId);
         m_indexBuffer.CleanupBuffer(latestFinishedCommandBufferId);
+        m_vertexStrideMetalBuffer.CleanupBuffer(latestFinishedCommandBufferId);
     }
 
     // memory helpers
@@ -197,4 +203,5 @@ public:
     class VulkanRenderer* m_vkr;
     VKRSynchronizedRingAllocator m_stagingBuffer;
     VKRSynchronizedRingAllocator m_indexBuffer;
+    VKRSynchronizedRingAllocator m_vertexStrideMetalBuffer;
 };
@@ -459,8 +459,7 @@ void PipelineCompiler::InitVertexInputState(const LatteContextRegister& latteReg
     VkVertexInputBindingDescription entry{};
 #if BOOST_OS_MACOS
     if (bufferStride % 4 != 0) {
-        forceLog_printf("MoltenVK error: vertex stride was %d, expected multiple of 4", bufferStride);
-        bufferStride = 0;
+        bufferStride = bufferStride + (4-(bufferStride % 4));
     }
 #endif
     entry.stride = bufferStride;
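Note: previously an unaligned stride on macOS was logged as a MoltenVK error and forced to 0. Since unaligned buffers are now repacked, the pipeline's declared stride is instead rounded up with the same formula as buffer_genStrideWorkaroundVertexBuffer so that it matches the repacked layout; for an unaligned stride this is equivalent to (bufferStride + 3) & ~3u.
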
@@ -3458,6 +3458,36 @@ void VulkanRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset,
     vkCmdBindVertexBuffers(m_state.currentCommandBuffer, bufferIndex, 1, &attrBuffer, &attrOffset);
 }
 
+void VulkanRenderer::buffer_bindVertexStrideWorkaroundBuffer(VkBuffer fixedBuffer, uint32 offset, uint32 bufferIndex, uint32 size)
+{
+    cemu_assert_debug(bufferIndex < LATTE_MAX_VERTEX_BUFFERS);
+    m_state.currentVertexBinding[bufferIndex].offset = 0xFFFFFFFF;
+    VkBuffer attrBuffer = fixedBuffer;
+    VkDeviceSize attrOffset = offset;
+    vkCmdBindVertexBuffers(m_state.currentCommandBuffer, bufferIndex, 1, &attrBuffer, &attrOffset);
+}
+
+std::pair<VkBuffer, uint32> VulkanRenderer::buffer_genStrideWorkaroundVertexBuffer(MPTR buffer, uint32 size, uint32 oldStride)
+{
+    cemu_assert_debug(oldStride % 4 != 0);
+
+    std::span<uint8> old_buffer{memory_getPointerFromPhysicalOffset(buffer), size};
+
+    //new stride is the nearest multiple of 4
+    uint32 newStride = oldStride + (4-(oldStride % 4));
+    uint32 newSize = size / oldStride * newStride;
+
+    auto new_buffer_alloc = memoryManager->getMetalStrideWorkaroundAllocator().AllocateBufferMemory(newSize, 128);
+
+    std::span<uint8> new_buffer{new_buffer_alloc.memPtr, new_buffer_alloc.size};
+
+    for(size_t elem = 0; elem < size / oldStride; elem++)
+    {
+        memcpy(&new_buffer[elem * newStride], &old_buffer[elem * oldStride], oldStride);
+    }
+    return {new_buffer_alloc.vkBuffer, new_buffer_alloc.bufferOffset};
+}
+
 void VulkanRenderer::buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size)
 {
     cemu_assert_debug(!m_useHostMemoryForCache);
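To make the repack concrete, here is a minimal standalone sketch of the same transformation over plain byte vectors (repackToAlignedStride is a hypothetical helper, not part of the commit); each element keeps its original bytes, and the bytes added by the wider stride are left as zero padding:

#include <cstdint>
#include <cstring>
#include <vector>

// Repack tightly packed elements of size oldStride (not a multiple of 4)
// into a buffer whose per-element stride is rounded up to a multiple of 4.
std::vector<uint8_t> repackToAlignedStride(const std::vector<uint8_t>& src, uint32_t oldStride)
{
    uint32_t newStride = oldStride + (4 - (oldStride % 4)); // same arithmetic as the diff
    uint32_t count = static_cast<uint32_t>(src.size()) / oldStride;
    std::vector<uint8_t> dst(count * newStride, 0);         // zero-filled padding bytes
    for (uint32_t elem = 0; elem < count; elem++)
        memcpy(&dst[elem * newStride], &src[elem * oldStride], oldStride);
    return dst;
}
// e.g. 100 vertices with stride 6 (600 bytes) -> 100 vertices with stride 8 (800 bytes)
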
@@ -342,6 +342,8 @@ public:
     void bufferCache_copy(uint32 srcOffset, uint32 dstOffset, uint32 size) override;
 
     void buffer_bindVertexBuffer(uint32 bufferIndex, uint32 buffer, uint32 size) override;
+    void buffer_bindVertexStrideWorkaroundBuffer(VkBuffer fixedBuffer, uint32 offset, uint32 bufferIndex, uint32 size);
+    std::pair<VkBuffer, uint32> buffer_genStrideWorkaroundVertexBuffer(MPTR buffer, uint32 size, uint32 oldStride);
     void buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size) override;
 
     RendererShader* shader_create(RendererShader::ShaderType type, uint64 baseHash, uint64 auxHash, const std::string& source, bool isGameShader, bool isGfxPackShader) override;