Do vertex buffer restriding on the GPU and avoid over-synchronization

This commit is contained in:
Samuliak 2024-09-23 16:47:22 +02:00
parent 8b68df0c59
commit 3cf831d46a
3 changed files with 14 additions and 12 deletions

View File

@ -9,7 +9,7 @@ MetalVertexBufferCache::~MetalVertexBufferCache()
{ {
} }
MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Buffer* bufferCache, uint32 bufferIndex, size_t stride) MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Buffer* bufferCache, uint32 bufferIndex, size_t stride, std::vector<MTL::Resource*>& barrierBuffers)
{ {
auto vertexBufferRange = m_bufferRanges[bufferIndex]; auto vertexBufferRange = m_bufferRanges[bufferIndex];
auto& restrideInfo = *vertexBufferRange.restrideInfo; auto& restrideInfo = *vertexBufferRange.restrideInfo;
@ -28,14 +28,14 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
restrideInfo.allocation = m_bufferAllocator.GetBufferAllocation(newSize); restrideInfo.allocation = m_bufferAllocator.GetBufferAllocation(newSize);
buffer = m_bufferAllocator.GetBuffer(restrideInfo.allocation.bufferIndex); buffer = m_bufferAllocator.GetBuffer(restrideInfo.allocation.bufferIndex);
// HACK: the restriding is done on the CPU, since doing it on the GPU was causing over-synchronization /*
uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset; uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset;
uint8* newPtr = (uint8*)buffer->contents() + restrideInfo.allocation.offset; uint8* newPtr = (uint8*)buffer->contents() + restrideInfo.allocation.offset;
for (size_t elem = 0; elem < vertexBufferRange.size / stride; elem++) for (size_t elem = 0; elem < vertexBufferRange.size / stride; elem++)
memcpy(newPtr + elem * newStride, oldPtr + elem * stride, stride); memcpy(newPtr + elem * newStride, oldPtr + elem * stride, stride);
*/
/*
if (m_mtlr->GetEncoderType() == MetalEncoderType::Render) if (m_mtlr->GetEncoderType() == MetalEncoderType::Render)
{ {
auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_mtlr->GetCommandEncoder()); auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_mtlr->GetCommandEncoder());
@ -56,16 +56,12 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), vertexBufferRange.size / stride); renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), vertexBufferRange.size / stride);
// TODO: do the barriers in one call? vectorAppendUnique(barrierBuffers, static_cast<MTL::Resource*>(buffer));
MTL::Resource* barrierBuffers[] = {buffer};
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex);
} }
else else
{ {
debug_printf("vertex buffer restride needs an active render command encoder\n");
cemu_assert_suspicious(); cemu_assert_suspicious();
} }
*/
restrideInfo.memoryInvalidated = false; restrideInfo.memoryInvalidated = false;
restrideInfo.lastStride = newStride; restrideInfo.lastStride = newStride;

View File

@ -42,7 +42,7 @@ public:
range.offset = INVALID_OFFSET; range.offset = INVALID_OFFSET;
} }
MetalRestridedBufferRange RestrideBufferIfNeeded(MTL::Buffer* bufferCache, uint32 bufferIndex, size_t stride); MetalRestridedBufferRange RestrideBufferIfNeeded(MTL::Buffer* bufferCache, uint32 bufferIndex, size_t stride, std::vector<MTL::Resource*>& barrierBuffers);
private: private:
class MetalRenderer* m_mtlr; class MetalRenderer* m_mtlr;
@ -105,9 +105,9 @@ public:
m_vertexBufferCache.UntrackVertexBuffer(bufferIndex); m_vertexBufferCache.UntrackVertexBuffer(bufferIndex);
} }
MetalRestridedBufferRange RestrideBufferIfNeeded(uint32 bufferIndex, size_t stride) MetalRestridedBufferRange RestrideBufferIfNeeded(uint32 bufferIndex, size_t stride, std::vector<MTL::Resource*>& barrierBuffers)
{ {
return m_vertexBufferCache.RestrideBufferIfNeeded(m_bufferCache, bufferIndex, stride); return m_vertexBufferCache.RestrideBufferIfNeeded(m_bufferCache, bufferIndex, stride, barrierBuffers);
} }
private: private:

View File

@ -1141,6 +1141,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
// Resources // Resources
// Vertex buffers // Vertex buffers
std::vector<MTL::Resource*> barrierBuffers;
for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++) for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++)
{ {
auto& vertexBufferRange = m_state.m_vertexBuffers[i]; auto& vertexBufferRange = m_state.m_vertexBuffers[i];
@ -1161,7 +1162,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + i * 7; uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + i * 7;
uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
auto restridedBuffer = m_memoryManager->RestrideBufferIfNeeded(i, bufferStride); auto restridedBuffer = m_memoryManager->RestrideBufferIfNeeded(i, bufferStride, barrierBuffers);
buffer = restridedBuffer.buffer; buffer = restridedBuffer.buffer;
offset = restridedBuffer.offset; offset = restridedBuffer.offset;
@ -1172,6 +1173,11 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
} }
} }
if (!barrierBuffers.empty())
{
renderCommandEncoder->memoryBarrier(barrierBuffers.data(), barrierBuffers.size(), MTL::RenderStageVertex, MTL::RenderStageVertex);
}
// Render pipeline state // Render pipeline state
MTL::RenderPipelineState* renderPipelineState; MTL::RenderPipelineState* renderPipelineState;
if (usesGeometryShader) if (usesGeometryShader)