prepare for vertex stride workaround

This commit is contained in:
Samuliak 2024-08-06 19:08:22 +02:00
parent 99ff282720
commit 82dcbd98a6
7 changed files with 197 additions and 29 deletions

View File

@ -538,6 +538,7 @@ if(ENABLE_METAL)
target_sources(CemuCafe PRIVATE
HW/Latte/Renderer/Metal/MetalRenderer.cpp
HW/Latte/Renderer/Metal/MetalRenderer.h
HW/Latte/Renderer/Metal/MetalCommon.h
HW/Latte/Renderer/Metal/MetalCppImpl.cpp
HW/Latte/Renderer/Metal/MetalLayer.mm
HW/Latte/Renderer/Metal/MetalLayer.h

View File

@ -0,0 +1,6 @@
#pragma once
// Rounds `size` up to the nearest multiple of `alignment`.
//
// Works for any alignment, not only powers of two; for power-of-two alignments
// the result is the same as the usual `(size + a - 1) & ~(a - 1)` bit trick.
// An alignment of 0 is treated as "no alignment requirement" and returns
// `size` unchanged. If the rounded-up value does not fit in size_t, the result
// wraps around (unsigned arithmetic).
inline size_t align(size_t size, size_t alignment)
{
    if (alignment == 0)
        return size;

    const size_t remainder = size % alignment;
    return remainder == 0 ? size : size + (alignment - remainder);
}

View File

@ -1,3 +1,4 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
@ -14,7 +15,7 @@ MetalBufferAllocator::~MetalBufferAllocator()
MetalBufferAllocation MetalBufferAllocator::GetBufferAllocation(size_t size, size_t alignment)
{
// Align the size
size = (size + alignment - 1) & ~(alignment - 1);
size = align(size, alignment);
// First, try to find a free range
for (uint32 i = 0; i < m_freeBufferRanges.size(); i++)
@ -63,6 +64,65 @@ MetalBufferAllocation MetalBufferAllocator::GetBufferAllocation(size_t size, siz
return allocation;
}
// Releases everything the cache still holds for tracked vertex buffers: the
// restrided Metal buffer of each range (if one exists) and the range record
// itself, which TrackVertexBuffer allocates with `new`.
MetalVertexBufferCache::~MetalVertexBufferCache()
{
    for (auto& vertexBufferRange : m_bufferRanges)
    {
        if (!vertexBufferRange)
            continue;

        auto& restrideInfo = vertexBufferRange->restrideInfo;
        if (restrideInfo.buffer)
        {
            restrideInfo.buffer->release();
            // restrideInfo refers to storage owned by whoever registered the range;
            // clear the pointer so the released buffer cannot be released or used again.
            restrideInfo.buffer = nullptr;
        }

        // The record was heap-allocated when the range was tracked
        delete vertexBufferRange;
        vertexBufferRange = nullptr;
    }
}
// Decides which buffer/offset should be bound for the vertex buffer tracked in
// slot `bufferIndex`, given the vertex `stride` in use.
//
// - Stride is a multiple of 4: no workaround is needed; returns a null buffer
//   together with the tracked offset.
// - Otherwise: restriding is not implemented yet, so this always throws
//   std::runtime_error ("restride needed" when the cached restride is stale,
//   "restride unimplemented" when it is not). The statements following each
//   throw are placeholders for the future implementation and are not reached.
//
// The slot is dereferenced without a null check, so it must currently be tracked.
MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(uint32 bufferIndex, size_t stride)
{
    auto* const trackedRange = m_bufferRanges[bufferIndex];
    auto& info = trackedRange->restrideInfo;

    const bool strideIsUsable = (stride % 4) == 0;
    if (strideIsUsable)
    {
        // No restride needed
        return {nullptr, trackedRange->offset};
    }

    const bool restrideIsStale = info.memoryInvalidated || info.lastStride != stride;
    if (restrideIsStale)
    {
        // TODO: restride
        throw std::runtime_error("restride needed");

        info.memoryInvalidated = false;
        info.lastStride = stride;
    }

    // TODO: remove
    throw std::runtime_error("restride unimplemented");

    return {info.buffer, 0};
}
// Flags the restride state of every tracked vertex buffer range that overlaps
// the modified byte range [offset, offset + size) as invalidated, so that a
// cached restrided copy is not reused after its source bytes changed.
//
// Any overlap counts, including partial overlap at either end of a tracked
// range; only ranges that are completely before or completely after the
// modified bytes are left untouched.
void MetalVertexBufferCache::MemoryRangeChanged(size_t offset, size_t size)
{
    const size_t changedEnd = offset + size;

    for (auto* vertexBufferRange : m_bufferRanges)
    {
        if (!vertexBufferRange)
            continue;

        const size_t rangeBegin = vertexBufferRange->offset;
        const size_t rangeEnd = rangeBegin + vertexBufferRange->size;

        // Two half-open ranges are disjoint exactly when one of them ends at or
        // before the point where the other one starts.
        if (changedEnd <= rangeBegin || offset >= rangeEnd)
            continue;

        vertexBufferRange->restrideInfo.memoryInvalidated = true;
    }
}
MetalMemoryManager::~MetalMemoryManager()
{
if (m_bufferCache)
@ -85,7 +145,7 @@ void MetalMemoryManager::InitBufferCache(size_t size)
{
if (m_bufferCache)
{
printf("MetalMemoryManager::InitBufferCache: buffer cache already initialized\n");
debug_printf("MetalMemoryManager::InitBufferCache: buffer cache already initialized\n");
return;
}
@ -101,18 +161,21 @@ void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, si
if (!m_bufferCache)
{
printf("MetalMemoryManager::UploadToBufferCache: buffer cache not initialized\n");
debug_printf("MetalMemoryManager::UploadToBufferCache: buffer cache not initialized\n");
return;
}
memcpy((uint8*)m_bufferCache->contents() + offset, data, size);
// Notify vertex buffer cache about the change
m_vertexBufferCache.MemoryRangeChanged(offset, size);
}
void MetalMemoryManager::CopyBufferCache(size_t srcOffset, size_t dstOffset, size_t size)
{
if (!m_bufferCache)
{
printf("MetalMemoryManager::CopyBufferCache: buffer cache not initialized\n");
debug_printf("MetalMemoryManager::CopyBufferCache: buffer cache not initialized\n");
return;
}

View File

@ -3,6 +3,7 @@
#include <Metal/Metal.hpp>
#include "Cafe/HW/Latte/ISA/LatteReg.h"
#include "Cafe/HW/Latte/Core/LatteConst.h"
//const uint32 bufferAllocatorIndexShift = 24;
@ -51,10 +52,65 @@ private:
std::vector<MetalBufferRange> m_freeBufferRanges;
};
// Result of MetalVertexBufferCache::RestrideBufferIfNeeded: the buffer to bind
// for a vertex buffer slot and the byte offset inside that buffer.
//
// Members are default-initialized so a default-constructed value is well
// defined; brace initialization such as `{buffer, offset}` keeps working.
struct MetalRestridedBufferRange
{
    // nullptr means no restrided copy is used; MetalMemoryManager substitutes
    // the main buffer cache in that case.
    MTL::Buffer* buffer = nullptr;
    // Byte offset to bind at within `buffer`
    size_t offset = 0;
};
// TODO: use one big buffer for all the restrided vertex buffers?
// Bookkeeping for the vertex stride workaround of a single vertex buffer.
struct MetalRestrideInfo
{
    // Set by MetalVertexBufferCache::MemoryRangeChanged when the tracked bytes
    // are modified. Starts out true, so the first stride check treats the
    // restrided data as stale.
    bool memoryInvalidated{true};

    // Stride compared against the requested one to detect a stride change;
    // 0 until a restride has recorded one.
    size_t lastStride{0};

    // Restrided buffer, or nullptr if none has been created. Released by
    // MetalVertexBufferCache when the range is untracked or the cache is destroyed.
    MTL::Buffer* buffer{nullptr};
};
// One tracked vertex buffer: the byte range it occupies plus a handle to its
// restride bookkeeping. Created by MetalVertexBufferCache::TrackVertexBuffer
// via aggregate initialization, so the member order below must not change.
struct MetalVertexBufferRange
{
// Start of the range in bytes; compared against the offsets passed to
// MetalVertexBufferCache::MemoryRangeChanged.
size_t offset;
// Length of the range in bytes
size_t size;
// Reference to restride state stored outside this struct (passed in by the
// caller of TrackVertexBuffer). The referenced object has to stay alive for as
// long as this range is tracked.
MetalRestrideInfo& restrideInfo;
};
class MetalVertexBufferCache
{
public:
friend class MetalMemoryManager;
MetalVertexBufferCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {}
~MetalVertexBufferCache();
// Vertex buffer cache
void TrackVertexBuffer(uint32 bufferIndex, size_t offset, size_t size, MetalRestrideInfo& restrideInfo)
{
m_bufferRanges[bufferIndex] = new MetalVertexBufferRange{offset, size, restrideInfo};
}
void UntrackVertexBuffer(uint32 bufferIndex)
{
auto& range = m_bufferRanges[bufferIndex];
if (range->restrideInfo.buffer)
{
range->restrideInfo.buffer->release();
}
range = nullptr;
}
MetalRestridedBufferRange RestrideBufferIfNeeded(uint32 bufferIndex, size_t stride);
private:
class MetalRenderer* m_mtlr;
MetalVertexBufferRange* m_bufferRanges[LATTE_MAX_VERTEX_BUFFERS] = {nullptr};
void MemoryRangeChanged(size_t offset, size_t size);
};
class MetalMemoryManager
{
public:
MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer) {}
MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer), m_vertexBufferCache(metalRenderer) {}
~MetalMemoryManager();
void ResetTemporaryBuffers()
@ -90,6 +146,28 @@ public:
void UploadToBufferCache(const void* data, size_t offset, size_t size);
void CopyBufferCache(size_t srcOffset, size_t dstOffset, size_t size);
// Vertex buffer cache
// Forwards to MetalVertexBufferCache::TrackVertexBuffer: registers the byte
// range [offset, offset + size) for vertex buffer slot bufferIndex.
// restrideInfo is kept by reference inside the tracked range, so the caller's
// object has to stay alive until the slot is untracked.
void TrackVertexBuffer(uint32 bufferIndex, size_t offset, size_t size, MetalRestrideInfo& restrideInfo)
{
m_vertexBufferCache.TrackVertexBuffer(bufferIndex, offset, size, restrideInfo);
}
// Forwards to MetalVertexBufferCache::UntrackVertexBuffer: stops tracking the
// range registered for vertex buffer slot bufferIndex.
void UntrackVertexBuffer(uint32 bufferIndex)
{
m_vertexBufferCache.UntrackVertexBuffer(bufferIndex);
}
// Resolves the buffer and offset to bind for vertex buffer slot bufferIndex.
// Asks the vertex buffer cache first; when it reports that no restrided buffer
// is in use (null buffer), the main buffer cache is bound instead, at the
// offset the cache returned.
MetalRestridedBufferRange RestrideBufferIfNeeded(uint32 bufferIndex, size_t stride)
{
    MetalRestridedBufferRange resolved = m_vertexBufferCache.RestrideBufferIfNeeded(bufferIndex, stride);
    if (resolved.buffer == nullptr)
        resolved.buffer = m_bufferCache;

    return resolved;
}
private:
class MetalRenderer* m_mtlr;
@ -97,6 +175,7 @@ private:
MetalBufferAllocator m_bufferAllocator;//s[2];
//uint8 m_bufferAllocatorIndex = 0;
MetalVertexBufferCache m_vertexBufferCache;
MTL::Buffer* m_bufferCache = nullptr;
};

View File

@ -1,12 +1,14 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "HW/Latte/Core/FetchShader.h"
#include "HW/Latte/ISA/RegDefines.h"
#include "HW/Latte/Renderer/Metal/CachedFBOMtl.h"
#include "HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "HW/Latte/Renderer/Metal/RendererShaderMtl.h"
#include "HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
#include "HW/Latte/Core/FetchShader.h"
#include "HW/Latte/ISA/RegDefines.h"
MetalPipelineCache::~MetalPipelineCache()
{
for (auto& pair : m_pipelineCache)
@ -59,12 +61,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS
uint32 bufferIndex = bufferGroup.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
uint32 strideRemainder = bufferStride % 4;
if (strideRemainder != 0)
{
debug_printf("vertex stride must be a multiple of 4, remainder: %u\n", strideRemainder);
}
bufferStride = align(bufferStride, 4);
auto layout = vertexDescriptor->layouts()->object(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex));
layout->setStride(bufferStride);

View File

@ -6,7 +6,6 @@
#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/ShaderSourcePresent.h"
@ -440,11 +439,22 @@ void MetalRenderer::bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uint
void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, uint32 size)
{
if (m_state.vertexBuffers[bufferIndex].offset == offset)
cemu_assert_debug(bufferIndex < LATTE_MAX_VERTEX_BUFFERS);
auto& buffer = m_state.vertexBuffers[bufferIndex];
if (buffer.offset == offset && buffer.size == size)
return;
cemu_assert_debug(bufferIndex < LATTE_MAX_VERTEX_BUFFERS);
m_state.vertexBuffers[bufferIndex].needsRebind = true;
m_state.vertexBuffers[bufferIndex].offset = offset;
if (buffer.offset != INVALID_OFFSET)
{
m_memoryManager->UntrackVertexBuffer(bufferIndex);
}
buffer.needsRebind = true;
buffer.offset = offset;
buffer.size = size;
buffer.restrideInfo = {};
m_memoryManager->TrackVertexBuffer(bufferIndex, offset, size, buffer.restrideInfo);
}
void MetalRenderer::buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size)
@ -598,15 +608,25 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
LatteBufferCache_Sync(indexMin + baseVertex, indexMax + baseVertex, baseInstance, instanceCount);
// Vertex buffers
for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++)
{
auto& vertexBufferRange = m_state.vertexBuffers[i];
if (vertexBufferRange.needsRebind)
for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++)
{
auto& vertexBufferRange = m_state.vertexBuffers[i];
if (vertexBufferRange.offset != INVALID_OFFSET)
{
renderCommandEncoder->setVertexBuffer(m_memoryManager->GetBufferCache(), vertexBufferRange.offset, GET_MTL_VERTEX_BUFFER_INDEX(i));
vertexBufferRange.needsRebind = false;
// Restride
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + i * 7;
uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
auto restridedBuffer = m_memoryManager->RestrideBufferIfNeeded(i, bufferStride);
// Bind
if (vertexBufferRange.needsRebind)
{
renderCommandEncoder->setVertexBuffer(restridedBuffer.buffer, restridedBuffer.offset, GET_MTL_VERTEX_BUFFER_INDEX(i));
vertexBufferRange.needsRebind = false;
}
}
}
}
// Uniform buffers, textures and samplers
BindStageResources(renderCommandEncoder, vertexShader);
@ -1186,10 +1206,7 @@ void MetalRenderer::RebindRenderState(MTL::RenderCommandEncoder* renderCommandEn
{
auto& vertexBufferRange = m_state.vertexBuffers[i];
if (vertexBufferRange.offset != INVALID_OFFSET)
{
renderCommandEncoder->setVertexBuffer(m_memoryManager->GetBufferCache(), vertexBufferRange.offset, GET_MTL_VERTEX_BUFFER_INDEX(i));
vertexBufferRange.needsRebind = false;
}
vertexBufferRange.needsRebind = true;
}
}

View File

@ -6,6 +6,8 @@
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h"
// Number of Metal buffer binding slots the renderer works with.
// NOTE(review): presumably Metal's per-stage buffer argument table size - confirm.
#define MAX_MTL_BUFFERS 31
// Maps a vertex buffer index to its Metal buffer binding index, counting down
// from the top of the binding range (index 0 -> MAX_MTL_BUFFERS - 2).
// The argument is parenthesized so that expressions such as `a - b` expand correctly.
#define GET_MTL_VERTEX_BUFFER_INDEX(index) (MAX_MTL_BUFFERS - (index) - 2)
// TODO: don't hardcode the support buffer binding
@ -20,6 +22,9 @@ struct MetalBoundBuffer
{
bool needsRebind = false;
size_t offset = INVALID_OFFSET;
size_t size = 0;
// Memory manager will write restride info to this variable
MetalRestrideInfo restrideInfo;
};
struct MetalState