do vertex restride on the CPU

This commit is contained in:
Samuliak 2024-09-21 09:28:46 +02:00
parent f03c0a2769
commit a460a5d28a
4 changed files with 34 additions and 31 deletions

View File

@ -27,19 +27,16 @@ void LatteTextureReadbackInfoMtl::StartTransfer()
blitCommandEncoder->copyFromTexture(baseTexture->GetTexture(), 0, 0, MTL::Origin{0, 0, 0}, MTL::Size{(uint32)baseTexture->width, (uint32)baseTexture->height, 1}, m_mtlr->GetTextureReadbackBuffer(), m_bufferOffset, bytesPerRow, bytesPerImage); blitCommandEncoder->copyFromTexture(baseTexture->GetTexture(), 0, 0, MTL::Origin{0, 0, 0}, MTL::Size{(uint32)baseTexture->width, (uint32)baseTexture->height, 1}, m_mtlr->GetTextureReadbackBuffer(), m_bufferOffset, bytesPerRow, bytesPerImage);
m_commandBuffer = m_mtlr->GetCurrentCommandBuffer()->retain(); m_commandBuffer = m_mtlr->GetCurrentCommandBuffer()->retain();
m_mtlr->RequestSoonCommit(); // TODO: uncomment?
//m_mtlr->RequestSoonCommit();
m_mtlr->CommitCommandBuffer();
} }
bool LatteTextureReadbackInfoMtl::IsFinished() bool LatteTextureReadbackInfoMtl::IsFinished()
{ {
// TODO: is this needed?
if (!m_commandBuffer)
return false;
// TODO: remove this?
// Command buffer wasn't even committed, let's commit immediately // Command buffer wasn't even committed, let's commit immediately
if (m_mtlr->GetCurrentCommandBuffer() == m_commandBuffer) //if (m_mtlr->GetCurrentCommandBuffer() == m_commandBuffer)
m_mtlr->CommitCommandBuffer(); // m_mtlr->CommitCommandBuffer();
return CommandBufferCompleted(m_commandBuffer); return CommandBufferCompleted(m_commandBuffer);
} }

View File

@ -28,15 +28,14 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
restrideInfo.allocation = m_bufferAllocator.GetBufferAllocation(newSize); restrideInfo.allocation = m_bufferAllocator.GetBufferAllocation(newSize);
buffer = m_bufferAllocator.GetBuffer(restrideInfo.allocation.bufferIndex); buffer = m_bufferAllocator.GetBuffer(restrideInfo.allocation.bufferIndex);
//uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset; // HACK: the restriding is done on the CPU, since doing it on the GPU was causing over-synchronization
//uint8* newPtr = (uint8*)buffer->contents() + restrideInfo.allocation.bufferOffset; uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset;
uint8* newPtr = (uint8*)buffer->contents() + restrideInfo.allocation.offset;
//for (size_t elem = 0; elem < vertexBufferRange.size / stride; elem++) for (size_t elem = 0; elem < vertexBufferRange.size / stride; elem++)
//{ memcpy(newPtr + elem * newStride, oldPtr + elem * stride, stride);
// memcpy(newPtr + elem * newStride, oldPtr + elem * stride, stride);
//}
//debug_printf("Restrided vertex buffer (old stride: %zu, new stride: %zu, old size: %zu, new size: %zu)\n", stride, newStride, vertexBufferRange.size, newSize);
/*
if (m_mtlr->GetEncoderType() == MetalEncoderType::Render) if (m_mtlr->GetEncoderType() == MetalEncoderType::Render)
{ {
auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_mtlr->GetCommandEncoder()); auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_mtlr->GetCommandEncoder());
@ -60,18 +59,19 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
// TODO: do the barriers in one call? // TODO: do the barriers in one call?
MTL::Resource* barrierBuffers[] = {buffer}; MTL::Resource* barrierBuffers[] = {buffer};
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex); renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex);
// Debug
m_mtlr->GetPerformanceMonitor().m_vertexBufferRestrides++;
} }
else else
{ {
debug_printf("vertex buffer restride needs an active render encoder\n"); debug_printf("vertex buffer restride needs an active render command encoder\n");
cemu_assert_suspicious(); cemu_assert_suspicious();
} }
*/
restrideInfo.memoryInvalidated = false; restrideInfo.memoryInvalidated = false;
restrideInfo.lastStride = newStride; restrideInfo.lastStride = newStride;
// Debug
m_mtlr->GetPerformanceMonitor().m_vertexBufferRestrides++;
} }
else else
{ {
@ -121,7 +121,7 @@ void MetalMemoryManager::InitBufferCache(size_t size)
{ {
cemu_assert_debug(!m_bufferCache); cemu_assert_debug(!m_bufferCache);
m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, MTL::ResourceStorageModePrivate); m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, MTL::ResourceStorageModeShared);
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT
m_bufferCache->setLabel(GetLabel("Buffer cache", m_bufferCache)); m_bufferCache->setLabel(GetLabel("Buffer cache", m_bufferCache));
#endif #endif

View File

@ -58,7 +58,7 @@ private:
class MetalMemoryManager class MetalMemoryManager
{ {
public: public:
MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer, m_mtlr->GetOptimalBufferStorageMode()), m_framePersistentBufferAllocator(metalRenderer, MTL::ResourceStorageModePrivate), m_tempBufferAllocator(metalRenderer), m_vertexBufferCache(metalRenderer, m_framePersistentBufferAllocator) {} MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer, m_mtlr->GetOptimalBufferStorageMode()), m_framePersistentBufferAllocator(metalRenderer, MTL::ResourceStorageModeShared), m_tempBufferAllocator(metalRenderer), m_vertexBufferCache(metalRenderer, m_framePersistentBufferAllocator) {}
~MetalMemoryManager(); ~MetalMemoryManager();
// Pipelines // Pipelines

View File

@ -26,7 +26,7 @@
#include "imgui/imgui_extension.h" #include "imgui/imgui_extension.h"
#include "imgui/imgui_impl_metal.h" #include "imgui/imgui_impl_metal.h"
#define DEFAULT_COMMIT_TRESHOLD 256 #define DEFAULT_COMMIT_TRESHOLD 196
#define OCCLUSION_QUERY_POOL_SIZE 1024 #define OCCLUSION_QUERY_POOL_SIZE 1024
extern bool hasValidFramebufferAttached; extern bool hasValidFramebufferAttached;
@ -917,15 +917,18 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
const auto fetchShader = LatteSHRC_GetActiveFetchShader(); const auto fetchShader = LatteSHRC_GetActiveFetchShader();
// Check if we need to end the render pass // Check if we need to end the render pass
// Fragment shader is most likely to require a render pass flush, so check for it first if (!m_state.m_isFirstDrawInRenderPass)
bool endRenderPass = CheckIfRenderPassNeedsFlush(pixelShader); {
if (!endRenderPass) // Fragment shader is most likely to require a render pass flush, so check for it first
endRenderPass = CheckIfRenderPassNeedsFlush(vertexShader); bool endRenderPass = CheckIfRenderPassNeedsFlush(pixelShader);
if (!endRenderPass && geometryShader) if (!endRenderPass)
endRenderPass = CheckIfRenderPassNeedsFlush(geometryShader); endRenderPass = CheckIfRenderPassNeedsFlush(vertexShader);
if (!endRenderPass && geometryShader)
endRenderPass = CheckIfRenderPassNeedsFlush(geometryShader);
if (endRenderPass) if (endRenderPass)
EndEncoding(); EndEncoding();
}
// Primitive type // Primitive type
const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]); const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]);
@ -1889,7 +1892,9 @@ void MetalRenderer::ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 s
void MetalRenderer::CopyBufferToBuffer(MTL::Buffer* src, uint32 srcOffset, MTL::Buffer* dst, uint32 dstOffset, uint32 size, MTL::RenderStages after, MTL::RenderStages before) void MetalRenderer::CopyBufferToBuffer(MTL::Buffer* src, uint32 srcOffset, MTL::Buffer* dst, uint32 dstOffset, uint32 size, MTL::RenderStages after, MTL::RenderStages before)
{ {
// TODO: uncomment and fix performance issues
// Do the copy in a vertex shader on Apple GPUs // Do the copy in a vertex shader on Apple GPUs
/*
if (m_isAppleGPU && m_encoderType == MetalEncoderType::Render) if (m_isAppleGPU && m_encoderType == MetalEncoderType::Render)
{ {
auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_commandEncoder); auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_commandEncoder);
@ -1910,10 +1915,11 @@ void MetalRenderer::CopyBufferToBuffer(MTL::Buffer* src, uint32 srcOffset, MTL::
} }
else else
{ {
*/
auto blitCommandEncoder = GetBlitCommandEncoder(); auto blitCommandEncoder = GetBlitCommandEncoder();
blitCommandEncoder->copyFromBuffer(src, srcOffset, dst, dstOffset, size); blitCommandEncoder->copyFromBuffer(src, srcOffset, dst, dstOffset, size);
} //}
} }
void MetalRenderer::SwapBuffer(bool mainWindow) void MetalRenderer::SwapBuffer(bool mainWindow)