do vertex restride on the CPU

This commit is contained in:
Samuliak 2024-09-21 09:28:46 +02:00
parent f03c0a2769
commit a460a5d28a
4 changed files with 34 additions and 31 deletions

View File

@ -27,19 +27,16 @@ void LatteTextureReadbackInfoMtl::StartTransfer()
blitCommandEncoder->copyFromTexture(baseTexture->GetTexture(), 0, 0, MTL::Origin{0, 0, 0}, MTL::Size{(uint32)baseTexture->width, (uint32)baseTexture->height, 1}, m_mtlr->GetTextureReadbackBuffer(), m_bufferOffset, bytesPerRow, bytesPerImage);
m_commandBuffer = m_mtlr->GetCurrentCommandBuffer()->retain();
m_mtlr->RequestSoonCommit();
// TODO: uncomment?
//m_mtlr->RequestSoonCommit();
m_mtlr->CommitCommandBuffer();
}
// Reports whether this readback's command buffer has finished, as decided by
// CommandBufferCompleted(m_commandBuffer). Returns false when no command buffer is held.
bool LatteTextureReadbackInfoMtl::IsFinished()
{
// TODO: is this needed?
// Without a held command buffer there is nothing to query, so report "not finished"
if (!m_commandBuffer)
return false;
// TODO: remove this?
// Command buffer wasn't even committed, let's commit immediately
// NOTE(review): the active check below and the commented-out copy after it are identical;
// this looks like the pre- and post-change versions side by side - confirm which one is intended
if (m_mtlr->GetCurrentCommandBuffer() == m_commandBuffer)
m_mtlr->CommitCommandBuffer();
//if (m_mtlr->GetCurrentCommandBuffer() == m_commandBuffer)
// m_mtlr->CommitCommandBuffer();
// Completion is determined solely by the helper; this function does not block itself
// (presumably the helper only polls the buffer status - TODO confirm)
return CommandBufferCompleted(m_commandBuffer);
}

View File

@ -28,15 +28,14 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
restrideInfo.allocation = m_bufferAllocator.GetBufferAllocation(newSize);
buffer = m_bufferAllocator.GetBuffer(restrideInfo.allocation.bufferIndex);
//uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset;
//uint8* newPtr = (uint8*)buffer->contents() + restrideInfo.allocation.bufferOffset;
// HACK: the restriding is done on the CPU, since doing it on the GPU was causing over-synchronization
uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset;
uint8* newPtr = (uint8*)buffer->contents() + restrideInfo.allocation.offset;
//for (size_t elem = 0; elem < vertexBufferRange.size / stride; elem++)
//{
// memcpy(newPtr + elem * newStride, oldPtr + elem * stride, stride);
//}
//debug_printf("Restrided vertex buffer (old stride: %zu, new stride: %zu, old size: %zu, new size: %zu)\n", stride, newStride, vertexBufferRange.size, newSize);
for (size_t elem = 0; elem < vertexBufferRange.size / stride; elem++)
memcpy(newPtr + elem * newStride, oldPtr + elem * stride, stride);
/*
if (m_mtlr->GetEncoderType() == MetalEncoderType::Render)
{
auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_mtlr->GetCommandEncoder());
@ -60,18 +59,19 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
// TODO: do the barriers in one call?
MTL::Resource* barrierBuffers[] = {buffer};
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex);
// Debug
m_mtlr->GetPerformanceMonitor().m_vertexBufferRestrides++;
}
else
{
debug_printf("vertex buffer restride needs an active render encoder\n");
debug_printf("vertex buffer restride needs an active render command encoder\n");
cemu_assert_suspicious();
}
*/
restrideInfo.memoryInvalidated = false;
restrideInfo.lastStride = newStride;
// Debug
m_mtlr->GetPerformanceMonitor().m_vertexBufferRestrides++;
}
else
{
@ -121,7 +121,7 @@ void MetalMemoryManager::InitBufferCache(size_t size)
{
cemu_assert_debug(!m_bufferCache);
m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, MTL::ResourceStorageModePrivate);
m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, MTL::ResourceStorageModeShared);
#ifdef CEMU_DEBUG_ASSERT
m_bufferCache->setLabel(GetLabel("Buffer cache", m_bufferCache));
#endif

View File

@ -58,7 +58,7 @@ private:
class MetalMemoryManager
{
public:
MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer, m_mtlr->GetOptimalBufferStorageMode()), m_framePersistentBufferAllocator(metalRenderer, MTL::ResourceStorageModePrivate), m_tempBufferAllocator(metalRenderer), m_vertexBufferCache(metalRenderer, m_framePersistentBufferAllocator) {}
MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer, m_mtlr->GetOptimalBufferStorageMode()), m_framePersistentBufferAllocator(metalRenderer, MTL::ResourceStorageModeShared), m_tempBufferAllocator(metalRenderer), m_vertexBufferCache(metalRenderer, m_framePersistentBufferAllocator) {}
~MetalMemoryManager();
// Pipelines

View File

@ -26,7 +26,7 @@
#include "imgui/imgui_extension.h"
#include "imgui/imgui_impl_metal.h"
#define DEFAULT_COMMIT_TRESHOLD 256
#define DEFAULT_COMMIT_TRESHOLD 196
#define OCCLUSION_QUERY_POOL_SIZE 1024
extern bool hasValidFramebufferAttached;
@ -917,15 +917,18 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
const auto fetchShader = LatteSHRC_GetActiveFetchShader();
// Check if we need to end the render pass
// Fragment shader is most likely to require a render pass flush, so check for it first
bool endRenderPass = CheckIfRenderPassNeedsFlush(pixelShader);
if (!endRenderPass)
endRenderPass = CheckIfRenderPassNeedsFlush(vertexShader);
if (!endRenderPass && geometryShader)
endRenderPass = CheckIfRenderPassNeedsFlush(geometryShader);
if (!m_state.m_isFirstDrawInRenderPass)
{
// Fragment shader is most likely to require a render pass flush, so check for it first
bool endRenderPass = CheckIfRenderPassNeedsFlush(pixelShader);
if (!endRenderPass)
endRenderPass = CheckIfRenderPassNeedsFlush(vertexShader);
if (!endRenderPass && geometryShader)
endRenderPass = CheckIfRenderPassNeedsFlush(geometryShader);
if (endRenderPass)
EndEncoding();
if (endRenderPass)
EndEncoding();
}
// Primitive type
const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]);
@ -1889,7 +1892,9 @@ void MetalRenderer::ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 s
void MetalRenderer::CopyBufferToBuffer(MTL::Buffer* src, uint32 srcOffset, MTL::Buffer* dst, uint32 dstOffset, uint32 size, MTL::RenderStages after, MTL::RenderStages before)
{
// TODO: uncomment and fix performance issues
// Do the copy in a vertex shader on Apple GPUs
/*
if (m_isAppleGPU && m_encoderType == MetalEncoderType::Render)
{
auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_commandEncoder);
@ -1910,10 +1915,11 @@ void MetalRenderer::CopyBufferToBuffer(MTL::Buffer* src, uint32 srcOffset, MTL::
}
else
{
*/
auto blitCommandEncoder = GetBlitCommandEncoder();
blitCommandEncoder->copyFromBuffer(src, srcOffset, dst, dstOffset, size);
}
//}
}
void MetalRenderer::SwapBuffer(bool mainWindow)