mirror of https://github.com/cemu-project/Cemu.git (synced 2025-01-07 15:48:15 +01:00)
do vertex restride on the CPU

commit a460a5d28a (parent f03c0a2769)
@@ -27,19 +27,16 @@ void LatteTextureReadbackInfoMtl::StartTransfer()
 	blitCommandEncoder->copyFromTexture(baseTexture->GetTexture(), 0, 0, MTL::Origin{0, 0, 0}, MTL::Size{(uint32)baseTexture->width, (uint32)baseTexture->height, 1}, m_mtlr->GetTextureReadbackBuffer(), m_bufferOffset, bytesPerRow, bytesPerImage);

 	m_commandBuffer = m_mtlr->GetCurrentCommandBuffer()->retain();
-	m_mtlr->RequestSoonCommit();
+	// TODO: uncomment?
+	//m_mtlr->RequestSoonCommit();
+	m_mtlr->CommitCommandBuffer();
 }

 bool LatteTextureReadbackInfoMtl::IsFinished()
 {
-	// TODO: is this needed?
 	if (!m_commandBuffer)
 		return false;

-	// TODO: remove this?
-	// Command buffer wasn't even comitted, let's commit immediately
-	if (m_mtlr->GetCurrentCommandBuffer() == m_commandBuffer)
-		m_mtlr->CommitCommandBuffer();
+	//if (m_mtlr->GetCurrentCommandBuffer() == m_commandBuffer)
+	//	m_mtlr->CommitCommandBuffer();

 	return CommandBufferCompleted(m_commandBuffer);
 }
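The hunk polls CommandBufferCompleted(m_commandBuffer) but doesn't show its definition. A minimal sketch of such a check with metal-cpp, assuming it simply inspects the command buffer's status (the helper name below is illustrative, not the project's actual implementation):

#include <Metal/Metal.hpp>

// Hypothetical helper: a committed command buffer is done once its status
// reaches Completed (or Error, which also means the GPU won't touch it again).
static bool CommandBufferCompletedSketch(MTL::CommandBuffer* commandBuffer)
{
    MTL::CommandBufferStatus status = commandBuffer->status();
    return status == MTL::CommandBufferStatusCompleted ||
           status == MTL::CommandBufferStatusError;
}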
@@ -28,15 +28,14 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
 		restrideInfo.allocation = m_bufferAllocator.GetBufferAllocation(newSize);
 		buffer = m_bufferAllocator.GetBuffer(restrideInfo.allocation.bufferIndex);

-		//uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset;
-		//uint8* newPtr = (uint8*)buffer->contents() + restrideInfo.allocation.bufferOffset;
+		// HACK: the restriding is done on the CPU, since doing it on the GPU was causing over-synchronization
+		uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset;
+		uint8* newPtr = (uint8*)buffer->contents() + restrideInfo.allocation.offset;

-		//for (size_t elem = 0; elem < vertexBufferRange.size / stride; elem++)
-		//{
-		//	memcpy(newPtr + elem * newStride, oldPtr + elem * stride, stride);
-		//}
-		//debug_printf("Restrided vertex buffer (old stride: %zu, new stride: %zu, old size: %zu, new size: %zu)\n", stride, newStride, vertexBufferRange.size, newSize);
+		for (size_t elem = 0; elem < vertexBufferRange.size / stride; elem++)
+			memcpy(newPtr + elem * newStride, oldPtr + elem * stride, stride);

+		/*
 		if (m_mtlr->GetEncoderType() == MetalEncoderType::Render)
 		{
 			auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_mtlr->GetCommandEncoder());
@@ -60,18 +59,19 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
 			// TODO: do the barriers in one call?
 			MTL::Resource* barrierBuffers[] = {buffer};
 			renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex);
-
-			// Debug
-			m_mtlr->GetPerformanceMonitor().m_vertexBufferRestrides++;
 		}
 		else
 		{
-			debug_printf("vertex buffer restride needs an active render encoder\n");
+			debug_printf("vertex buffer restride needs an active render command encoder\n");
 			cemu_assert_suspicious();
 		}
+		*/

 		restrideInfo.memoryInvalidated = false;
 		restrideInfo.lastStride = newStride;
+
+		// Debug
+		m_mtlr->GetPerformanceMonitor().m_vertexBufferRestrides++;
 	}
 	else
 	{
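For context, "restriding" here means copying each vertex element from a tightly spaced layout (stride bytes apart) into a more widely spaced one (newStride bytes apart), leaving padding after every element. A self-contained sketch of the same loop outside the Metal buffer plumbing (the function and names below are illustrative only):

#include <cstdint>
#include <cstring>
#include <vector>

// Illustrative standalone version of the CPU restride loop above: copy each
// element of `stride` bytes into a destination laid out with `newStride` bytes
// per element (assumes newStride >= stride); the extra bytes stay as padding.
static std::vector<uint8_t> RestrideOnCpu(const uint8_t* src, size_t srcSize,
                                          size_t stride, size_t newStride)
{
    size_t elementCount = srcSize / stride;
    std::vector<uint8_t> dst(elementCount * newStride, 0);
    for (size_t elem = 0; elem < elementCount; elem++)
        std::memcpy(dst.data() + elem * newStride, src + elem * stride, stride);
    return dst;
}

Because the loop reads bufferCache->contents() and writes buffer->contents() directly, both buffers have to be CPU-visible, which appears to be why the storage-mode changes below switch them from Private to Shared.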
@@ -121,7 +121,7 @@ void MetalMemoryManager::InitBufferCache(size_t size)
 {
 	cemu_assert_debug(!m_bufferCache);

-	m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, MTL::ResourceStorageModePrivate);
+	m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, MTL::ResourceStorageModeShared);
 #ifdef CEMU_DEBUG_ASSERT
 	m_bufferCache->setLabel(GetLabel("Buffer cache", m_bufferCache));
 #endif
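With MTL::ResourceStorageModePrivate the allocation lives in GPU-only memory and contents() returns nullptr, so a CPU-side memcpy into or out of it is impossible; MTL::ResourceStorageModeShared maps the buffer into memory both CPU and GPU can access. A minimal metal-cpp sketch of the difference (the function, buffer sizes, and usage are placeholders, not project code):

#include <Metal/Metal.hpp>
#include <cstring>

void StorageModeSketch(MTL::Device* device)
{
    // GPU-only: the CPU cannot map this allocation, contents() yields nullptr.
    MTL::Buffer* privateBuf = device->newBuffer(1024, MTL::ResourceStorageModePrivate);

    // CPU+GPU visible: contents() returns a pointer the CPU can read and write,
    // which is what the CPU restride path and the buffer cache now rely on.
    MTL::Buffer* sharedBuf = device->newBuffer(1024, MTL::ResourceStorageModeShared);
    std::memset(sharedBuf->contents(), 0, sharedBuf->length());

    privateBuf->release();
    sharedBuf->release();
}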
@@ -58,7 +58,7 @@ private:
 class MetalMemoryManager
 {
 public:
-	MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer, m_mtlr->GetOptimalBufferStorageMode()), m_framePersistentBufferAllocator(metalRenderer, MTL::ResourceStorageModePrivate), m_tempBufferAllocator(metalRenderer), m_vertexBufferCache(metalRenderer, m_framePersistentBufferAllocator) {}
+	MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer, m_mtlr->GetOptimalBufferStorageMode()), m_framePersistentBufferAllocator(metalRenderer, MTL::ResourceStorageModeShared), m_tempBufferAllocator(metalRenderer), m_vertexBufferCache(metalRenderer, m_framePersistentBufferAllocator) {}
 	~MetalMemoryManager();

 	// Pipelines
@@ -26,7 +26,7 @@
 #include "imgui/imgui_extension.h"
 #include "imgui/imgui_impl_metal.h"

-#define DEFAULT_COMMIT_TRESHOLD 256
+#define DEFAULT_COMMIT_TRESHOLD 196
 #define OCCLUSION_QUERY_POOL_SIZE 1024

 extern bool hasValidFramebufferAttached;
@@ -917,15 +917,18 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
 	const auto fetchShader = LatteSHRC_GetActiveFetchShader();

 	// Check if we need to end the render pass
-	// Fragment shader is most likely to require a render pass flush, so check for it first
-	bool endRenderPass = CheckIfRenderPassNeedsFlush(pixelShader);
-	if (!endRenderPass)
-		endRenderPass = CheckIfRenderPassNeedsFlush(vertexShader);
-	if (!endRenderPass && geometryShader)
-		endRenderPass = CheckIfRenderPassNeedsFlush(geometryShader);
+	if (!m_state.m_isFirstDrawInRenderPass)
+	{
+		// Fragment shader is most likely to require a render pass flush, so check for it first
+		bool endRenderPass = CheckIfRenderPassNeedsFlush(pixelShader);
+		if (!endRenderPass)
+			endRenderPass = CheckIfRenderPassNeedsFlush(vertexShader);
+		if (!endRenderPass && geometryShader)
+			endRenderPass = CheckIfRenderPassNeedsFlush(geometryShader);

-	if (endRenderPass)
-		EndEncoding();
+		if (endRenderPass)
+			EndEncoding();
+	}

 	// Primitive type
 	const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]);
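This guard presumably relies on m_state.m_isFirstDrawInRenderPass being set when a render pass is begun and cleared after the first draw, so the per-shader flush checks only run when the pass could actually contain earlier writes. A tiny sketch of that bookkeeping under those assumptions (the struct and functions below are simplified illustrations, not the renderer's real state handling):

// Simplified sketch of the first-draw bookkeeping the guard assumes.
struct RenderPassStateSketch
{
    bool m_isFirstDrawInRenderPass = true;
};

void OnRenderPassBeginSketch(RenderPassStateSketch& state)
{
    // A freshly started pass cannot need a flush yet.
    state.m_isFirstDrawInRenderPass = true;
}

void OnDrawRecordedSketch(RenderPassStateSketch& state)
{
    // From the second draw on, the flush checks have to run again.
    state.m_isFirstDrawInRenderPass = false;
}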
@@ -1889,7 +1892,9 @@ void MetalRenderer::ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 s

 void MetalRenderer::CopyBufferToBuffer(MTL::Buffer* src, uint32 srcOffset, MTL::Buffer* dst, uint32 dstOffset, uint32 size, MTL::RenderStages after, MTL::RenderStages before)
 {
+	// TODO: uncomment and fix performance issues
 	// Do the copy in a vertex shader on Apple GPUs
+	/*
 	if (m_isAppleGPU && m_encoderType == MetalEncoderType::Render)
 	{
 		auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_commandEncoder);
@@ -1910,10 +1915,11 @@ void MetalRenderer::CopyBufferToBuffer(MTL::Buffer* src, uint32 srcOffset, MTL::
 	}
 	else
 	{
+	*/
 		auto blitCommandEncoder = GetBlitCommandEncoder();

 		blitCommandEncoder->copyFromBuffer(src, srcOffset, dst, dstOffset, size);
-	}
+	//}
 }

 void MetalRenderer::SwapBuffer(bool mainWindow)
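With the Apple-GPU render-encoder path commented out, every buffer copy now goes through a blit command encoder. A minimal standalone metal-cpp equivalent of that fallback, with queue and encoder management simplified (the function name and parameters are illustrative, not the renderer's API):

#include <Metal/Metal.hpp>

// Illustrative stand-alone version of the blit fallback: copy `size` bytes
// from src+srcOffset to dst+dstOffset on its own command buffer.
void BlitCopySketch(MTL::CommandQueue* queue, MTL::Buffer* src, uint32_t srcOffset,
                    MTL::Buffer* dst, uint32_t dstOffset, uint32_t size)
{
    MTL::CommandBuffer* commandBuffer = queue->commandBuffer();
    MTL::BlitCommandEncoder* blit = commandBuffer->blitCommandEncoder();
    blit->copyFromBuffer(src, srcOffset, dst, dstOffset, size);
    blit->endEncoding();
    commandBuffer->commit();
}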