mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-01-24 07:41:13 +01:00
do vertex restride on the CPU
This commit is contained in:
parent
f03c0a2769
commit
a460a5d28a
@ -27,19 +27,16 @@ void LatteTextureReadbackInfoMtl::StartTransfer()
|
|||||||
blitCommandEncoder->copyFromTexture(baseTexture->GetTexture(), 0, 0, MTL::Origin{0, 0, 0}, MTL::Size{(uint32)baseTexture->width, (uint32)baseTexture->height, 1}, m_mtlr->GetTextureReadbackBuffer(), m_bufferOffset, bytesPerRow, bytesPerImage);
|
blitCommandEncoder->copyFromTexture(baseTexture->GetTexture(), 0, 0, MTL::Origin{0, 0, 0}, MTL::Size{(uint32)baseTexture->width, (uint32)baseTexture->height, 1}, m_mtlr->GetTextureReadbackBuffer(), m_bufferOffset, bytesPerRow, bytesPerImage);
|
||||||
|
|
||||||
m_commandBuffer = m_mtlr->GetCurrentCommandBuffer()->retain();
|
m_commandBuffer = m_mtlr->GetCurrentCommandBuffer()->retain();
|
||||||
m_mtlr->RequestSoonCommit();
|
// TODO: uncomment?
|
||||||
|
//m_mtlr->RequestSoonCommit();
|
||||||
|
m_mtlr->CommitCommandBuffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool LatteTextureReadbackInfoMtl::IsFinished()
|
bool LatteTextureReadbackInfoMtl::IsFinished()
|
||||||
{
|
{
|
||||||
// TODO: is this needed?
|
|
||||||
if (!m_commandBuffer)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// TODO: remove this?
|
|
||||||
// Command buffer wasn't even comitted, let's commit immediately
|
// Command buffer wasn't even comitted, let's commit immediately
|
||||||
if (m_mtlr->GetCurrentCommandBuffer() == m_commandBuffer)
|
//if (m_mtlr->GetCurrentCommandBuffer() == m_commandBuffer)
|
||||||
m_mtlr->CommitCommandBuffer();
|
// m_mtlr->CommitCommandBuffer();
|
||||||
|
|
||||||
return CommandBufferCompleted(m_commandBuffer);
|
return CommandBufferCompleted(m_commandBuffer);
|
||||||
}
|
}
|
||||||
|
@ -28,15 +28,14 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
|
|||||||
restrideInfo.allocation = m_bufferAllocator.GetBufferAllocation(newSize);
|
restrideInfo.allocation = m_bufferAllocator.GetBufferAllocation(newSize);
|
||||||
buffer = m_bufferAllocator.GetBuffer(restrideInfo.allocation.bufferIndex);
|
buffer = m_bufferAllocator.GetBuffer(restrideInfo.allocation.bufferIndex);
|
||||||
|
|
||||||
//uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset;
|
// HACK: the restriding is done on the CPU, since doing it on the GPU was causing over-synchronization
|
||||||
//uint8* newPtr = (uint8*)buffer->contents() + restrideInfo.allocation.bufferOffset;
|
uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset;
|
||||||
|
uint8* newPtr = (uint8*)buffer->contents() + restrideInfo.allocation.offset;
|
||||||
|
|
||||||
//for (size_t elem = 0; elem < vertexBufferRange.size / stride; elem++)
|
for (size_t elem = 0; elem < vertexBufferRange.size / stride; elem++)
|
||||||
//{
|
memcpy(newPtr + elem * newStride, oldPtr + elem * stride, stride);
|
||||||
// memcpy(newPtr + elem * newStride, oldPtr + elem * stride, stride);
|
|
||||||
//}
|
|
||||||
//debug_printf("Restrided vertex buffer (old stride: %zu, new stride: %zu, old size: %zu, new size: %zu)\n", stride, newStride, vertexBufferRange.size, newSize);
|
|
||||||
|
|
||||||
|
/*
|
||||||
if (m_mtlr->GetEncoderType() == MetalEncoderType::Render)
|
if (m_mtlr->GetEncoderType() == MetalEncoderType::Render)
|
||||||
{
|
{
|
||||||
auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_mtlr->GetCommandEncoder());
|
auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_mtlr->GetCommandEncoder());
|
||||||
@ -60,18 +59,19 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
|
|||||||
// TODO: do the barriers in one call?
|
// TODO: do the barriers in one call?
|
||||||
MTL::Resource* barrierBuffers[] = {buffer};
|
MTL::Resource* barrierBuffers[] = {buffer};
|
||||||
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex);
|
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex);
|
||||||
|
|
||||||
// Debug
|
|
||||||
m_mtlr->GetPerformanceMonitor().m_vertexBufferRestrides++;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
debug_printf("vertex buffer restride needs an active render encoder\n");
|
debug_printf("vertex buffer restride needs an active render command encoder\n");
|
||||||
cemu_assert_suspicious();
|
cemu_assert_suspicious();
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
restrideInfo.memoryInvalidated = false;
|
restrideInfo.memoryInvalidated = false;
|
||||||
restrideInfo.lastStride = newStride;
|
restrideInfo.lastStride = newStride;
|
||||||
|
|
||||||
|
// Debug
|
||||||
|
m_mtlr->GetPerformanceMonitor().m_vertexBufferRestrides++;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -121,7 +121,7 @@ void MetalMemoryManager::InitBufferCache(size_t size)
|
|||||||
{
|
{
|
||||||
cemu_assert_debug(!m_bufferCache);
|
cemu_assert_debug(!m_bufferCache);
|
||||||
|
|
||||||
m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, MTL::ResourceStorageModePrivate);
|
m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, MTL::ResourceStorageModeShared);
|
||||||
#ifdef CEMU_DEBUG_ASSERT
|
#ifdef CEMU_DEBUG_ASSERT
|
||||||
m_bufferCache->setLabel(GetLabel("Buffer cache", m_bufferCache));
|
m_bufferCache->setLabel(GetLabel("Buffer cache", m_bufferCache));
|
||||||
#endif
|
#endif
|
||||||
|
@ -58,7 +58,7 @@ private:
|
|||||||
class MetalMemoryManager
|
class MetalMemoryManager
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer, m_mtlr->GetOptimalBufferStorageMode()), m_framePersistentBufferAllocator(metalRenderer, MTL::ResourceStorageModePrivate), m_tempBufferAllocator(metalRenderer), m_vertexBufferCache(metalRenderer, m_framePersistentBufferAllocator) {}
|
MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer, m_mtlr->GetOptimalBufferStorageMode()), m_framePersistentBufferAllocator(metalRenderer, MTL::ResourceStorageModeShared), m_tempBufferAllocator(metalRenderer), m_vertexBufferCache(metalRenderer, m_framePersistentBufferAllocator) {}
|
||||||
~MetalMemoryManager();
|
~MetalMemoryManager();
|
||||||
|
|
||||||
// Pipelines
|
// Pipelines
|
||||||
|
@ -26,7 +26,7 @@
|
|||||||
#include "imgui/imgui_extension.h"
|
#include "imgui/imgui_extension.h"
|
||||||
#include "imgui/imgui_impl_metal.h"
|
#include "imgui/imgui_impl_metal.h"
|
||||||
|
|
||||||
#define DEFAULT_COMMIT_TRESHOLD 256
|
#define DEFAULT_COMMIT_TRESHOLD 196
|
||||||
#define OCCLUSION_QUERY_POOL_SIZE 1024
|
#define OCCLUSION_QUERY_POOL_SIZE 1024
|
||||||
|
|
||||||
extern bool hasValidFramebufferAttached;
|
extern bool hasValidFramebufferAttached;
|
||||||
@ -917,6 +917,8 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
|
|||||||
const auto fetchShader = LatteSHRC_GetActiveFetchShader();
|
const auto fetchShader = LatteSHRC_GetActiveFetchShader();
|
||||||
|
|
||||||
// Check if we need to end the render pass
|
// Check if we need to end the render pass
|
||||||
|
if (!m_state.m_isFirstDrawInRenderPass)
|
||||||
|
{
|
||||||
// Fragment shader is most likely to require a render pass flush, so check for it first
|
// Fragment shader is most likely to require a render pass flush, so check for it first
|
||||||
bool endRenderPass = CheckIfRenderPassNeedsFlush(pixelShader);
|
bool endRenderPass = CheckIfRenderPassNeedsFlush(pixelShader);
|
||||||
if (!endRenderPass)
|
if (!endRenderPass)
|
||||||
@ -926,6 +928,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
|
|||||||
|
|
||||||
if (endRenderPass)
|
if (endRenderPass)
|
||||||
EndEncoding();
|
EndEncoding();
|
||||||
|
}
|
||||||
|
|
||||||
// Primitive type
|
// Primitive type
|
||||||
const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]);
|
const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]);
|
||||||
@ -1889,7 +1892,9 @@ void MetalRenderer::ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 s
|
|||||||
|
|
||||||
void MetalRenderer::CopyBufferToBuffer(MTL::Buffer* src, uint32 srcOffset, MTL::Buffer* dst, uint32 dstOffset, uint32 size, MTL::RenderStages after, MTL::RenderStages before)
|
void MetalRenderer::CopyBufferToBuffer(MTL::Buffer* src, uint32 srcOffset, MTL::Buffer* dst, uint32 dstOffset, uint32 size, MTL::RenderStages after, MTL::RenderStages before)
|
||||||
{
|
{
|
||||||
|
// TODO: uncomment and fix performance issues
|
||||||
// Do the copy in a vertex shader on Apple GPUs
|
// Do the copy in a vertex shader on Apple GPUs
|
||||||
|
/*
|
||||||
if (m_isAppleGPU && m_encoderType == MetalEncoderType::Render)
|
if (m_isAppleGPU && m_encoderType == MetalEncoderType::Render)
|
||||||
{
|
{
|
||||||
auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_commandEncoder);
|
auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_commandEncoder);
|
||||||
@ -1910,10 +1915,11 @@ void MetalRenderer::CopyBufferToBuffer(MTL::Buffer* src, uint32 srcOffset, MTL::
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
*/
|
||||||
auto blitCommandEncoder = GetBlitCommandEncoder();
|
auto blitCommandEncoder = GetBlitCommandEncoder();
|
||||||
|
|
||||||
blitCommandEncoder->copyFromBuffer(src, srcOffset, dst, dstOffset, size);
|
blitCommandEncoder->copyFromBuffer(src, srcOffset, dst, dstOffset, size);
|
||||||
}
|
//}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MetalRenderer::SwapBuffer(bool mainWindow)
|
void MetalRenderer::SwapBuffer(bool mainWindow)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user