do buffer cache uploading on the GPU

This commit is contained in:
Samuliak 2024-08-29 19:06:01 +02:00
parent 2403cf948a
commit 1cfb841b5f
3 changed files with 70 additions and 69 deletions

View File

@ -3,6 +3,7 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h"
#include "Common/precompiled.h" #include "Common/precompiled.h"
#include "HW/Latte/Renderer/Metal/MetalRenderer.h" #include "HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Metal/MTLResource.hpp"
MetalVertexBufferCache::~MetalVertexBufferCache() MetalVertexBufferCache::~MetalVertexBufferCache()
{ {
@ -115,13 +116,9 @@ void* MetalMemoryManager::GetTextureUploadBuffer(size_t size)
void MetalMemoryManager::InitBufferCache(size_t size) void MetalMemoryManager::InitBufferCache(size_t size)
{ {
if (m_bufferCache) cemu_assert_debug(!m_bufferCache);
{
debug_printf("MetalMemoryManager::InitBufferCache: buffer cache already initialized\n");
return;
}
m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, m_mtlr->GetOptimalBufferStorageMode() | MTL::ResourceCPUCacheModeWriteCombined); m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, MTL::ResourceStorageModePrivate);
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT
m_bufferCache->setLabel(GetLabel("Buffer cache", m_bufferCache)); m_bufferCache->setLabel(GetLabel("Buffer cache", m_bufferCache));
#endif #endif
@ -129,20 +126,23 @@ void MetalMemoryManager::InitBufferCache(size_t size)
void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, size_t size) void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, size_t size)
{ {
if (!m_bufferCache) cemu_assert_debug(m_bufferCache);
{ cemu_assert_debug((offset + size) <= m_bufferCache->length());
debug_printf("MetalMemoryManager::UploadToBufferCache: buffer cache not initialized\n");
return;
}
if ((offset + size) > m_bufferCache->length()) auto allocation = m_tempBufferAllocator.GetBufferAllocation(size);
{ auto buffer = m_tempBufferAllocator.GetBufferOutsideOfCommandBuffer(allocation.bufferIndex);
debug_printf("MetalMemoryManager::UploadToBufferCache: out of bounds access (offset: %zu, size: %zu, buffer size: %zu)\n", offset, size, m_bufferCache->length()); memcpy((uint8*)buffer->contents() + allocation.offset, data, size);
}
memcpy((uint8*)m_bufferCache->contents() + offset, data, size); // Lock the buffer to make sure it's not deallocated before the copy is done
if (!m_mtlr->HasUnifiedMemory()) m_tempBufferAllocator.LockBuffer(allocation.bufferIndex);
m_bufferCache->didModifyRange(NS::Range(offset, size));
m_mtlr->CopyBufferToBuffer(buffer, allocation.offset, m_bufferCache, offset, size);
// Make sure the buffer has the right command buffer
m_tempBufferAllocator.GetBuffer(allocation.bufferIndex); // TODO: make a helper function for this
// We can now safely unlock the buffer
m_tempBufferAllocator.UnlockBuffer(allocation.bufferIndex);
// Notify vertex buffer cache about the change // Notify vertex buffer cache about the change
m_vertexBufferCache.MemoryRangeChanged(offset, size); m_vertexBufferCache.MemoryRangeChanged(offset, size);
@ -150,11 +150,7 @@ void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, si
void MetalMemoryManager::CopyBufferCache(size_t srcOffset, size_t dstOffset, size_t size) void MetalMemoryManager::CopyBufferCache(size_t srcOffset, size_t dstOffset, size_t size)
{ {
if (!m_bufferCache) cemu_assert_debug(m_bufferCache);
{
debug_printf("MetalMemoryManager::CopyBufferCache: buffer cache not initialized\n");
return;
}
memcpy((uint8*)m_bufferCache->contents() + dstOffset, (uint8*)m_bufferCache->contents() + srcOffset, size); m_mtlr->CopyBufferToBuffer(m_bufferCache, srcOffset, m_bufferCache, dstOffset, size);
} }

View File

@ -22,11 +22,6 @@
#include "HW/Latte/Renderer/Metal/MetalCommon.h" #include "HW/Latte/Renderer/Metal/MetalCommon.h"
#include "HW/Latte/Renderer/Metal/MetalLayerHandle.h" #include "HW/Latte/Renderer/Metal/MetalLayerHandle.h"
#include "HW/Latte/Renderer/Renderer.h" #include "HW/Latte/Renderer/Renderer.h"
#include "Metal/MTLCommandBuffer.hpp"
#include "Metal/MTLDevice.hpp"
#include "Metal/MTLRenderCommandEncoder.hpp"
#include "Metal/MTLRenderPass.hpp"
#include "Metal/MTLRenderPipeline.hpp"
#include "imgui.h" #include "imgui.h"
#include <cstddef> #include <cstddef>
@ -780,31 +775,7 @@ void MetalRenderer::bufferCache_copy(uint32 srcOffset, uint32 dstOffset, uint32
void MetalRenderer::bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uint32 dstOffset, uint32 size) void MetalRenderer::bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uint32 dstOffset, uint32 size)
{ {
// Do the copy in a vertex shader on Apple GPUs CopyBufferToBuffer(m_xfbRingBuffer, srcOffset, m_memoryManager->GetBufferCache(), dstOffset, size);
if (m_isAppleGPU && m_encoderType == MetalEncoderType::Render)
{
auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_commandEncoder);
MTL::Resource* barrierBuffers[] = {m_xfbRingBuffer};
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex | MTL::RenderStageFragment | MTL::RenderStageObject | MTL::RenderStageMesh, MTL::RenderStageVertex);
renderCommandEncoder->setRenderPipelineState(m_copyBufferToBufferPipeline->GetRenderPipelineState());
m_state.m_encoderState.m_renderPipelineState = m_copyBufferToBufferPipeline->GetRenderPipelineState();
SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, m_xfbRingBuffer, srcOffset, GET_HELPER_BUFFER_BINDING(0));
SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, m_memoryManager->GetBufferCache(), dstOffset, GET_HELPER_BUFFER_BINDING(1));
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), NS::UInteger(size));
barrierBuffers[0] = m_memoryManager->GetBufferCache();
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex | MTL::RenderStageFragment | MTL::RenderStageObject | MTL::RenderStageMesh);
}
else
{
auto blitCommandEncoder = GetBlitCommandEncoder();
blitCommandEncoder->copyFromBuffer(m_xfbRingBuffer, srcOffset, m_memoryManager->GetBufferCache(), dstOffset, size);
}
} }
void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, uint32 size) void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, uint32 size)
@ -945,9 +916,6 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
if (endRenderPass) if (endRenderPass)
EndEncoding(); EndEncoding();
// Render pass
auto renderCommandEncoder = GetRenderCommandEncoder();
// Primitive type // Primitive type
const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]); const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]);
auto mtlPrimitiveType = GetMtlPrimitiveType(primitiveMode); auto mtlPrimitiveType = GetMtlPrimitiveType(primitiveMode);
@ -955,6 +923,22 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
bool usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect); bool usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect);
// Index buffer
Renderer::INDEX_TYPE hostIndexType;
uint32 hostIndexCount;
uint32 indexMin = 0;
uint32 indexMax = 0;
uint32 indexBufferOffset = 0;
uint32 indexBufferIndex = 0;
LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexBufferOffset, indexBufferIndex);
// synchronize vertex and uniform cache and update buffer bindings
// We need to call this before getting the render command encoder, since it can cause buffer copies
LatteBufferCache_Sync(indexMin + baseVertex, indexMax + baseVertex, baseInstance, instanceCount);
// Render pass
auto renderCommandEncoder = GetRenderCommandEncoder();
// Depth stencil state // Depth stencil state
// Disable depth write when there is no depth attachment // Disable depth write when there is no depth attachment
@ -1120,18 +1104,6 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
// Resources // Resources
// Index buffer
Renderer::INDEX_TYPE hostIndexType;
uint32 hostIndexCount;
uint32 indexMin = 0;
uint32 indexMax = 0;
uint32 indexBufferOffset = 0;
uint32 indexBufferIndex = 0;
LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexBufferOffset, indexBufferIndex);
// synchronize vertex and uniform cache and update buffer bindings
LatteBufferCache_Sync(indexMin + baseVertex, indexMax + baseVertex, baseInstance, instanceCount);
// Vertex buffers // Vertex buffers
for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++) for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++)
{ {
@ -1851,6 +1823,37 @@ void MetalRenderer::ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 s
EndEncoding(); EndEncoding();
} }
// Records a copy of `size` units from `src` (at `srcOffset`) into `dst` (at `dstOffset`).
// When an Apple GPU is in use and a render encoder is currently active, the copy is done with a
// vertex shader draw inside that encoder; in every other case it is recorded on a blit encoder.
void MetalRenderer::CopyBufferToBuffer(MTL::Buffer* src, uint32 srcOffset, MTL::Buffer* dst, uint32 dstOffset, uint32 size)
{
	const bool copyInsideRenderPass = (m_isAppleGPU && m_encoderType == MetalEncoderType::Render);
	if (!copyInsideRenderPass)
	{
		// Plain blit copy
		GetBlitCommandEncoder()->copyFromBuffer(src, srcOffset, dst, dstOffset, size);
		return;
	}

	// Do the copy in a vertex shader on Apple GPUs
	auto encoder = static_cast<MTL::RenderCommandEncoder*>(m_commandEncoder);

	const auto copyStage = MTL::RenderStageVertex;
	const auto allStages = MTL::RenderStageVertex | MTL::RenderStageFragment | MTL::RenderStageObject | MTL::RenderStageMesh;

	// Barrier on the source buffer, issued before the copy draw
	// TODO: let the caller choose the stages
	MTL::Resource* srcBarrier[] = {src};
	encoder->memoryBarrier(srcBarrier, 1, allStages, copyStage);

	// Bind the copy pipeline and mirror it into the tracked encoder state
	encoder->setRenderPipelineState(m_copyBufferToBufferPipeline->GetRenderPipelineState());
	m_state.m_encoderState.m_renderPipelineState = m_copyBufferToBufferPipeline->GetRenderPipelineState();

	// Source goes to helper binding 0, destination to helper binding 1
	SetBuffer(encoder, METAL_SHADER_TYPE_VERTEX, src, srcOffset, GET_HELPER_BUFFER_BINDING(0));
	SetBuffer(encoder, METAL_SHADER_TYPE_VERTEX, dst, dstOffset, GET_HELPER_BUFFER_BINDING(1));

	// The draw is issued as `size` points
	encoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), NS::UInteger(size));

	// Barrier on the destination buffer, issued after the copy draw
	// TODO: let the caller choose the stages
	MTL::Resource* dstBarrier[] = {dst};
	encoder->memoryBarrier(dstBarrier, 1, copyStage, allStages);
}
void MetalRenderer::SwapBuffer(bool mainWindow) void MetalRenderer::SwapBuffer(bool mainWindow)
{ {
auto& layer = GetLayer(mainWindow); auto& layer = GetLayer(mainWindow);

View File

@ -376,6 +376,8 @@ public:
void ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a); void ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a);
void CopyBufferToBuffer(MTL::Buffer* src, uint32 srcOffset, MTL::Buffer* dst, uint32 dstOffset, uint32 size);
// Getters // Getters
bool IsAppleGPU() const bool IsAppleGPU() const
{ {