mirror of
https://github.com/cemu-project/Cemu.git
synced 2024-11-29 12:34:17 +01:00
do buffer cache uploading on the GPU
This commit is contained in:
parent
2403cf948a
commit
1cfb841b5f
@ -3,6 +3,7 @@
|
||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h"
|
||||
#include "Common/precompiled.h"
|
||||
#include "HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||
#include "Metal/MTLResource.hpp"
|
||||
|
||||
MetalVertexBufferCache::~MetalVertexBufferCache()
|
||||
{
|
||||
@ -115,13 +116,9 @@ void* MetalMemoryManager::GetTextureUploadBuffer(size_t size)
|
||||
|
||||
void MetalMemoryManager::InitBufferCache(size_t size)
|
||||
{
|
||||
if (m_bufferCache)
|
||||
{
|
||||
debug_printf("MetalMemoryManager::InitBufferCache: buffer cache already initialized\n");
|
||||
return;
|
||||
}
|
||||
cemu_assert_debug(!m_bufferCache);
|
||||
|
||||
m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, m_mtlr->GetOptimalBufferStorageMode() | MTL::ResourceCPUCacheModeWriteCombined);
|
||||
m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, MTL::ResourceStorageModePrivate);
|
||||
#ifdef CEMU_DEBUG_ASSERT
|
||||
m_bufferCache->setLabel(GetLabel("Buffer cache", m_bufferCache));
|
||||
#endif
|
||||
@ -129,20 +126,23 @@ void MetalMemoryManager::InitBufferCache(size_t size)
|
||||
|
||||
void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, size_t size)
|
||||
{
|
||||
if (!m_bufferCache)
|
||||
{
|
||||
debug_printf("MetalMemoryManager::UploadToBufferCache: buffer cache not initialized\n");
|
||||
return;
|
||||
}
|
||||
cemu_assert_debug(m_bufferCache);
|
||||
cemu_assert_debug((offset + size) <= m_bufferCache->length());
|
||||
|
||||
if ((offset + size) > m_bufferCache->length())
|
||||
{
|
||||
debug_printf("MetalMemoryManager::UploadToBufferCache: out of bounds access (offset: %zu, size: %zu, buffer size: %zu)\n", offset, size, m_bufferCache->length());
|
||||
}
|
||||
auto allocation = m_tempBufferAllocator.GetBufferAllocation(size);
|
||||
auto buffer = m_tempBufferAllocator.GetBufferOutsideOfCommandBuffer(allocation.bufferIndex);
|
||||
memcpy((uint8*)buffer->contents() + allocation.offset, data, size);
|
||||
|
||||
memcpy((uint8*)m_bufferCache->contents() + offset, data, size);
|
||||
if (!m_mtlr->HasUnifiedMemory())
|
||||
m_bufferCache->didModifyRange(NS::Range(offset, size));
|
||||
// Lock the buffer to make sure it's not deallocated before the copy is done
|
||||
m_tempBufferAllocator.LockBuffer(allocation.bufferIndex);
|
||||
|
||||
m_mtlr->CopyBufferToBuffer(buffer, allocation.offset, m_bufferCache, offset, size);
|
||||
|
||||
// Make sure the buffer has the right command buffer
|
||||
m_tempBufferAllocator.GetBuffer(allocation.bufferIndex); // TODO: make a helper function for this
|
||||
|
||||
// We can now safely unlock the buffer
|
||||
m_tempBufferAllocator.UnlockBuffer(allocation.bufferIndex);
|
||||
|
||||
// Notify vertex buffer cache about the change
|
||||
m_vertexBufferCache.MemoryRangeChanged(offset, size);
|
||||
@ -150,11 +150,7 @@ void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, si
|
||||
|
||||
void MetalMemoryManager::CopyBufferCache(size_t srcOffset, size_t dstOffset, size_t size)
|
||||
{
|
||||
if (!m_bufferCache)
|
||||
{
|
||||
debug_printf("MetalMemoryManager::CopyBufferCache: buffer cache not initialized\n");
|
||||
return;
|
||||
}
|
||||
cemu_assert_debug(m_bufferCache);
|
||||
|
||||
memcpy((uint8*)m_bufferCache->contents() + dstOffset, (uint8*)m_bufferCache->contents() + srcOffset, size);
|
||||
m_mtlr->CopyBufferToBuffer(m_bufferCache, srcOffset, m_bufferCache, dstOffset, size);
|
||||
}
|
||||
|
@ -22,11 +22,6 @@
|
||||
#include "HW/Latte/Renderer/Metal/MetalCommon.h"
|
||||
#include "HW/Latte/Renderer/Metal/MetalLayerHandle.h"
|
||||
#include "HW/Latte/Renderer/Renderer.h"
|
||||
#include "Metal/MTLCommandBuffer.hpp"
|
||||
#include "Metal/MTLDevice.hpp"
|
||||
#include "Metal/MTLRenderCommandEncoder.hpp"
|
||||
#include "Metal/MTLRenderPass.hpp"
|
||||
#include "Metal/MTLRenderPipeline.hpp"
|
||||
#include "imgui.h"
|
||||
#include <cstddef>
|
||||
|
||||
@ -780,31 +775,7 @@ void MetalRenderer::bufferCache_copy(uint32 srcOffset, uint32 dstOffset, uint32
|
||||
|
||||
void MetalRenderer::bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uint32 dstOffset, uint32 size)
|
||||
{
|
||||
// Do the copy in a vertex shader on Apple GPUs
|
||||
if (m_isAppleGPU && m_encoderType == MetalEncoderType::Render)
|
||||
{
|
||||
auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_commandEncoder);
|
||||
|
||||
MTL::Resource* barrierBuffers[] = {m_xfbRingBuffer};
|
||||
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex | MTL::RenderStageFragment | MTL::RenderStageObject | MTL::RenderStageMesh, MTL::RenderStageVertex);
|
||||
|
||||
renderCommandEncoder->setRenderPipelineState(m_copyBufferToBufferPipeline->GetRenderPipelineState());
|
||||
m_state.m_encoderState.m_renderPipelineState = m_copyBufferToBufferPipeline->GetRenderPipelineState();
|
||||
|
||||
SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, m_xfbRingBuffer, srcOffset, GET_HELPER_BUFFER_BINDING(0));
|
||||
SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, m_memoryManager->GetBufferCache(), dstOffset, GET_HELPER_BUFFER_BINDING(1));
|
||||
|
||||
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), NS::UInteger(size));
|
||||
|
||||
barrierBuffers[0] = m_memoryManager->GetBufferCache();
|
||||
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex | MTL::RenderStageFragment | MTL::RenderStageObject | MTL::RenderStageMesh);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto blitCommandEncoder = GetBlitCommandEncoder();
|
||||
|
||||
blitCommandEncoder->copyFromBuffer(m_xfbRingBuffer, srcOffset, m_memoryManager->GetBufferCache(), dstOffset, size);
|
||||
}
|
||||
CopyBufferToBuffer(m_xfbRingBuffer, srcOffset, m_memoryManager->GetBufferCache(), dstOffset, size);
|
||||
}
|
||||
|
||||
void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, uint32 size)
|
||||
@ -945,9 +916,6 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
|
||||
if (endRenderPass)
|
||||
EndEncoding();
|
||||
|
||||
// Render pass
|
||||
auto renderCommandEncoder = GetRenderCommandEncoder();
|
||||
|
||||
// Primitive type
|
||||
const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]);
|
||||
auto mtlPrimitiveType = GetMtlPrimitiveType(primitiveMode);
|
||||
@ -955,6 +923,22 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
|
||||
|
||||
bool usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect);
|
||||
|
||||
// Index buffer
|
||||
Renderer::INDEX_TYPE hostIndexType;
|
||||
uint32 hostIndexCount;
|
||||
uint32 indexMin = 0;
|
||||
uint32 indexMax = 0;
|
||||
uint32 indexBufferOffset = 0;
|
||||
uint32 indexBufferIndex = 0;
|
||||
LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexBufferOffset, indexBufferIndex);
|
||||
|
||||
// synchronize vertex and uniform cache and update buffer bindings
|
||||
// We need to call this before getting the render command encoder, since it can cause buffer copies
|
||||
LatteBufferCache_Sync(indexMin + baseVertex, indexMax + baseVertex, baseInstance, instanceCount);
|
||||
|
||||
// Render pass
|
||||
auto renderCommandEncoder = GetRenderCommandEncoder();
|
||||
|
||||
// Depth stencil state
|
||||
|
||||
// Disable depth write when there is no depth attachment
|
||||
@ -1120,18 +1104,6 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
|
||||
|
||||
// Resources
|
||||
|
||||
// Index buffer
|
||||
Renderer::INDEX_TYPE hostIndexType;
|
||||
uint32 hostIndexCount;
|
||||
uint32 indexMin = 0;
|
||||
uint32 indexMax = 0;
|
||||
uint32 indexBufferOffset = 0;
|
||||
uint32 indexBufferIndex = 0;
|
||||
LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexBufferOffset, indexBufferIndex);
|
||||
|
||||
// synchronize vertex and uniform cache and update buffer bindings
|
||||
LatteBufferCache_Sync(indexMin + baseVertex, indexMax + baseVertex, baseInstance, instanceCount);
|
||||
|
||||
// Vertex buffers
|
||||
for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++)
|
||||
{
|
||||
@ -1851,6 +1823,37 @@ void MetalRenderer::ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 s
|
||||
EndEncoding();
|
||||
}
|
||||
|
||||
void MetalRenderer::CopyBufferToBuffer(MTL::Buffer* src, uint32 srcOffset, MTL::Buffer* dst, uint32 dstOffset, uint32 size)
|
||||
{
|
||||
// Do the copy in a vertex shader on Apple GPUs
|
||||
if (m_isAppleGPU && m_encoderType == MetalEncoderType::Render)
|
||||
{
|
||||
auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_commandEncoder);
|
||||
|
||||
MTL::Resource* barrierBuffers[] = {src};
|
||||
// TODO: let the caller choose the stages
|
||||
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex | MTL::RenderStageFragment | MTL::RenderStageObject | MTL::RenderStageMesh, MTL::RenderStageVertex);
|
||||
|
||||
renderCommandEncoder->setRenderPipelineState(m_copyBufferToBufferPipeline->GetRenderPipelineState());
|
||||
m_state.m_encoderState.m_renderPipelineState = m_copyBufferToBufferPipeline->GetRenderPipelineState();
|
||||
|
||||
SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, src, srcOffset, GET_HELPER_BUFFER_BINDING(0));
|
||||
SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, dst, dstOffset, GET_HELPER_BUFFER_BINDING(1));
|
||||
|
||||
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), NS::UInteger(size));
|
||||
|
||||
barrierBuffers[0] = dst;
|
||||
// TODO: let the caller choose the stages
|
||||
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex | MTL::RenderStageFragment | MTL::RenderStageObject | MTL::RenderStageMesh);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto blitCommandEncoder = GetBlitCommandEncoder();
|
||||
|
||||
blitCommandEncoder->copyFromBuffer(src, srcOffset, dst, dstOffset, size);
|
||||
}
|
||||
}
|
||||
|
||||
void MetalRenderer::SwapBuffer(bool mainWindow)
|
||||
{
|
||||
auto& layer = GetLayer(mainWindow);
|
||||
|
@ -376,6 +376,8 @@ public:
|
||||
|
||||
void ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a);
|
||||
|
||||
void CopyBufferToBuffer(MTL::Buffer* src, uint32 srcOffset, MTL::Buffer* dst, uint32 dstOffset, uint32 size);
|
||||
|
||||
// Getters
|
||||
bool IsAppleGPU() const
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user