mirror of
https://github.com/cemu-project/Cemu.git
synced 2024-12-01 21:44:17 +01:00
do buffer cache uploading on the GPU
This commit is contained in:
parent
2403cf948a
commit
1cfb841b5f
@ -3,6 +3,7 @@
|
|||||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h"
|
#include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h"
|
||||||
#include "Common/precompiled.h"
|
#include "Common/precompiled.h"
|
||||||
#include "HW/Latte/Renderer/Metal/MetalRenderer.h"
|
#include "HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||||
|
#include "Metal/MTLResource.hpp"
|
||||||
|
|
||||||
MetalVertexBufferCache::~MetalVertexBufferCache()
|
MetalVertexBufferCache::~MetalVertexBufferCache()
|
||||||
{
|
{
|
||||||
@ -115,13 +116,9 @@ void* MetalMemoryManager::GetTextureUploadBuffer(size_t size)
|
|||||||
|
|
||||||
void MetalMemoryManager::InitBufferCache(size_t size)
|
void MetalMemoryManager::InitBufferCache(size_t size)
|
||||||
{
|
{
|
||||||
if (m_bufferCache)
|
cemu_assert_debug(!m_bufferCache);
|
||||||
{
|
|
||||||
debug_printf("MetalMemoryManager::InitBufferCache: buffer cache already initialized\n");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, m_mtlr->GetOptimalBufferStorageMode() | MTL::ResourceCPUCacheModeWriteCombined);
|
m_bufferCache = m_mtlr->GetDevice()->newBuffer(size, MTL::ResourceStorageModePrivate);
|
||||||
#ifdef CEMU_DEBUG_ASSERT
|
#ifdef CEMU_DEBUG_ASSERT
|
||||||
m_bufferCache->setLabel(GetLabel("Buffer cache", m_bufferCache));
|
m_bufferCache->setLabel(GetLabel("Buffer cache", m_bufferCache));
|
||||||
#endif
|
#endif
|
||||||
@ -129,20 +126,23 @@ void MetalMemoryManager::InitBufferCache(size_t size)
|
|||||||
|
|
||||||
void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, size_t size)
|
void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, size_t size)
|
||||||
{
|
{
|
||||||
if (!m_bufferCache)
|
cemu_assert_debug(m_bufferCache);
|
||||||
{
|
cemu_assert_debug((offset + size) <= m_bufferCache->length());
|
||||||
debug_printf("MetalMemoryManager::UploadToBufferCache: buffer cache not initialized\n");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((offset + size) > m_bufferCache->length())
|
auto allocation = m_tempBufferAllocator.GetBufferAllocation(size);
|
||||||
{
|
auto buffer = m_tempBufferAllocator.GetBufferOutsideOfCommandBuffer(allocation.bufferIndex);
|
||||||
debug_printf("MetalMemoryManager::UploadToBufferCache: out of bounds access (offset: %zu, size: %zu, buffer size: %zu)\n", offset, size, m_bufferCache->length());
|
memcpy((uint8*)buffer->contents() + allocation.offset, data, size);
|
||||||
}
|
|
||||||
|
|
||||||
memcpy((uint8*)m_bufferCache->contents() + offset, data, size);
|
// Lock the buffer to make sure it's not deallocated before the copy is done
|
||||||
if (!m_mtlr->HasUnifiedMemory())
|
m_tempBufferAllocator.LockBuffer(allocation.bufferIndex);
|
||||||
m_bufferCache->didModifyRange(NS::Range(offset, size));
|
|
||||||
|
m_mtlr->CopyBufferToBuffer(buffer, allocation.offset, m_bufferCache, offset, size);
|
||||||
|
|
||||||
|
// Make sure the buffer has the right command buffer
|
||||||
|
m_tempBufferAllocator.GetBuffer(allocation.bufferIndex); // TODO: make a helper function for this
|
||||||
|
|
||||||
|
// We can now safely unlock the buffer
|
||||||
|
m_tempBufferAllocator.UnlockBuffer(allocation.bufferIndex);
|
||||||
|
|
||||||
// Notify vertex buffer cache about the change
|
// Notify vertex buffer cache about the change
|
||||||
m_vertexBufferCache.MemoryRangeChanged(offset, size);
|
m_vertexBufferCache.MemoryRangeChanged(offset, size);
|
||||||
@ -150,11 +150,7 @@ void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, si
|
|||||||
|
|
||||||
void MetalMemoryManager::CopyBufferCache(size_t srcOffset, size_t dstOffset, size_t size)
|
void MetalMemoryManager::CopyBufferCache(size_t srcOffset, size_t dstOffset, size_t size)
|
||||||
{
|
{
|
||||||
if (!m_bufferCache)
|
cemu_assert_debug(m_bufferCache);
|
||||||
{
|
|
||||||
debug_printf("MetalMemoryManager::CopyBufferCache: buffer cache not initialized\n");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
memcpy((uint8*)m_bufferCache->contents() + dstOffset, (uint8*)m_bufferCache->contents() + srcOffset, size);
|
m_mtlr->CopyBufferToBuffer(m_bufferCache, srcOffset, m_bufferCache, dstOffset, size);
|
||||||
}
|
}
|
||||||
|
@ -22,11 +22,6 @@
|
|||||||
#include "HW/Latte/Renderer/Metal/MetalCommon.h"
|
#include "HW/Latte/Renderer/Metal/MetalCommon.h"
|
||||||
#include "HW/Latte/Renderer/Metal/MetalLayerHandle.h"
|
#include "HW/Latte/Renderer/Metal/MetalLayerHandle.h"
|
||||||
#include "HW/Latte/Renderer/Renderer.h"
|
#include "HW/Latte/Renderer/Renderer.h"
|
||||||
#include "Metal/MTLCommandBuffer.hpp"
|
|
||||||
#include "Metal/MTLDevice.hpp"
|
|
||||||
#include "Metal/MTLRenderCommandEncoder.hpp"
|
|
||||||
#include "Metal/MTLRenderPass.hpp"
|
|
||||||
#include "Metal/MTLRenderPipeline.hpp"
|
|
||||||
#include "imgui.h"
|
#include "imgui.h"
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
|
|
||||||
@ -780,31 +775,7 @@ void MetalRenderer::bufferCache_copy(uint32 srcOffset, uint32 dstOffset, uint32
|
|||||||
|
|
||||||
void MetalRenderer::bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uint32 dstOffset, uint32 size)
|
void MetalRenderer::bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uint32 dstOffset, uint32 size)
|
||||||
{
|
{
|
||||||
// Do the copy in a vertex shader on Apple GPUs
|
CopyBufferToBuffer(m_xfbRingBuffer, srcOffset, m_memoryManager->GetBufferCache(), dstOffset, size);
|
||||||
if (m_isAppleGPU && m_encoderType == MetalEncoderType::Render)
|
|
||||||
{
|
|
||||||
auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_commandEncoder);
|
|
||||||
|
|
||||||
MTL::Resource* barrierBuffers[] = {m_xfbRingBuffer};
|
|
||||||
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex | MTL::RenderStageFragment | MTL::RenderStageObject | MTL::RenderStageMesh, MTL::RenderStageVertex);
|
|
||||||
|
|
||||||
renderCommandEncoder->setRenderPipelineState(m_copyBufferToBufferPipeline->GetRenderPipelineState());
|
|
||||||
m_state.m_encoderState.m_renderPipelineState = m_copyBufferToBufferPipeline->GetRenderPipelineState();
|
|
||||||
|
|
||||||
SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, m_xfbRingBuffer, srcOffset, GET_HELPER_BUFFER_BINDING(0));
|
|
||||||
SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, m_memoryManager->GetBufferCache(), dstOffset, GET_HELPER_BUFFER_BINDING(1));
|
|
||||||
|
|
||||||
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), NS::UInteger(size));
|
|
||||||
|
|
||||||
barrierBuffers[0] = m_memoryManager->GetBufferCache();
|
|
||||||
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex | MTL::RenderStageFragment | MTL::RenderStageObject | MTL::RenderStageMesh);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
auto blitCommandEncoder = GetBlitCommandEncoder();
|
|
||||||
|
|
||||||
blitCommandEncoder->copyFromBuffer(m_xfbRingBuffer, srcOffset, m_memoryManager->GetBufferCache(), dstOffset, size);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, uint32 size)
|
void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, uint32 size)
|
||||||
@ -945,9 +916,6 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
|
|||||||
if (endRenderPass)
|
if (endRenderPass)
|
||||||
EndEncoding();
|
EndEncoding();
|
||||||
|
|
||||||
// Render pass
|
|
||||||
auto renderCommandEncoder = GetRenderCommandEncoder();
|
|
||||||
|
|
||||||
// Primitive type
|
// Primitive type
|
||||||
const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]);
|
const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]);
|
||||||
auto mtlPrimitiveType = GetMtlPrimitiveType(primitiveMode);
|
auto mtlPrimitiveType = GetMtlPrimitiveType(primitiveMode);
|
||||||
@ -955,6 +923,22 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
|
|||||||
|
|
||||||
bool usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect);
|
bool usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect);
|
||||||
|
|
||||||
|
// Index buffer
|
||||||
|
Renderer::INDEX_TYPE hostIndexType;
|
||||||
|
uint32 hostIndexCount;
|
||||||
|
uint32 indexMin = 0;
|
||||||
|
uint32 indexMax = 0;
|
||||||
|
uint32 indexBufferOffset = 0;
|
||||||
|
uint32 indexBufferIndex = 0;
|
||||||
|
LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexBufferOffset, indexBufferIndex);
|
||||||
|
|
||||||
|
// synchronize vertex and uniform cache and update buffer bindings
|
||||||
|
// We need to call this before getting the render command encoder, since it can cause buffer copies
|
||||||
|
LatteBufferCache_Sync(indexMin + baseVertex, indexMax + baseVertex, baseInstance, instanceCount);
|
||||||
|
|
||||||
|
// Render pass
|
||||||
|
auto renderCommandEncoder = GetRenderCommandEncoder();
|
||||||
|
|
||||||
// Depth stencil state
|
// Depth stencil state
|
||||||
|
|
||||||
// Disable depth write when there is no depth attachment
|
// Disable depth write when there is no depth attachment
|
||||||
@ -1120,18 +1104,6 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
|
|||||||
|
|
||||||
// Resources
|
// Resources
|
||||||
|
|
||||||
// Index buffer
|
|
||||||
Renderer::INDEX_TYPE hostIndexType;
|
|
||||||
uint32 hostIndexCount;
|
|
||||||
uint32 indexMin = 0;
|
|
||||||
uint32 indexMax = 0;
|
|
||||||
uint32 indexBufferOffset = 0;
|
|
||||||
uint32 indexBufferIndex = 0;
|
|
||||||
LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexBufferOffset, indexBufferIndex);
|
|
||||||
|
|
||||||
// synchronize vertex and uniform cache and update buffer bindings
|
|
||||||
LatteBufferCache_Sync(indexMin + baseVertex, indexMax + baseVertex, baseInstance, instanceCount);
|
|
||||||
|
|
||||||
// Vertex buffers
|
// Vertex buffers
|
||||||
for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++)
|
for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++)
|
||||||
{
|
{
|
||||||
@ -1851,6 +1823,37 @@ void MetalRenderer::ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 s
|
|||||||
EndEncoding();
|
EndEncoding();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void MetalRenderer::CopyBufferToBuffer(MTL::Buffer* src, uint32 srcOffset, MTL::Buffer* dst, uint32 dstOffset, uint32 size)
|
||||||
|
{
|
||||||
|
// Do the copy in a vertex shader on Apple GPUs
|
||||||
|
if (m_isAppleGPU && m_encoderType == MetalEncoderType::Render)
|
||||||
|
{
|
||||||
|
auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_commandEncoder);
|
||||||
|
|
||||||
|
MTL::Resource* barrierBuffers[] = {src};
|
||||||
|
// TODO: let the caller choose the stages
|
||||||
|
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex | MTL::RenderStageFragment | MTL::RenderStageObject | MTL::RenderStageMesh, MTL::RenderStageVertex);
|
||||||
|
|
||||||
|
renderCommandEncoder->setRenderPipelineState(m_copyBufferToBufferPipeline->GetRenderPipelineState());
|
||||||
|
m_state.m_encoderState.m_renderPipelineState = m_copyBufferToBufferPipeline->GetRenderPipelineState();
|
||||||
|
|
||||||
|
SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, src, srcOffset, GET_HELPER_BUFFER_BINDING(0));
|
||||||
|
SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, dst, dstOffset, GET_HELPER_BUFFER_BINDING(1));
|
||||||
|
|
||||||
|
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), NS::UInteger(size));
|
||||||
|
|
||||||
|
barrierBuffers[0] = dst;
|
||||||
|
// TODO: let the caller choose the stages
|
||||||
|
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex | MTL::RenderStageFragment | MTL::RenderStageObject | MTL::RenderStageMesh);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto blitCommandEncoder = GetBlitCommandEncoder();
|
||||||
|
|
||||||
|
blitCommandEncoder->copyFromBuffer(src, srcOffset, dst, dstOffset, size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void MetalRenderer::SwapBuffer(bool mainWindow)
|
void MetalRenderer::SwapBuffer(bool mainWindow)
|
||||||
{
|
{
|
||||||
auto& layer = GetLayer(mainWindow);
|
auto& layer = GetLayer(mainWindow);
|
||||||
|
@ -376,6 +376,8 @@ public:
|
|||||||
|
|
||||||
void ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a);
|
void ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a);
|
||||||
|
|
||||||
|
void CopyBufferToBuffer(MTL::Buffer* src, uint32 srcOffset, MTL::Buffer* dst, uint32 dstOffset, uint32 size);
|
||||||
|
|
||||||
// Getters
|
// Getters
|
||||||
bool IsAppleGPU() const
|
bool IsAppleGPU() const
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user