From d7411e27f773e5b9ef1278c11e87d35b9109725f Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sat, 3 Aug 2024 09:31:40 +0200 Subject: [PATCH] implement render pipeline cache --- src/Cafe/CMakeLists.txt | 2 + src/Cafe/HW/Latte/Core/LatteShader.cpp | 1 - .../LegacyShaderDecompiler/LatteDecompiler.h | 12 +- .../Renderer/Metal/LatteTextureViewMtl.cpp | 1 - .../Renderer/Metal/MetalPipelineCache.cpp | 207 ++++++++++++++ .../Latte/Renderer/Metal/MetalPipelineCache.h | 22 ++ .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 255 ++++++++++-------- .../HW/Latte/Renderer/Metal/MetalRenderer.h | 151 +---------- 8 files changed, 388 insertions(+), 263 deletions(-) create mode 100644 src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp create mode 100644 src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index 82a1989f..fb802c82 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -553,6 +553,8 @@ if(ENABLE_METAL) HW/Latte/Renderer/Metal/CachedFBOMtl.h HW/Latte/Renderer/Metal/MetalMemoryManager.cpp HW/Latte/Renderer/Metal/MetalMemoryManager.h + HW/Latte/Renderer/Metal/MetalPipelineCache.cpp + HW/Latte/Renderer/Metal/MetalPipelineCache.h HW/Latte/Renderer/Metal/ShaderSourcePresent.h ) diff --git a/src/Cafe/HW/Latte/Core/LatteShader.cpp b/src/Cafe/HW/Latte/Core/LatteShader.cpp index 486516ef..66730a9b 100644 --- a/src/Cafe/HW/Latte/Core/LatteShader.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShader.cpp @@ -615,7 +615,6 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi LatteDecompilerShader* shader = decompilerOutput.shader; shader->baseHash = baseHash; // copy resource mapping - // HACK if (g_renderer->GetType() != RendererAPI::OpenGL) shader->resourceMapping = decompilerOutput.resourceMappingVK; else diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h index 1159614e..78af1dec 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h @@ -36,7 +36,7 @@ typedef struct uint16 mappedIndexOffset; // index in remapped uniform array }LatteFastAccessRemappedUniformEntry_buffer_t; -typedef struct +typedef struct { uint32 texUnit; sint32 uniformLocation; @@ -198,7 +198,7 @@ struct LatteDecompilerShader // resource mapping (binding points) LatteDecompilerShaderResourceMapping resourceMapping{}; // uniforms - struct + struct { sint32 loc_remapped; // uf_remappedVS/uf_remappedGS/uf_remappedPS sint32 loc_uniformRegister; // uf_uniformRegisterVS/uf_uniformRegisterGS/uf_uniformRegisterPS @@ -215,7 +215,7 @@ struct LatteDecompilerShader sint32 uniformRangeSize; // entire size of uniform variable block }uniform{ 0 }; // fast access - struct _RemappedUniformBufferGroup + struct _RemappedUniformBufferGroup { _RemappedUniformBufferGroup(uint32 _kcacheBankIdOffset) : kcacheBankIdOffset(_kcacheBankIdOffset) {}; @@ -255,14 +255,14 @@ struct LatteDecompilerOutputUniformOffsets } }; -struct LatteDecompilerOptions +struct LatteDecompilerOptions { bool usesGeometryShader{ false }; // floating point math bool strictMul{}; // if true, 0*anything=0 rule is emulated // Vulkan-specific bool useTFViaSSBO{ false }; - struct + struct { bool hasRoundingModeRTEFloat32{ false }; }spirvInstrinsics; @@ -322,4 +322,4 @@ struct LatteParsedGSCopyShader }; LatteParsedGSCopyShader* LatteGSCopyShaderParser_parse(uint8* programData, uint32 programSize); -bool LatteGSCopyShaderParser_getExportTypeByOffset(LatteParsedGSCopyShader* shaderContext, uint32 offset, uint32* exportType, uint32* exportParam); \ No newline at end of file +bool LatteGSCopyShaderParser_getExportTypeByOffset(LatteParsedGSCopyShader* shaderContext, uint32 offset, uint32* exportType, uint32* exportParam); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp index 2e7a3309..d48b17cc 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp @@ -2,7 +2,6 @@ #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" -#include "Metal/MTLTexture.hpp" LatteTextureViewMtl::LatteTextureViewMtl(MetalRenderer* mtlRenderer, LatteTextureMtl* texture, Latte::E_DIM dim, Latte::E_GX2SURFFMT format, sint32 firstMip, sint32 mipCount, sint32 firstSlice, sint32 sliceCount) : LatteTextureView(texture, firstMip, mipCount, firstSlice, sliceCount, dim, format), m_mtlr(mtlRenderer), m_format(format) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp new file mode 100644 index 00000000..59dcdaee --- /dev/null +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -0,0 +1,207 @@ +#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" +#include "HW/Latte/Core/FetchShader.h" +#include "HW/Latte/ISA/RegDefines.h" +#include "HW/Latte/Renderer/Metal/CachedFBOMtl.h" +#include "HW/Latte/Renderer/Metal/LatteToMtl.h" +#include "HW/Latte/Renderer/Metal/RendererShaderMtl.h" +#include "HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" + +MetalPipelineCache::~MetalPipelineCache() +{ + for (auto& pair : m_pipelineCache) + { + pair.second->release(); + } + m_pipelineCache.clear(); +} + +MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) +{ + uint64 stateHash = CalculatePipelineHash(fetchShader, vertexShader, pixelShader, activeFBO, lcr); + auto& pipeline = m_pipelineCache[stateHash]; + if (pipeline) + { + return pipeline; + } + + // Vertex descriptor + MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init(); + for (auto& bufferGroup : fetchShader->bufferGroups) + { + std::optional fetchType; + + for (sint32 j = 0; j < bufferGroup.attribCount; ++j) + { + auto& attr = bufferGroup.attrib[j]; + + uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId]; + if (semanticId == (uint32)-1) + continue; // attribute not used? + + auto attribute = vertexDescriptor->attributes()->object(semanticId); + attribute->setOffset(attr.offset); + // Bind from the end to not conflict with uniform buffers + attribute->setBufferIndex(GET_MTL_VERTEX_BUFFER_INDEX(attr.attributeBufferIndex)); + attribute->setFormat(GetMtlVertexFormat(attr.format)); + + if (fetchType.has_value()) + cemu_assert_debug(fetchType == attr.fetchType); + else + fetchType = attr.fetchType; + + if (attr.fetchType == LatteConst::INSTANCE_DATA) + { + cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported + } + } + + uint32 bufferIndex = bufferGroup.attributeBufferIndex; + uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; + // TODO: is LatteGPUState.contextNew correct? + uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; + + auto layout = vertexDescriptor->layouts()->object(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)); + layout->setStride(bufferStride); + if (!fetchType.has_value() || fetchType == LatteConst::VertexFetchType2::VERTEX_DATA) + layout->setStepFunction(MTL::VertexStepFunctionPerVertex); + else if (fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA) + layout->setStepFunction(MTL::VertexStepFunctionPerInstance); + else + { + debug_printf("unimplemented vertex fetch type %u\n", (uint32)fetchType.value()); + cemu_assert(false); + } + } + + // Render pipeline state + MTL::RenderPipelineDescriptor* renderPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); + renderPipelineDescriptor->setVertexFunction(static_cast(vertexShader->shader)->GetFunction()); + renderPipelineDescriptor->setFragmentFunction(static_cast(pixelShader->shader)->GetFunction()); + // TODO: don't always set the vertex descriptor + renderPipelineDescriptor->setVertexDescriptor(vertexDescriptor); + for (uint8 i = 0; i < 8; i++) + { + const auto& colorBuffer = activeFBO->colorBuffer[i]; + auto texture = static_cast(colorBuffer.texture); + if (!texture) + { + continue; + } + auto colorAttachment = renderPipelineDescriptor->colorAttachments()->object(i); + colorAttachment->setPixelFormat(texture->GetTexture()->pixelFormat()); + + // Blending + const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = LatteGPUState.contextNew.CB_COLOR_CONTROL; + uint32 blendEnableMask = colorControlReg.get_BLEND_MASK(); + uint32 renderTargetMask = LatteGPUState.contextNew.CB_TARGET_MASK.get_MASK(); + + bool blendEnabled = ((blendEnableMask & (1 << i))) != 0; + if (blendEnabled) + { + colorAttachment->setBlendingEnabled(true); + + const auto& blendControlReg = LatteGPUState.contextNew.CB_BLENDN_CONTROL[i]; + + auto rgbBlendOp = GetMtlBlendOp(blendControlReg.get_COLOR_COMB_FCN()); + auto srcRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_SRCBLEND()); + auto dstRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_DSTBLEND()); + + colorAttachment->setWriteMask((renderTargetMask >> (i * 4)) & 0xF); + colorAttachment->setRgbBlendOperation(rgbBlendOp); + colorAttachment->setSourceRGBBlendFactor(srcRgbBlendFactor); + colorAttachment->setDestinationRGBBlendFactor(dstRgbBlendFactor); + if (blendControlReg.get_SEPARATE_ALPHA_BLEND()) + { + colorAttachment->setAlphaBlendOperation(GetMtlBlendOp(blendControlReg.get_ALPHA_COMB_FCN())); + colorAttachment->setSourceAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_SRCBLEND())); + colorAttachment->setDestinationAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_DSTBLEND())); + } + else + { + colorAttachment->setAlphaBlendOperation(rgbBlendOp); + colorAttachment->setSourceAlphaBlendFactor(srcRgbBlendFactor); + colorAttachment->setDestinationAlphaBlendFactor(dstRgbBlendFactor); + } + } + } + if (activeFBO->depthBuffer.texture) + { + auto texture = static_cast(activeFBO->depthBuffer.texture); + renderPipelineDescriptor->setDepthAttachmentPixelFormat(texture->GetTexture()->pixelFormat()); + } + + NS::Error* error = nullptr; + pipeline = m_mtlr->GetDevice()->newRenderPipelineState(renderPipelineDescriptor, &error); + if (error) + { + debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String()); + return nullptr; + } + + return pipeline; +} + +uint64 MetalPipelineCache::CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) +{ + // Hash + uint64 stateHash = 0; + for (auto& group : fetchShader->bufferGroups) + { + uint32 bufferStride = group.getCurrentBufferStride(lcr.GetRawView()); + stateHash = std::rotl(stateHash, 7); + stateHash += bufferStride * 3; + } + + stateHash += fetchShader->getVkPipelineHashFragment(); + stateHash = std::rotl(stateHash, 7); + + stateHash += lcr.GetRawView()[mmVGT_PRIMITIVE_TYPE]; + stateHash = std::rotl(stateHash, 7); + + stateHash += lcr.GetRawView()[mmVGT_STRMOUT_EN]; + stateHash = std::rotl(stateHash, 7); + + if(lcr.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL()) + stateHash += 0x333333; + + stateHash = (stateHash >> 8) + (stateHash * 0x370531ull) % 0x7F980D3BF9B4639Dull; + + uint32* ctxRegister = lcr.GetRawView(); + + if (vertexShader) + stateHash += vertexShader->baseHash; + + stateHash = std::rotl(stateHash, 13); + + if (pixelShader) + stateHash += pixelShader->baseHash + pixelShader->auxHash; + + stateHash = std::rotl(stateHash, 13); + + uint32 polygonCtrl = lcr.PA_SU_SC_MODE_CNTL.getRawValue(); + stateHash += polygonCtrl; + stateHash = std::rotl(stateHash, 7); + + stateHash += ctxRegister[Latte::REGADDR::PA_CL_CLIP_CNTL]; + stateHash = std::rotl(stateHash, 7); + + const auto colorControlReg = ctxRegister[Latte::REGADDR::CB_COLOR_CONTROL]; + stateHash += colorControlReg; + + stateHash += ctxRegister[Latte::REGADDR::CB_TARGET_MASK]; + + const uint32 blendEnableMask = (colorControlReg >> 8) & 0xFF; + if (blendEnableMask) + { + for (auto i = 0; i < 8; ++i) + { + if (((blendEnableMask & (1 << i))) == 0) + continue; + stateHash = std::rotl(stateHash, 7); + stateHash += ctxRegister[Latte::REGADDR::CB_BLEND0_CONTROL + i]; + } + } + + return stateHash; +} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h new file mode 100644 index 00000000..11f81f88 --- /dev/null +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h @@ -0,0 +1,22 @@ +#pragma once + +#include + +#include "HW/Latte/ISA/LatteReg.h" +#include "HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h" + +class MetalPipelineCache +{ +public: + MetalPipelineCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {} + ~MetalPipelineCache(); + + MTL::RenderPipelineState* GetPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); + +private: + class MetalRenderer* m_mtlr; + + std::map m_pipelineCache; + + uint64 CalculatePipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr); +}; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index f91a36a4..5262360d 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -4,6 +4,8 @@ #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/ShaderSourcePresent.h" @@ -13,9 +15,11 @@ #include "Cafe/HW/Latte/Core/LatteIndices.h" #include "Cemu/Logging/CemuDebugLogging.h" #include "Foundation/NSTypes.hpp" +#include "HW/Latte/Core/Latte.h" #include "Metal/MTLDepthStencil.hpp" #include "Metal/MTLRenderCommandEncoder.hpp" #include "Metal/MTLRenderPass.hpp" +#include "Metal/MTLRenderPipeline.hpp" #include "gui/guiWrapper.h" extern bool hasValidFramebufferAttached; @@ -31,6 +35,7 @@ MetalRenderer::MetalRenderer() m_nearestSampler = m_device->newSamplerState(samplerDescriptor); m_memoryManager = new MetalMemoryManager(this); + m_pipelineCache = new MetalPipelineCache(this); // Initialize state for (uint32 i = 0; i < (uint32)LatteConst::ShaderType::TotalCount; i++) @@ -612,119 +617,8 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 auto fetchShader = vertexShader->compatibleFetchShader; - // Vertex descriptor - MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init(); - for (auto& bufferGroup : fetchShader->bufferGroups) - { - std::optional fetchType; - - for (sint32 j = 0; j < bufferGroup.attribCount; ++j) - { - auto& attr = bufferGroup.attrib[j]; - - uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId]; - if (semanticId == (uint32)-1) - continue; // attribute not used? - - auto attribute = vertexDescriptor->attributes()->object(semanticId); - attribute->setOffset(attr.offset); - // Bind from the end to not conflict with uniform buffers - attribute->setBufferIndex(GET_MTL_VERTEX_BUFFER_INDEX(attr.attributeBufferIndex)); - attribute->setFormat(GetMtlVertexFormat(attr.format)); - - if (fetchType.has_value()) - cemu_assert_debug(fetchType == attr.fetchType); - else - fetchType = attr.fetchType; - - if (attr.fetchType == LatteConst::INSTANCE_DATA) - { - cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported - } - } - - uint32 bufferIndex = bufferGroup.attributeBufferIndex; - uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; - // TODO: is LatteGPUState.contextNew correct? - uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; - - auto layout = vertexDescriptor->layouts()->object(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)); - layout->setStride(bufferStride); - if (!fetchType.has_value() || fetchType == LatteConst::VertexFetchType2::VERTEX_DATA) - layout->setStepFunction(MTL::VertexStepFunctionPerVertex); - else if (fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA) - layout->setStepFunction(MTL::VertexStepFunctionPerInstance); - else - { - debug_printf("unimplemented vertex fetch type %u\n", (uint32)fetchType.value()); - cemu_assert(false); - } - } - // Render pipeline state - MTL::RenderPipelineDescriptor* renderPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); - renderPipelineDescriptor->setVertexFunction(static_cast(vertexShader->shader)->GetFunction()); - renderPipelineDescriptor->setFragmentFunction(static_cast(pixelShader->shader)->GetFunction()); - // TODO: don't always set the vertex descriptor - renderPipelineDescriptor->setVertexDescriptor(vertexDescriptor); - for (uint8 i = 0; i < 8; i++) - { - const auto& colorBuffer = m_state.activeFBO->colorBuffer[i]; - auto texture = static_cast(colorBuffer.texture); - if (!texture) - { - continue; - } - auto colorAttachment = renderPipelineDescriptor->colorAttachments()->object(i); - colorAttachment->setPixelFormat(texture->GetTexture()->pixelFormat()); - - // Blending - const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = LatteGPUState.contextNew.CB_COLOR_CONTROL; - uint32 blendEnableMask = colorControlReg.get_BLEND_MASK(); - uint32 renderTargetMask = LatteGPUState.contextNew.CB_TARGET_MASK.get_MASK(); - - bool blendEnabled = ((blendEnableMask & (1 << i))) != 0; - if (blendEnabled) - { - colorAttachment->setBlendingEnabled(true); - - const auto& blendControlReg = LatteGPUState.contextNew.CB_BLENDN_CONTROL[i]; - - auto rgbBlendOp = GetMtlBlendOp(blendControlReg.get_COLOR_COMB_FCN()); - auto srcRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_SRCBLEND()); - auto dstRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_DSTBLEND()); - - colorAttachment->setWriteMask((renderTargetMask >> (i * 4)) & 0xF); - colorAttachment->setRgbBlendOperation(rgbBlendOp); - colorAttachment->setSourceRGBBlendFactor(srcRgbBlendFactor); - colorAttachment->setDestinationRGBBlendFactor(dstRgbBlendFactor); - if (blendControlReg.get_SEPARATE_ALPHA_BLEND()) - { - colorAttachment->setAlphaBlendOperation(GetMtlBlendOp(blendControlReg.get_ALPHA_COMB_FCN())); - colorAttachment->setSourceAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_SRCBLEND())); - colorAttachment->setDestinationAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_DSTBLEND())); - } - else - { - colorAttachment->setAlphaBlendOperation(rgbBlendOp); - colorAttachment->setSourceAlphaBlendFactor(srcRgbBlendFactor); - colorAttachment->setDestinationAlphaBlendFactor(dstRgbBlendFactor); - } - } - } - if (m_state.activeFBO->depthBuffer.texture) - { - auto texture = static_cast(m_state.activeFBO->depthBuffer.texture); - renderPipelineDescriptor->setDepthAttachmentPixelFormat(texture->GetTexture()->pixelFormat()); - } - - NS::Error* error = nullptr; - MTL::RenderPipelineState* renderPipelineState = m_device->newRenderPipelineState(renderPipelineDescriptor, &error); - if (error) - { - debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String()); - return; - } + MTL::RenderPipelineState* renderPipelineState = m_pipelineCache->GetPipelineState(fetchShader, vertexShader, pixelShader, m_state.activeFBO, LatteGPUState.contextNew); renderCommandEncoder->setRenderPipelineState(renderPipelineState); // Depth stencil state @@ -872,6 +766,143 @@ void MetalRenderer::indexData_uploadIndexMemory(uint32 offset, uint32 size) debug_printf("MetalRenderer::indexData_uploadIndexMemory not implemented\n"); } +void MetalRenderer::EnsureCommandBuffer() +{ + if (!m_commandBuffer) + { + // Debug + m_commandQueue->insertDebugCaptureBoundary(); + + m_commandBuffer = m_commandQueue->commandBuffer(); + } +} + +// Some render passes clear the attachments, forceRecreate is supposed to be used in those cases +MTL::RenderCommandEncoder* MetalRenderer::GetRenderCommandEncoder(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* colorRenderTargets[8], MTL::Texture* depthRenderTarget, bool forceRecreate, bool rebindStateIfNewEncoder) +{ + EnsureCommandBuffer(); + + // Check if we need to begin a new render pass + if (m_commandEncoder) + { + if (!forceRecreate) + { + if (m_encoderType == MetalEncoderType::Render) + { + bool needsNewRenderPass = false; + for (uint8 i = 0; i < 8; i++) + { + if (colorRenderTargets[i] && (colorRenderTargets[i] != m_state.colorRenderTargets[i])) + { + needsNewRenderPass = true; + break; + } + } + + if (!needsNewRenderPass) + { + if (depthRenderTarget && (depthRenderTarget != m_state.depthRenderTarget)) + { + needsNewRenderPass = true; + } + } + + if (!needsNewRenderPass) + { + return (MTL::RenderCommandEncoder*)m_commandEncoder; + } + } + } + + EndEncoding(); + } + + // Update state + for (uint8 i = 0; i < 8; i++) + { + m_state.colorRenderTargets[i] = colorRenderTargets[i]; + } + m_state.depthRenderTarget = depthRenderTarget; + + auto renderCommandEncoder = m_commandBuffer->renderCommandEncoder(renderPassDescriptor); + m_commandEncoder = renderCommandEncoder; + m_encoderType = MetalEncoderType::Render; + + if (rebindStateIfNewEncoder) + { + // Rebind all the render state + RebindRenderState(renderCommandEncoder); + } + + return renderCommandEncoder; +} + +MTL::ComputeCommandEncoder* MetalRenderer::GetComputeCommandEncoder() +{ + if (m_commandEncoder) + { + if (m_encoderType != MetalEncoderType::Compute) + { + return (MTL::ComputeCommandEncoder*)m_commandEncoder; + } + + EndEncoding(); + } + + auto computeCommandEncoder = m_commandBuffer->computeCommandEncoder(); + m_commandEncoder = computeCommandEncoder; + m_encoderType = MetalEncoderType::Compute; + + return computeCommandEncoder; +} + +MTL::BlitCommandEncoder* MetalRenderer::GetBlitCommandEncoder() +{ + if (m_commandEncoder) + { + if (m_encoderType != MetalEncoderType::Blit) + { + return (MTL::BlitCommandEncoder*)m_commandEncoder; + } + + EndEncoding(); + } + + auto blitCommandEncoder = m_commandBuffer->blitCommandEncoder(); + m_commandEncoder = blitCommandEncoder; + m_encoderType = MetalEncoderType::Blit; + + return blitCommandEncoder; +} + +void MetalRenderer::EndEncoding() +{ + if (m_commandEncoder) + { + m_commandEncoder->endEncoding(); + m_commandEncoder->release(); + m_commandEncoder = nullptr; + } +} + +void MetalRenderer::CommitCommandBuffer() +{ + EndEncoding(); + + if (m_commandBuffer) + { + m_commandBuffer->commit(); + m_commandBuffer->release(); + m_commandBuffer = nullptr; + + // Reset temporary buffers + m_memoryManager->ResetTemporaryBuffers(); + + // Debug + m_commandQueue->insertDebugCaptureBoundary(); + } +} + void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader) { sint32 textureCount = shader->resourceMapping.getTextureCount(); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index a79a515f..77aee9ee 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -5,11 +5,6 @@ #include #include "Cafe/HW/Latte/Renderer/Renderer.h" -#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h" -#include "Metal/MTLComputeCommandEncoder.hpp" -#include "Metal/MTLRenderCommandEncoder.hpp" -#include "Metal/MTLRenderPass.hpp" -#include "Metal/MTLRenderPipeline.hpp" #define MAX_MTL_BUFFERS 31 #define GET_MTL_VERTEX_BUFFER_INDEX(index) (MAX_MTL_BUFFERS - index - 2) @@ -199,7 +194,8 @@ public: private: CA::MetalLayer* m_metalLayer; - MetalMemoryManager* m_memoryManager; + class MetalMemoryManager* m_memoryManager; + class MetalPipelineCache* m_pipelineCache; // Metal objects MTL::Device* m_device; @@ -222,144 +218,13 @@ private: MetalState m_state; // Helpers - void EnsureCommandBuffer() - { - if (!m_commandBuffer) - { - // Debug - m_commandQueue->insertDebugCaptureBoundary(); - - m_commandBuffer = m_commandQueue->commandBuffer(); - } - } - - // Some render passes clear the attachments, forceRecreate is supposed to be used in those cases - MTL::RenderCommandEncoder* GetRenderCommandEncoder(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* colorRenderTargets[8], MTL::Texture* depthRenderTarget, bool forceRecreate = false, bool rebindStateIfNewEncoder = true) - { - EnsureCommandBuffer(); - - // Check if we need to begin a new render pass - if (m_commandEncoder) - { - if (!forceRecreate) - { - if (m_encoderType == MetalEncoderType::Render) - { - bool needsNewRenderPass = false; - for (uint8 i = 0; i < 8; i++) - { - if (colorRenderTargets[i] && (colorRenderTargets[i] != m_state.colorRenderTargets[i])) - { - needsNewRenderPass = true; - break; - } - } - - if (!needsNewRenderPass) - { - if (depthRenderTarget && (depthRenderTarget != m_state.depthRenderTarget)) - { - needsNewRenderPass = true; - } - } - - if (!needsNewRenderPass) - { - return (MTL::RenderCommandEncoder*)m_commandEncoder; - } - } - } - - EndEncoding(); - } - - // Update state - for (uint8 i = 0; i < 8; i++) - { - m_state.colorRenderTargets[i] = colorRenderTargets[i]; - } - m_state.depthRenderTarget = depthRenderTarget; - - auto renderCommandEncoder = m_commandBuffer->renderCommandEncoder(renderPassDescriptor); - m_commandEncoder = renderCommandEncoder; - m_encoderType = MetalEncoderType::Render; - - if (rebindStateIfNewEncoder) - { - // Rebind all the render state - RebindRenderState(renderCommandEncoder); - } - - return renderCommandEncoder; - } - - MTL::ComputeCommandEncoder* GetComputeCommandEncoder() - { - if (m_commandEncoder) - { - if (m_encoderType != MetalEncoderType::Compute) - { - return (MTL::ComputeCommandEncoder*)m_commandEncoder; - } - - EndEncoding(); - } - - auto computeCommandEncoder = m_commandBuffer->computeCommandEncoder(); - m_commandEncoder = computeCommandEncoder; - m_encoderType = MetalEncoderType::Compute; - - return computeCommandEncoder; - } - - MTL::BlitCommandEncoder* GetBlitCommandEncoder() - { - if (m_commandEncoder) - { - if (m_encoderType != MetalEncoderType::Blit) - { - return (MTL::BlitCommandEncoder*)m_commandEncoder; - } - - EndEncoding(); - } - - auto blitCommandEncoder = m_commandBuffer->blitCommandEncoder(); - m_commandEncoder = blitCommandEncoder; - m_encoderType = MetalEncoderType::Blit; - - return blitCommandEncoder; - } - - void EndEncoding() - { - if (m_commandEncoder) - { - m_commandEncoder->endEncoding(); - m_commandEncoder->release(); - m_commandEncoder = nullptr; - } - } - - void CommitCommandBuffer() - { - EndEncoding(); - - if (m_commandBuffer) - { - m_commandBuffer->commit(); - m_commandBuffer->release(); - m_commandBuffer = nullptr; - - // Reset temporary buffers - m_memoryManager->ResetTemporaryBuffers(); - - // Debug - m_commandQueue->insertDebugCaptureBoundary(); - } - } + void EnsureCommandBuffer(); + MTL::RenderCommandEncoder* GetRenderCommandEncoder(MTL::RenderPassDescriptor* renderPassDescriptor, MTL::Texture* colorRenderTargets[8], MTL::Texture* depthRenderTarget, bool forceRecreate = false, bool rebindStateIfNewEncoder = true); + MTL::ComputeCommandEncoder* GetComputeCommandEncoder(); + MTL::BlitCommandEncoder* GetBlitCommandEncoder(); + void EndEncoding(); + void CommitCommandBuffer(); void BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader); - void RebindRenderState(MTL::RenderCommandEncoder* renderCommandEncoder); };