From 406a85672df6edc38a4f0ad686a576c446e2f846 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sat, 3 Aug 2024 10:01:57 +0200 Subject: [PATCH] implement depth stencil cache & fix: textureSize --- src/Cafe/CMakeLists.txt | 2 + src/Cafe/HW/Latte/Core/LatteShader.cpp | 1 + .../LatteDecompilerEmitMSL.cpp | 12 +- .../Renderer/Metal/MetalDepthStencilCache.cpp | 138 ++++++++++++++++++ .../Renderer/Metal/MetalDepthStencilCache.h | 21 +++ .../Renderer/Metal/MetalPipelineCache.cpp | 15 +- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 89 +---------- .../HW/Latte/Renderer/Metal/MetalRenderer.h | 1 + .../Renderer/Metal/RendererShaderMtl.cpp | 2 +- 9 files changed, 183 insertions(+), 98 deletions(-) create mode 100644 src/Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp create mode 100644 src/Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.h diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index fb802c82..7d10788a 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -555,6 +555,8 @@ if(ENABLE_METAL) HW/Latte/Renderer/Metal/MetalMemoryManager.h HW/Latte/Renderer/Metal/MetalPipelineCache.cpp HW/Latte/Renderer/Metal/MetalPipelineCache.h + HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp + HW/Latte/Renderer/Metal/MetalDepthStencilCache.h HW/Latte/Renderer/Metal/ShaderSourcePresent.h ) diff --git a/src/Cafe/HW/Latte/Core/LatteShader.cpp b/src/Cafe/HW/Latte/Core/LatteShader.cpp index 66730a9b..486516ef 100644 --- a/src/Cafe/HW/Latte/Core/LatteShader.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShader.cpp @@ -615,6 +615,7 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi LatteDecompilerShader* shader = decompilerOutput.shader; shader->baseHash = baseHash; // copy resource mapping + // HACK if (g_renderer->GetType() != RendererAPI::OpenGL) shader->resourceMapping = decompilerOutput.resourceMappingVK; else diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp 
b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index 0e55ffa1..3617c7c0 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -2422,7 +2422,7 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex src->add(")"); // avoid truncate to effectively round downwards on texel edges if (ActiveSettings::ForceSamplerRoundToPrecision()) - src->addFmt("+ float2(1.0)/float2(textureSize(tex{}, 0))/512.0", texInstruction->textureFetch.textureIndex); + src->addFmt("+ float2(1.0)/float2(tex{}.get_width(), tex{}.get_height())/512.0", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); } // lod or lod bias parameter if( texOpcode == GPU7_TEX_INST_SAMPLE_L || texOpcode == GPU7_TEX_INST_SAMPLE_LB || texOpcode == GPU7_TEX_INST_SAMPLE_C_L) @@ -2599,17 +2599,17 @@ static void _emitTEXGetTextureResInfoCode(LatteDecompilerShaderContext* shaderCo auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex]; if (texDim == Latte::E_DIM::DIM_1D) - src->addFmt(" = int4(textureSize(tex{}, 0),1,1,1).", texInstruction->textureFetch.textureIndex); + src->addFmt(" = int4(tex{}.get_width(), 1, 1, 1).", texInstruction->textureFetch.textureIndex); else if (texDim == Latte::E_DIM::DIM_1D_ARRAY) - src->addFmt(" = int4(textureSize(tex{}, 0),1,1).", texInstruction->textureFetch.textureIndex); + src->addFmt(" = int4(tex{}.get_width(), tex{}.get_array_size(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); else if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA) - src->addFmt(" = int4(textureSize(tex{}, 0),1,1).", texInstruction->textureFetch.textureIndex); + src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); else 
if (texDim == Latte::E_DIM::DIM_2D_ARRAY) - src->addFmt(" = int4(textureSize(tex{}, 0),1).", texInstruction->textureFetch.textureIndex); + src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), tex{}.get_array_size(), 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); else { cemu_assert_debug(false); - src->addFmt(" = int4(textureSize(tex{}, 0),1,1).", texInstruction->textureFetch.textureIndex); + src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); } for(sint32 f=0; f<4; f++) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp new file mode 100644 index 00000000..87968ec3 --- /dev/null +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp @@ -0,0 +1,138 @@ +#include "Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" +#include "HW/Latte/ISA/RegDefines.h" +#include "HW/Latte/Renderer/Metal/LatteToMtl.h" +#include "Metal/MTLDepthStencil.hpp" + +MetalDepthStencilCache::~MetalDepthStencilCache() +{ + for (auto& pair : m_depthStencilCache) + { + pair.second->release(); // drop the reference obtained from newDepthStencilState() + } + m_depthStencilCache.clear(); +} + +MTL::DepthStencilState* MetalDepthStencilCache::GetDepthStencilState(const LatteContextRegister& lcr) +{ + uint64 stateHash = CalculateDepthStencilHash(lcr); // cache key computed from the registers in lcr + auto& depthStencilState = m_depthStencilCache[stateHash]; // operator[] inserts a null entry on a miss + if (depthStencilState) + { + return depthStencilState; + } + + // Cache miss: build the state from lcr, the same register set the key was hashed from + bool depthEnable = lcr.DB_DEPTH_CONTROL.get_Z_ENABLE(); + auto depthFunc = lcr.DB_DEPTH_CONTROL.get_Z_FUNC(); + bool depthWriteEnable = lcr.DB_DEPTH_CONTROL.get_Z_WRITE_ENABLE(); + + MTL::DepthStencilDescriptor* desc = 
MTL::DepthStencilDescriptor::alloc()->init(); + desc->setDepthWriteEnabled(depthWriteEnable); + + auto depthCompareFunc = GetMtlCompareFunc(depthFunc); + if (!depthEnable) + { + depthCompareFunc = MTL::CompareFunctionAlways; // depth test disabled: let every fragment pass + } + desc->setDepthCompareFunction(depthCompareFunc); + + // TODO: stencil state + /* + // get stencil control parameters + bool stencilEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ENABLE(); + bool backStencilEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_BACK_STENCIL_ENABLE(); + auto frontStencilFunc = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_FUNC_F(); + auto frontStencilZPass = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ZPASS_F(); + auto frontStencilZFail = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ZFAIL_F(); + auto frontStencilFail = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_FAIL_F(); + auto backStencilFunc = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_FUNC_B(); + auto backStencilZPass = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ZPASS_B(); + auto backStencilZFail = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ZFAIL_B(); + auto backStencilFail = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_FAIL_B(); + // get stencil control parameters + uint32 stencilCompareMaskFront = LatteGPUState.contextNew.DB_STENCILREFMASK.get_STENCILMASK_F(); + uint32 stencilWriteMaskFront = LatteGPUState.contextNew.DB_STENCILREFMASK.get_STENCILWRITEMASK_F(); + uint32 stencilRefFront = LatteGPUState.contextNew.DB_STENCILREFMASK.get_STENCILREF_F(); + uint32 stencilCompareMaskBack = LatteGPUState.contextNew.DB_STENCILREFMASK_BF.get_STENCILMASK_B(); + uint32 stencilWriteMaskBack = LatteGPUState.contextNew.DB_STENCILREFMASK_BF.get_STENCILWRITEMASK_B(); + uint32 stencilRefBack = LatteGPUState.contextNew.DB_STENCILREFMASK_BF.get_STENCILREF_B(); + + static const VkStencilOp stencilOpTable[8] = { + VK_STENCIL_OP_KEEP, + VK_STENCIL_OP_ZERO, + VK_STENCIL_OP_REPLACE, 
+ VK_STENCIL_OP_INCREMENT_AND_CLAMP, + VK_STENCIL_OP_DECREMENT_AND_CLAMP, + VK_STENCIL_OP_INVERT, + VK_STENCIL_OP_INCREMENT_AND_WRAP, + VK_STENCIL_OP_DECREMENT_AND_WRAP + }; + + depthStencilState.stencilTestEnable = stencilEnable ? VK_TRUE : VK_FALSE; + + depthStencilState.front.reference = stencilRefFront; + depthStencilState.front.compareMask = stencilCompareMaskFront; + depthStencilState.front.writeMask = stencilWriteMaskBack; // NOTE(review): front face takes the *back* write mask here - presumably should be stencilWriteMaskFront, confirm before enabling + depthStencilState.front.compareOp = vkDepthCompareTable[(size_t)frontStencilFunc]; + depthStencilState.front.depthFailOp = stencilOpTable[(size_t)frontStencilZFail]; + depthStencilState.front.failOp = stencilOpTable[(size_t)frontStencilFail]; + depthStencilState.front.passOp = stencilOpTable[(size_t)frontStencilZPass]; + + if (backStencilEnable) + { + depthStencilState.back.reference = stencilRefBack; + depthStencilState.back.compareMask = stencilCompareMaskBack; + depthStencilState.back.writeMask = stencilWriteMaskBack; + depthStencilState.back.compareOp = vkDepthCompareTable[(size_t)backStencilFunc]; + depthStencilState.back.depthFailOp = stencilOpTable[(size_t)backStencilZFail]; + depthStencilState.back.failOp = stencilOpTable[(size_t)backStencilFail]; + depthStencilState.back.passOp = stencilOpTable[(size_t)backStencilZPass]; + } + else + { + depthStencilState.back.reference = stencilRefFront; + depthStencilState.back.compareMask = stencilCompareMaskFront; + depthStencilState.back.writeMask = stencilWriteMaskFront; + depthStencilState.back.compareOp = vkDepthCompareTable[(size_t)frontStencilFunc]; + depthStencilState.back.depthFailOp = stencilOpTable[(size_t)frontStencilZFail]; + depthStencilState.back.failOp = stencilOpTable[(size_t)frontStencilFail]; + depthStencilState.back.passOp = stencilOpTable[(size_t)frontStencilZPass]; + } + */ + + depthStencilState = m_mtlr->GetDevice()->newDepthStencilState(desc); // assigned through the map reference, so the new state is cached + desc->release(); + + return depthStencilState; +} + +uint64 MetalDepthStencilCache::CalculateDepthStencilHash(const 
LatteContextRegister& lcr) +{ + uint32* ctxRegister = lcr.GetRawView(); + + // Build the key: the stencil ref/mask registers only contribute while stencil testing is enabled + uint64 stateHash = 0; + uint32 depthControl = ctxRegister[Latte::REGADDR::DB_DEPTH_CONTROL]; + bool stencilTestEnable = depthControl & 1; + if (stencilTestEnable) + { + stateHash += ctxRegister[mmDB_STENCILREFMASK]; + stateHash = std::rotl(stateHash, 17); + if(depthControl & (1<<7)) // back stencil enable + { + stateHash += ctxRegister[mmDB_STENCILREFMASK_BF]; + stateHash = std::rotl(stateHash, 13); + } + } + else + { + // zero out stencil related bits (8-31) + depthControl &= 0xFF; + } + + stateHash = std::rotl(stateHash, 17); + stateHash += depthControl; + + return stateHash; +} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.h new file mode 100644 index 00000000..4ce05c28 --- /dev/null +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.h @@ -0,0 +1,21 @@ +#pragma once + +#include <map> + +#include "HW/Latte/ISA/LatteReg.h" + +class MetalDepthStencilCache +{ +public: + MetalDepthStencilCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {} + ~MetalDepthStencilCache(); + + MTL::DepthStencilState* GetDepthStencilState(const LatteContextRegister& lcr); // returns the cached state for lcr, creating it on first use + +private: + class MetalRenderer* m_mtlr; + + std::map<uint64, MTL::DepthStencilState*> m_depthStencilCache; // state hash -> cached state object + + uint64 CalculateDepthStencilHash(const LatteContextRegister& lcr); +}; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 59dcdaee..28f32193 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -75,11 +75,11 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS } // Render pipeline state - MTL::RenderPipelineDescriptor* renderPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); - 
renderPipelineDescriptor->setVertexFunction(static_cast(vertexShader->shader)->GetFunction()); - renderPipelineDescriptor->setFragmentFunction(static_cast(pixelShader->shader)->GetFunction()); + MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); + desc->setVertexFunction(static_cast(vertexShader->shader)->GetFunction()); + desc->setFragmentFunction(static_cast(pixelShader->shader)->GetFunction()); // TODO: don't always set the vertex descriptor - renderPipelineDescriptor->setVertexDescriptor(vertexDescriptor); + desc->setVertexDescriptor(vertexDescriptor); for (uint8 i = 0; i < 8; i++) { const auto& colorBuffer = activeFBO->colorBuffer[i]; @@ -88,7 +88,7 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS { continue; } - auto colorAttachment = renderPipelineDescriptor->colorAttachments()->object(i); + auto colorAttachment = desc->colorAttachments()->object(i); colorAttachment->setPixelFormat(texture->GetTexture()->pixelFormat()); // Blending @@ -128,11 +128,12 @@ MTL::RenderPipelineState* MetalPipelineCache::GetPipelineState(const LatteFetchS if (activeFBO->depthBuffer.texture) { auto texture = static_cast(activeFBO->depthBuffer.texture); - renderPipelineDescriptor->setDepthAttachmentPixelFormat(texture->GetTexture()->pixelFormat()); + desc->setDepthAttachmentPixelFormat(texture->GetTexture()->pixelFormat()); } NS::Error* error = nullptr; - pipeline = m_mtlr->GetDevice()->newRenderPipelineState(renderPipelineDescriptor, &error); + pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, &error); + desc->release(); if (error) { debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String()); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 5262360d..5e1a74ec 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ 
-5,21 +5,16 @@ #include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/ShaderSourcePresent.h" -#include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/Core/LatteShader.h" #include "Cafe/HW/Latte/Core/LatteIndices.h" #include "Cemu/Logging/CemuDebugLogging.h" -#include "Foundation/NSTypes.hpp" #include "HW/Latte/Core/Latte.h" -#include "Metal/MTLDepthStencil.hpp" -#include "Metal/MTLRenderCommandEncoder.hpp" -#include "Metal/MTLRenderPass.hpp" -#include "Metal/MTLRenderPipeline.hpp" #include "gui/guiWrapper.h" extern bool hasValidFramebufferAttached; @@ -36,6 +31,7 @@ MetalRenderer::MetalRenderer() m_memoryManager = new MetalMemoryManager(this); m_pipelineCache = new MetalPipelineCache(this); + m_depthStencilCache = new MetalDepthStencilCache(this); // Initialize state for (uint32 i = 0; i < (uint32)LatteConst::ShaderType::TotalCount; i++) @@ -49,6 +45,8 @@ MetalRenderer::MetalRenderer() MetalRenderer::~MetalRenderer() { + delete m_depthStencilCache; + delete m_pipelineCache; delete m_memoryManager; m_commandQueue->release(); @@ -622,84 +620,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 renderCommandEncoder->setRenderPipelineState(renderPipelineState); // Depth stencil state - bool depthEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_Z_ENABLE(); - auto depthFunc = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_Z_FUNC(); - bool depthWriteEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_Z_WRITE_ENABLE(); - - MTL::DepthStencilDescriptor* depthStencilDescriptor = MTL::DepthStencilDescriptor::alloc()->init(); - depthStencilDescriptor->setDepthWriteEnabled(depthWriteEnable); - 
- auto depthCompareFunc = GetMtlCompareFunc(depthFunc); - if (!depthEnable) - { - depthCompareFunc = MTL::CompareFunctionAlways; - } - depthStencilDescriptor->setDepthCompareFunction(depthCompareFunc); - - // TODO: stencil state - /* - // get stencil control parameters - bool stencilEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ENABLE(); - bool backStencilEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_BACK_STENCIL_ENABLE(); - auto frontStencilFunc = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_FUNC_F(); - auto frontStencilZPass = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ZPASS_F(); - auto frontStencilZFail = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ZFAIL_F(); - auto frontStencilFail = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_FAIL_F(); - auto backStencilFunc = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_FUNC_B(); - auto backStencilZPass = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ZPASS_B(); - auto backStencilZFail = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ZFAIL_B(); - auto backStencilFail = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_FAIL_B(); - // get stencil control parameters - uint32 stencilCompareMaskFront = LatteGPUState.contextNew.DB_STENCILREFMASK.get_STENCILMASK_F(); - uint32 stencilWriteMaskFront = LatteGPUState.contextNew.DB_STENCILREFMASK.get_STENCILWRITEMASK_F(); - uint32 stencilRefFront = LatteGPUState.contextNew.DB_STENCILREFMASK.get_STENCILREF_F(); - uint32 stencilCompareMaskBack = LatteGPUState.contextNew.DB_STENCILREFMASK_BF.get_STENCILMASK_B(); - uint32 stencilWriteMaskBack = LatteGPUState.contextNew.DB_STENCILREFMASK_BF.get_STENCILWRITEMASK_B(); - uint32 stencilRefBack = LatteGPUState.contextNew.DB_STENCILREFMASK_BF.get_STENCILREF_B(); - - static const VkStencilOp stencilOpTable[8] = { - VK_STENCIL_OP_KEEP, - VK_STENCIL_OP_ZERO, - VK_STENCIL_OP_REPLACE, - VK_STENCIL_OP_INCREMENT_AND_CLAMP, - VK_STENCIL_OP_DECREMENT_AND_CLAMP, - 
VK_STENCIL_OP_INVERT, - VK_STENCIL_OP_INCREMENT_AND_WRAP, - VK_STENCIL_OP_DECREMENT_AND_WRAP - }; - - depthStencilState.stencilTestEnable = stencilEnable ? VK_TRUE : VK_FALSE; - - depthStencilState.front.reference = stencilRefFront; - depthStencilState.front.compareMask = stencilCompareMaskFront; - depthStencilState.front.writeMask = stencilWriteMaskBack; - depthStencilState.front.compareOp = vkDepthCompareTable[(size_t)frontStencilFunc]; - depthStencilState.front.depthFailOp = stencilOpTable[(size_t)frontStencilZFail]; - depthStencilState.front.failOp = stencilOpTable[(size_t)frontStencilFail]; - depthStencilState.front.passOp = stencilOpTable[(size_t)frontStencilZPass]; - - if (backStencilEnable) - { - depthStencilState.back.reference = stencilRefBack; - depthStencilState.back.compareMask = stencilCompareMaskBack; - depthStencilState.back.writeMask = stencilWriteMaskBack; - depthStencilState.back.compareOp = vkDepthCompareTable[(size_t)backStencilFunc]; - depthStencilState.back.depthFailOp = stencilOpTable[(size_t)backStencilZFail]; - depthStencilState.back.failOp = stencilOpTable[(size_t)backStencilFail]; - depthStencilState.back.passOp = stencilOpTable[(size_t)backStencilZPass]; - } - else - { - depthStencilState.back.reference = stencilRefFront; - depthStencilState.back.compareMask = stencilCompareMaskFront; - depthStencilState.back.writeMask = stencilWriteMaskFront; - depthStencilState.back.compareOp = vkDepthCompareTable[(size_t)frontStencilFunc]; - depthStencilState.back.depthFailOp = stencilOpTable[(size_t)frontStencilZFail]; - depthStencilState.back.failOp = stencilOpTable[(size_t)frontStencilFail]; - depthStencilState.back.passOp = stencilOpTable[(size_t)frontStencilZPass]; - } - */ - MTL::DepthStencilState* depthStencilState = m_device->newDepthStencilState(depthStencilDescriptor); + MTL::DepthStencilState* depthStencilState = m_depthStencilCache->GetDepthStencilState(LatteGPUState.contextNew); 
renderCommandEncoder->setDepthStencilState(depthStencilState); // Primitive type diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 77aee9ee..ac1d55b0 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -196,6 +196,7 @@ private: class MetalMemoryManager* m_memoryManager; class MetalPipelineCache* m_pipelineCache; + class MetalDepthStencilCache* m_depthStencilCache; // Metal objects MTL::Device* m_device; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp index b4a43a7c..f0d5fda1 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -18,13 +18,13 @@ RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type desc->setName(NS::String::string("main0", NS::ASCIIStringEncoding)); error = nullptr; m_function = library->newFunction(desc, &error); + library->release(); if (error) { printf("failed to create function (error: %s)\n", error->localizedDescription()->utf8String()); error->release(); return; } - library->release(); } RendererShaderMtl::~RendererShaderMtl()