From fa8bab2f3978d1fedeffa4c578a876e4c624206b Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Wed, 27 Mar 2024 16:01:44 +0100 Subject: [PATCH] Latte: Add support for LOOP_START_NO_AL shader instruction This instruction is used by Injustice: Gods Among Us and Project Zero Also improved robustness of rendering to be less prone to crashing when a game tries to draw with broken shaders --- src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp | 2 +- src/Cafe/HW/Latte/Core/LatteShaderAssembly.h | 2 +- .../HW/Latte/LegacyShaderDecompiler/LatteDecompiler.cpp | 6 ++++-- .../LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp | 9 ++++++--- .../LegacyShaderDecompiler/LatteDecompilerEmitGLSL.cpp | 3 ++- src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp | 4 ++-- 6 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp b/src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp index f165e257..5b9fc349 100644 --- a/src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp +++ b/src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp @@ -340,7 +340,7 @@ uint8 LatteMRT::GetActiveColorBufferMask(const LatteDecompilerShader* pixelShade return 0; cemu_assert_debug(colorControlReg.get_DEGAMMA_ENABLE() == false); // not supported // combine color buffer mask with pixel output mask from pixel shader - colorBufferMask &= pixelShader->pixelColorOutputMask; + colorBufferMask &= (pixelShader ? pixelShader->pixelColorOutputMask : 0); // combine color buffer mask with color channel mask from mmCB_TARGET_MASK (disable render buffer if all colors are blocked) uint32 channelTargetMask = lcr.CB_TARGET_MASK.get_MASK(); for (uint32 i = 0; i < 8; i++) diff --git a/src/Cafe/HW/Latte/Core/LatteShaderAssembly.h b/src/Cafe/HW/Latte/Core/LatteShaderAssembly.h index df636689..d2314a53 100644 --- a/src/Cafe/HW/Latte/Core/LatteShaderAssembly.h +++ b/src/Cafe/HW/Latte/Core/LatteShaderAssembly.h @@ -12,7 +12,7 @@ #define GPU7_CF_INST_VTX (0x02) // used only in GS copy program? #define GPU7_CF_INST_LOOP_END (0x05) #define GPU7_CF_INST_LOOP_START_DX10 (0x06) -#define GPU7_CF_INST_LOOP_START_NO_AL (0x07) // (Seen in Project Zero) +#define GPU7_CF_INST_LOOP_START_NO_AL (0x07) // (Seen in Project Zero, Injustice: Gods Among Us) #define GPU7_CF_INST_LOOP_BREAK (0x09) #define GPU7_CF_INST_JUMP (0x0A) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.cpp index cf88b901..c3f7c19e 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.cpp @@ -101,7 +101,8 @@ bool LatteDecompiler_ParseCFInstruction(LatteDecompilerShaderContext* shaderCont // ignored (we use ALU/IF/ELSE/PUSH/POP clauses to determine code flow) return true; } - else if (cf_inst23_7 == GPU7_CF_INST_LOOP_START_DX10 || cf_inst23_7 == GPU7_CF_INST_LOOP_END) + else if (cf_inst23_7 == GPU7_CF_INST_LOOP_START_DX10 || cf_inst23_7 == GPU7_CF_INST_LOOP_END || + cf_inst23_7 == GPU7_CF_INST_LOOP_START_NO_AL) { LatteDecompilerCFInstruction& cfInstruction = instructionList.emplace_back(); // set type and address @@ -966,7 +967,8 @@ void LatteDecompiler_ParseClauses(LatteDecompilerShaderContext* decompilerContex { // no sub-instructions } - else if (cfInstruction.type == GPU7_CF_INST_LOOP_START_DX10 || cfInstruction.type == GPU7_CF_INST_LOOP_END) + else if (cfInstruction.type == GPU7_CF_INST_LOOP_START_DX10 || cfInstruction.type == GPU7_CF_INST_LOOP_END || + cfInstruction.type == GPU7_CF_INST_LOOP_START_NO_AL) { // no sub-instructions } diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp index cf22f05d..19604e0c 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp @@ -441,7 +441,8 @@ void LatteDecompiler_analyzeSubroutine(LatteDecompilerShaderContext* shaderConte { shaderContext->analyzer.modifiesPixelActiveState = true; } - else if (cfInstruction.type == GPU7_CF_INST_LOOP_START_DX10 || cfInstruction.type == GPU7_CF_INST_LOOP_END) + else if (cfInstruction.type == GPU7_CF_INST_LOOP_START_DX10 || cfInstruction.type == GPU7_CF_INST_LOOP_END || + cfInstruction.type == GPU7_CF_INST_LOOP_START_NO_AL) { shaderContext->analyzer.modifiesPixelActiveState = true; } @@ -685,7 +686,8 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD { shaderContext->analyzer.modifiesPixelActiveState = true; } - else if (cfInstruction.type == GPU7_CF_INST_LOOP_START_DX10 || cfInstruction.type == GPU7_CF_INST_LOOP_END) + else if (cfInstruction.type == GPU7_CF_INST_LOOP_START_DX10 || cfInstruction.type == GPU7_CF_INST_LOOP_END || + cfInstruction.type == GPU7_CF_INST_LOOP_START_NO_AL) { shaderContext->analyzer.modifiesPixelActiveState = true; shaderContext->analyzer.hasLoops = true; @@ -929,7 +931,8 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD if (cfCurrentStackDepth < 0) debugBreakpoint(); } - else if (cfInstruction.type == GPU7_CF_INST_LOOP_START_DX10 || cfInstruction.type == GPU7_CF_INST_LOOP_END) + else if (cfInstruction.type == GPU7_CF_INST_LOOP_START_DX10 || cfInstruction.type == GPU7_CF_INST_LOOP_END || + cfInstruction.type == GPU7_CF_INST_LOOP_START_NO_AL) { // no effect on stack depth cfInstruction.activeStackDepth = cfCurrentStackDepth; diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSL.cpp index e19535be..7a6605f8 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSL.cpp @@ -3662,7 +3662,8 @@ void LatteDecompiler_emitClauseCode(LatteDecompilerShaderContext* shaderContext, { src->addFmt("{} = {} == true && {} == true;" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1 - cfInstruction->popCount), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth - cfInstruction->popCount), _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth - cfInstruction->popCount)); } - else if( cfInstruction->type == GPU7_CF_INST_LOOP_START_DX10 ) + else if( cfInstruction->type == GPU7_CF_INST_LOOP_START_DX10 || + cfInstruction->type == GPU7_CF_INST_LOOP_START_NO_AL) { // start of loop // if pixel is disabled, then skip loop diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp index d510140b..6500f7d3 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp @@ -1285,9 +1285,9 @@ void VulkanRenderer::draw_beginSequence() // update shader state LatteSHRC_UpdateActiveShaders(); - if (m_state.drawSequenceSkip) + if (LatteGPUState.activeShaderHasError) { - debug_printf("Skipping drawcalls due to shader error\n"); + cemuLog_logDebugOnce(LogType::Force, "Skipping drawcalls due to shader error"); m_state.drawSequenceSkip = true; cemu_assert_debug(false); return;