From a50ce997df6709b5b84f360cb948fba3720e4c3b Mon Sep 17 00:00:00 2001 From: Samuliak Date: Wed, 7 Aug 2024 21:14:31 +0200 Subject: [PATCH] fix: writing to depth from a fragment shader --- .../LegacyShaderDecompiler/LatteDecompiler.h | 2 ++ .../LatteDecompilerAnalyzer.cpp | 28 +++++++++---------- .../LatteDecompilerEmitMSL.cpp | 13 +++++---- .../LatteDecompilerEmitMSLHeader.hpp | 4 +-- 4 files changed, 25 insertions(+), 22 deletions(-) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h index 78af1dec..57df13b1 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h @@ -182,6 +182,8 @@ struct LatteDecompilerShader // analyzer stage (pixel outputs) uint32 pixelColorOutputMask{ 0 }; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments) + // analyzer stage (depth write) + bool depthWritten{ false }; // analyzer stage (geometry shader parameters/inputs) uint32 ringParameterCount{ 0 }; uint32 ringParameterCountFromPrevStage{ 0 }; // used in geometry shader to hold VS ringParameterCount diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp index 19604e0c..e84e4851 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp @@ -287,15 +287,15 @@ void LatteDecompiler_analyzeTEXClause(LatteDecompilerShaderContext* shaderContex LatteDecompilerShader* shader = shaderContext->shader; for(auto& texInstruction : cfInstruction->instructionsTEX) { - if( texInstruction.opcode == GPU7_TEX_INST_SAMPLE || - texInstruction.opcode == GPU7_TEX_INST_SAMPLE_L || - texInstruction.opcode == GPU7_TEX_INST_SAMPLE_LB || - texInstruction.opcode == GPU7_TEX_INST_SAMPLE_LZ || - texInstruction.opcode == 
GPU7_TEX_INST_SAMPLE_C || + if( texInstruction.opcode == GPU7_TEX_INST_SAMPLE || + texInstruction.opcode == GPU7_TEX_INST_SAMPLE_L || + texInstruction.opcode == GPU7_TEX_INST_SAMPLE_LB || + texInstruction.opcode == GPU7_TEX_INST_SAMPLE_LZ || + texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_L || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_LZ || - texInstruction.opcode == GPU7_TEX_INST_FETCH4 || - texInstruction.opcode == GPU7_TEX_INST_SAMPLE_G || + texInstruction.opcode == GPU7_TEX_INST_FETCH4 || + texInstruction.opcode == GPU7_TEX_INST_SAMPLE_G || texInstruction.opcode == GPU7_TEX_INST_LD ) { if (texInstruction.textureFetch.textureIndex < 0 || texInstruction.textureFetch.textureIndex >= LATTE_NUM_MAX_TEX_UNITS) @@ -313,7 +313,7 @@ void LatteDecompiler_analyzeTEXClause(LatteDecompilerShaderContext* shaderContex shader->textureUnitSamplerAssignment[texInstruction.textureFetch.textureIndex] = texInstruction.textureFetch.samplerIndex; if( texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_L || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_LZ) shader->textureUsesDepthCompare[texInstruction.textureFetch.textureIndex] = true; - + bool useTexelCoords = false; if (texInstruction.opcode == GPU7_TEX_INST_SAMPLE && (texInstruction.textureFetch.unnormalized[0] && texInstruction.textureFetch.unnormalized[1] && texInstruction.textureFetch.unnormalized[2] && texInstruction.textureFetch.unnormalized[3])) useTexelCoords = true; @@ -393,7 +393,7 @@ void LatteDecompiler_analyzeExport(LatteDecompilerShaderContext* shaderContext, } else if( cfInstruction->exportType == 0 && cfInstruction->exportArrayBase == 61 ) { - // writes pixel depth + shader->depthWritten = true; } else debugBreakpoint(); @@ -419,7 +419,7 @@ void LatteDecompiler_analyzeExport(LatteDecompilerShaderContext* shaderContext, void LatteDecompiler_analyzeSubroutine(LatteDecompilerShaderContext* shaderContext, uint32 
cfAddr) { // analyze CF and clauses up to RET statement - + // todo - find cfInstruction index from cfAddr cemu_assert_debug(false); @@ -505,9 +505,9 @@ namespace LatteDecompiler decompilerContext->hasUniformVarBlock = true; else if (decompilerContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE) decompilerContext->hasUniformVarBlock = true; - - bool hasAnyViewportScaleDisabled = - !decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_X_SCALE_ENA() || + + bool hasAnyViewportScaleDisabled = + !decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_X_SCALE_ENA() || !decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_Y_SCALE_ENA() || !decompilerContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_Z_SCALE_ENA(); // we currently only support all on/off. Individual component scaling is not supported @@ -803,7 +803,7 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD for(sint32 i=0; i<LATTE_NUM_MAX_TEX_UNITS; i++) { - if (!shaderContext->output->textureUnitMask[i]) + if (!shaderContext->output->textureUnitMask[i]) { // texture unit not used shader->textureUnitDim[i] = (Latte::E_DIM)0xFF; diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index 56253a71..efe5cf30 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -2441,11 +2441,12 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex src->add(")"); } } - else if( texOpcode == GPU7_TEX_INST_SAMPLE_LZ || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ ) - { - // TODO: correct? - src->add(", level(0.0)"); - } + // TODO: uncomment? + //else if( texOpcode == GPU7_TEX_INST_SAMPLE_LZ || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ ) + //{ + // // TODO: correct?
+ // src->add(", level(0.0)"); + //} } // gradient parameters if (texOpcode == GPU7_TEX_INST_SAMPLE_G) @@ -3215,7 +3216,7 @@ static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDe cemu_assert_unimplemented(); // ukn } - src->add("out.depth = "); + src->add("out.passDepth = "); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, 0); src->add(".x"); src->add(";" _CRLF); diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index c3d63eac..9d52196a 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -272,9 +272,9 @@ namespace LatteDecompiler } // generate depth output for pixel shader - if (decompilerContext->shader->pixelDepthOutputMask) + if (decompilerContext->shader->depthWritten) { - src->add("float passDepth [[depth(any)]];" _CRLF); + src->add("float passDepth [[depth]];" _CRLF); } src->add("};" _CRLF _CRLF);