From af3ce80b7c41206c26772cef080415c37d6886fb Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 26 Jul 2024 18:56:30 +0200 Subject: [PATCH] fix: shader errors and shader crashes --- src/Cafe/HW/Latte/Core/LatteShader.cpp | 8 +- src/Cafe/HW/Latte/Core/LatteShaderGL.cpp | 2 +- .../LatteDecompilerEmitMSL.cpp | 309 +++++++----------- .../LatteDecompilerEmitMSLAttrDecoder.cpp | 64 ++-- .../LatteDecompilerEmitMSLHeader.hpp | 48 +-- .../Latte/Renderer/Metal/LatteTextureMtl.cpp | 2 +- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 79 +++-- .../HW/Latte/Renderer/Metal/MetalRenderer.h | 20 +- .../Renderer/Metal/RendererShaderMtl.cpp | 3 +- .../Latte/Renderer/Metal/RendererShaderMtl.h | 19 +- 10 files changed, 252 insertions(+), 302 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/LatteShader.cpp b/src/Cafe/HW/Latte/Core/LatteShader.cpp index b59702cd..6561e642 100644 --- a/src/Cafe/HW/Latte/Core/LatteShader.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShader.cpp @@ -77,7 +77,7 @@ inline ska::flat_hash_map& LatteSHRC_GetCacheByT if (shaderType == LatteConst::ShaderType::Vertex) return sVertexShaders; else if (shaderType == LatteConst::ShaderType::Geometry) - return sGeometryShaders; + return sGeometryShaders; cemu_assert_debug(shaderType == LatteConst::ShaderType::Pixel); return sPixelShaders; } @@ -320,7 +320,7 @@ void LatteShader_CreateRendererShader(LatteDecompilerShader* shader, bool compil { shaderType = RendererShader::ShaderType::kGeometry; gpShaderType = GraphicPack2::GP_SHADER_TYPE::GEOMETRY; - } + } else if (shader->shaderType == LatteConst::ShaderType::Pixel) { shaderType = RendererShader::ShaderType::kFragment; @@ -443,7 +443,7 @@ void LatteShader_DumpShader(uint64 baseHash, uint64 auxHash, LatteDecompilerShad { if (!ActiveSettings::DumpShadersEnabled()) return; - + const char* suffix = ""; if (shader->shaderType == LatteConst::ShaderType::Vertex) suffix = "vs"; @@ -1011,4 +1011,4 @@ void LatteSHRC_UnloadAll() while(!sPixelShaders.empty()) LatteShader_free(sPixelShaders.begin()->second); cemu_assert_debug(sPixelShaders.empty()); -} \ No newline at end of file +} diff --git a/src/Cafe/HW/Latte/Core/LatteShaderGL.cpp b/src/Cafe/HW/Latte/Core/LatteShaderGL.cpp index b8cb0ce1..09c484e6 100644 --- a/src/Cafe/HW/Latte/Core/LatteShaderGL.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShaderGL.cpp @@ -26,7 +26,7 @@ bool gxShader_checkIfSuccessfullyLinked(GLuint glProgram) void LatteShader_prepareSeparableUniforms(LatteDecompilerShader* shader) { - if (g_renderer->GetType() == RendererAPI::Vulkan) + if (g_renderer->GetType() != RendererAPI::OpenGL) return; auto shaderGL = (RendererShaderGL*)shader->shader; diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index 2ffcfa0f..9981d5ea 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -628,11 +628,11 @@ static void _emitUniformAccessCode(LatteDecompilerShaderContext* shaderContext, cemu_assert_debug(remappedUniformEntry); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, requiredType); if(shaderContext->shader->shaderType == LatteConst::ShaderType::Vertex ) - src->addFmt("uf_remappedVS[{}]", remappedUniformEntry->mappedIndex); + src->addFmt("supportBuffer.remappedVS[{}]", remappedUniformEntry->mappedIndex); else if(shaderContext->shader->shaderType == LatteConst::ShaderType::Pixel ) - src->addFmt("uf_remappedPS[{}]", remappedUniformEntry->mappedIndex); + src->addFmt("supportBuffer.remappedPS[{}]", remappedUniformEntry->mappedIndex); else if(shaderContext->shader->shaderType == LatteConst::ShaderType::Geometry ) - src->addFmt("uf_remappedGS[{}]", remappedUniformEntry->mappedIndex); + src->addFmt("supportBuffer.remappedGS[{}]", remappedUniformEntry->mappedIndex); else debugBreakpoint(); _appendChannelAccess(src, aluInstruction->sourceOperand[operandIndex].chan); @@ -643,11 +643,11 @@ static void _emitUniformAccessCode(LatteDecompilerShaderContext* shaderContext, // uniform registers are accessed with unpredictable (dynamic) offset _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, requiredType); if(shaderContext->shader->shaderType == LatteConst::ShaderType::Vertex ) - src->add("uf_uniformRegisterVS["); + src->add("supportBuffer.uniformRegisterVS["); else if (shaderContext->shader->shaderType == LatteConst::ShaderType::Pixel) - src->add("uf_uniformRegisterPS["); + src->add("supportBuffer.uniformRegisterPS["); else if(shaderContext->shader->shaderType == LatteConst::ShaderType::Geometry ) - src->add("uf_uniformRegisterGS["); + src->add("supportBuffer.uniformRegisterGS["); else debugBreakpoint(); _emitUniformAccessIndexCode(shaderContext, aluInstruction, operandIndex); @@ -802,7 +802,7 @@ static void _emitOperandInputCode(LatteDecompilerShaderContext* shaderContext, L if( currentRegisterElementType == LATTE_DECOMPILER_DTYPE_SIGNED_INT ) { // need to convert (not cast) from int bits to float - src->add("intBitsToFloat("); + src->add("as_type("); } else if( currentRegisterElementType == LATTE_DECOMPILER_DTYPE_FLOAT ) { @@ -872,7 +872,7 @@ static void _emitOperandInputCode(LatteDecompilerShaderContext* shaderContext, L src->add(_FormatFloatAsConstant(*(float*)&constVal)); } else - src->addFmt("intBitsToFloat(0x{:08x})", constVal); + src->addFmt("as_type(0x{:08x})", constVal); } } else if( GPU7_ALU_SRC_IS_CFILE(aluInstruction->sourceOperand[operandIndex].sel) ) @@ -1026,7 +1026,7 @@ static void _emitALUOP2InstructionCode(LatteDecompilerShaderContext* shaderConte src->add(" = "); if( outputType != LATTE_DECOMPILER_DTYPE_SIGNED_INT ) debugBreakpoint(); // todo - src->add("floatBitsToInt(tempResultf)"); + src->add("as_type(tempResultf)"); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_MOVA_INT ) @@ -1123,9 +1123,9 @@ static void _emitALUOP2InstructionCode(LatteDecompilerShaderContext* shaderConte _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(");" _CRLF); // INF becomes 0.0 - src->add("if( isinf(tempResultf) == true && (floatBitsToInt(tempResultf)&0x80000000) == 0 ) tempResultf = 0.0;" _CRLF); + src->add("if( isinf(tempResultf) == true && (as_type(tempResultf)&0x80000000) == 0 ) tempResultf = 0.0;" _CRLF); // -INF becomes -0.0 - src->add("else if( isinf(tempResultf) == true && (floatBitsToInt(tempResultf)&0x80000000) != 0 ) tempResultf = -0.0;" _CRLF); + src->add("else if( isinf(tempResultf) == true && (as_type(tempResultf)&0x80000000) != 0 ) tempResultf = -0.0;" _CRLF); // assign result to output _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); @@ -1145,14 +1145,14 @@ static void _emitALUOP2InstructionCode(LatteDecompilerShaderContext* shaderConte if (aluInstruction->opcode == ALU_OP2_INST_RECIPSQRT_CLAMPED) { // note: if( -INF < 0.0 ) does not resolve to true - src->add("if( isinf(tempResultf) == true && (floatBitsToInt(tempResultf)&0x80000000) != 0 ) tempResultf = -3.40282347E+38F;" _CRLF); - src->add("else if( isinf(tempResultf) == true && (floatBitsToInt(tempResultf)&0x80000000) == 0 ) tempResultf = 3.40282347E+38F;" _CRLF); + src->add("if( isinf(tempResultf) == true && (as_type(tempResultf)&0x80000000) != 0 ) tempResultf = -3.40282347E+38F;" _CRLF); + src->add("else if( isinf(tempResultf) == true && (as_type(tempResultf)&0x80000000) == 0 ) tempResultf = 3.40282347E+38F;" _CRLF); } else if (aluInstruction->opcode == ALU_OP2_INST_RECIPSQRT_FF) { // untested (BotW bombs) - src->add("if( isinf(tempResultf) == true && (floatBitsToInt(tempResultf)&0x80000000) != 0 ) tempResultf = -0.0;" _CRLF); - src->add("else if( isinf(tempResultf) == true && (floatBitsToInt(tempResultf)&0x80000000) == 0 ) tempResultf = 0.0;" _CRLF); + src->add("if( isinf(tempResultf) == true && (as_type(tempResultf)&0x80000000) != 0 ) tempResultf = -0.0;" _CRLF); + src->add("else if( isinf(tempResultf) == true && (as_type(tempResultf)&0x80000000) == 0 ) tempResultf = 0.0;" _CRLF); } // assign result to output _emitInstructionOutputVariableName(shaderContext, aluInstruction); @@ -1704,8 +1704,8 @@ static void _emitALUReductionInstructionCode(LatteDecompilerShaderContext* shade src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); - // dot(vec4(op0),vec4(op1)) - src->add("dot(vec4("); + // dot(float4(op0),float4(op1)) + src->add("dot(float4("); _emitOperandInputCode(shaderContext, aluRedcInstruction[0], 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[1], 0, LATTE_DECOMPILER_DTYPE_FLOAT); @@ -1713,7 +1713,7 @@ static void _emitALUReductionInstructionCode(LatteDecompilerShaderContext* shade _emitOperandInputCode(shaderContext, aluRedcInstruction[2], 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[3], 0, LATTE_DECOMPILER_DTYPE_FLOAT); - src->add("),vec4("); + src->add("),float4("); _emitOperandInputCode(shaderContext, aluRedcInstruction[0], 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[1], 1, LATTE_DECOMPILER_DTYPE_FLOAT); @@ -1730,7 +1730,7 @@ static void _emitALUReductionInstructionCode(LatteDecompilerShaderContext* shade { /* * How the CUBE instruction works (guessed mostly, based on DirectX/OpenGL spec): - Input: vec4, 3d direction vector (can be unnormalized) + w component (which can be ignored, since it only scales the vector but does not affect the direction) + Input: float4, 3d direction vector (can be unnormalized) + w component (which can be ignored, since it only scales the vector but does not affect the direction) First we figure out the major axis (closest axis-aligned vector). There are six possible vectors: +rx 0 @@ -1758,7 +1758,7 @@ static void _emitALUReductionInstructionCode(LatteDecompilerShaderContext* shade sint32 outputType; src->add("redcCUBE("); - src->add("vec4("); + src->add("float4("); _emitOperandInputCode(shaderContext, aluRedcInstruction[0], 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[1], 0, LATTE_DECOMPILER_DTYPE_FLOAT); @@ -1767,7 +1767,7 @@ static void _emitALUReductionInstructionCode(LatteDecompilerShaderContext* shade src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[3], 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add("),"); - src->add("vec4("); + src->add("float4("); _emitOperandInputCode(shaderContext, aluRedcInstruction[0], 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[1], 1, LATTE_DECOMPILER_DTYPE_FLOAT); @@ -1887,12 +1887,12 @@ bool _isPVUsedInNextGroup(LatteDecompilerCFInstruction* cfInstruction, sint32 st } */ -static void _emitVec3(LatteDecompilerShaderContext* shaderContext, uint32 dataType, LatteDecompilerALUInstruction* aluInst0, sint32 opIdx0, LatteDecompilerALUInstruction* aluInst1, sint32 opIdx1, LatteDecompilerALUInstruction* aluInst2, sint32 opIdx2) +static void _emitFloat3(LatteDecompilerShaderContext* shaderContext, uint32 dataType, LatteDecompilerALUInstruction* aluInst0, sint32 opIdx0, LatteDecompilerALUInstruction* aluInst1, sint32 opIdx1, LatteDecompilerALUInstruction* aluInst2, sint32 opIdx2) { StringBuf* src = shaderContext->shaderSource; if (dataType == LATTE_DECOMPILER_DTYPE_FLOAT) { - src->add("vec3("); + src->add("float3("); _emitOperandInputCode(shaderContext, aluInst0, opIdx0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluInst1, opIdx1, LATTE_DECOMPILER_DTYPE_FLOAT); @@ -1902,7 +1902,7 @@ static void _emitVec3(LatteDecompilerShaderContext* shaderContext, uint32 dataTy } else if (dataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) { - src->add("ivec3("); + src->add("int3("); _emitOperandInputCode(shaderContext, aluInst0, opIdx0, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(","); _emitOperandInputCode(shaderContext, aluInst1, opIdx1, LATTE_DECOMPILER_DTYPE_SIGNED_INT); @@ -2002,7 +2002,7 @@ static void _emitALUClauseCode(LatteDecompilerShaderContext* shaderContext, Latt { _emitInstructionOutputVariableName(shaderContext, &aluInstruction); src->add(" = "); - src->add("floatBitsToInt(intBitsToFloat("); + src->add("as_type("); _emitInstructionOutputVariableName(shaderContext, &aluInstruction); src->add(")"); if( aluInstruction.omod == 1 ) @@ -2099,9 +2099,9 @@ static void _emitTEXSampleCoordInputComponent(LatteDecompilerShaderContext* shad if(interpretSrcAsType == LATTE_DECOMPILER_DTYPE_SIGNED_INT ) { if( elementSel == 4 ) - src->add("floatBitsToInt(0.0)"); + src->add("as_type(0.0)"); else if( elementSel == 5 ) - src->add("floatBitsToInt(1.0)"); + src->add("as_type(1.0)"); } else if(interpretSrcAsType == LATTE_DECOMPILER_DTYPE_FLOAT ) { @@ -2116,7 +2116,7 @@ static const char* _texGprAccessElemTable[8] = {"x","y","z","w","_","_","_","_"} static char* _getTexGPRAccess(LatteDecompilerShaderContext* shaderContext, sint32 gprIndex, uint32 dataType, sint8 selX, sint8 selY, sint8 selZ, sint8 selW, char* tempBuffer) { - // intBitsToFloat(R{}i.w) + // as_type(R{}i.w) *tempBuffer = '\0'; uint8 elemCount = (selX > 0 ? 1 : 0) + (selY > 0 ? 1 : 0) + (selZ > 0 ? 1 : 0) + (selW > 0 ? 1 : 0); if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) @@ -2124,7 +2124,7 @@ static char* _getTexGPRAccess(LatteDecompilerShaderContext* shaderContext, sint3 if (dataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) ; // no conversion else if (dataType == LATTE_DECOMPILER_DTYPE_FLOAT) - strcat(tempBuffer, "intBitsToFloat("); + strcat(tempBuffer, "as_type("); else cemu_assert_unimplemented(); strcat(tempBuffer, _getRegisterVarName(shaderContext, gprIndex)); @@ -2230,16 +2230,16 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex if(numWrittenElements == 1) src->add(" = int("); else - shaderContext->shaderSource->addFmt(" = ivec{}(", numWrittenElements); + shaderContext->shaderSource->addFmt(" = int{}(", numWrittenElements); } else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) - src->add(" = uintBitsToFloat("); + src->add(" = as_type("); } else { // float samplers if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) - src->add(" = floatBitsToInt("); + src->add(" = as_type("); else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->add(" = ("); } @@ -2256,104 +2256,26 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex if (numWrittenElements == 1) shaderContext->shaderSource->add("0.0"); else - shaderContext->shaderSource->addFmt("vec{}(0.0)", numWrittenElements); + shaderContext->shaderSource->addFmt("float{}(0.0)", numWrittenElements); shaderContext->shaderSource->add(");" _CRLF); return; } - - if (texOpcode == GPU7_TEX_INST_SAMPLE && (texInstruction->textureFetch.unnormalized[0] && texInstruction->textureFetch.unnormalized[1] && texInstruction->textureFetch.unnormalized[2] && texInstruction->textureFetch.unnormalized[3]) ) + src->addFmt("tex{}.", texInstruction->textureFetch.textureIndex); + if ((texOpcode == GPU7_TEX_INST_SAMPLE && (texInstruction->textureFetch.unnormalized[0] && texInstruction->textureFetch.unnormalized[1] && texInstruction->textureFetch.unnormalized[2] && texInstruction->textureFetch.unnormalized[3])) || + texOpcode == GPU7_TEX_INST_LD) { // texture is likely a RECT if (hasOffset) cemu_assert_unimplemented(); - src->add("texelFetch("); + src->add("read("); unnormalizationHandled = true; useTexelCoordinates = true; } - else if( texOpcode == GPU7_TEX_INST_FETCH4 ) - { - if( hasOffset ) - cemu_assert_unimplemented(); - src->add("textureGather("); - } - else if( texOpcode == GPU7_TEX_INST_LD ) - { - if( hasOffset ) - cemu_assert_unimplemented(); - src->add("texelFetch("); - unnormalizationHandled = true; - useTexelCoordinates = true; - } - else if( texOpcode == GPU7_TEX_INST_SAMPLE_L ) - { - // sample with LOD value set in gpr.w (replaces computed LOD value) - if( hasOffset ) - src->add("textureLodOffset("); - else - src->add("textureLod("); - } - else if (texOpcode == GPU7_TEX_INST_SAMPLE_LZ) - { - // sample with LOD set to 0.0 (replaces computed LOD value) - if (hasOffset) - src->add("textureLodOffset("); - else - src->add("textureLod("); - } - else if (texOpcode == GPU7_TEX_INST_SAMPLE_LB) - { - // sample with LOD biased - // note: AMD doc says LOD bias is calculated from instruction LOD_BIAS field. But it appears that LOD bias is taken from input register. Might actually be both? - if (hasOffset) - src->add("textureOffset("); - else - src->add("texture("); - } - else if (texOpcode == GPU7_TEX_INST_SAMPLE) - { - if (hasOffset) - src->add("textureOffset("); - else - src->add("texture("); - } - else if (texOpcode == GPU7_TEX_INST_SAMPLE_C_L) - { - // sample with LOD value set in gpr.w (replaces computed LOD value) - if (hasOffset) - src->add("textureLodOffset("); - else - src->add("textureLod("); - } - else if (texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ) - { - // sample with LOD set to 0.0 (replaces computed LOD value) - if (hasOffset) - src->add("textureLodOffset("); - else - src->add("textureLod("); - } - else if (texOpcode == GPU7_TEX_INST_SAMPLE_C) - { - if (hasOffset) - src->add("textureOffset("); - else - src->add("texture("); - } - else if (texOpcode == GPU7_TEX_INST_SAMPLE_G) - { - if (hasOffset) - cemu_assert_unimplemented(); - src->add("textureGrad("); - } else { - if( hasOffset ) - cemu_assert_unimplemented(); - cemu_assert_unimplemented(); - src->add("texture("); + src->addFmt("sample(samplr{}, ", texInstruction->textureFetch.textureIndex); } - src->addFmt("tex{}, ", texInstruction->textureFetch.textureIndex); // for textureGather() add shift (todo: depends on rounding mode set in sampler registers?) if (texOpcode == GPU7_TEX_INST_FETCH4) @@ -2370,7 +2292,7 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex // Vulkan uses truncate mode when point sampling (min and mag is both nearest) otherwise it uses rounding // adding a small fixed bias is enough to avoid vendor-specific cases where small inaccuracies cause the number to get rounded down due to truncation - src->addFmt("vec2(0.0001) + "); + src->addFmt("float2(0.0001) + "); } } @@ -2380,15 +2302,15 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex // handle integer coordinates for texelFetch if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA) { - src->add("ivec2("); - src->add("vec2("); + src->add("int2("); + src->add("float2("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, texCoordDataType); src->addFmt(", "); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, texCoordDataType); - src->addFmt(")*uf_tex{}Scale", texInstruction->textureFetch.textureIndex); // close vec2 and scale + src->addFmt(")*supportBuffer.tex{}Scale", texInstruction->textureFetch.textureIndex); // close float2 and scale - src->add("), 0"); // close ivec2 and lod param + src->add("), 0"); // close int2 and lod param // todo - lod } else if (texDim == Latte::E_DIM::DIM_1D) @@ -2397,7 +2319,7 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex src->add("int("); src->add("float("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, (texOpcode == GPU7_TEX_INST_LD) ? LATTE_DECOMPILER_DTYPE_SIGNED_INT : LATTE_DECOMPILER_DTYPE_FLOAT); - src->addFmt(")*uf_tex{}Scale.x", texInstruction->textureFetch.textureIndex); + src->addFmt(")*supportBuffer.tex{}Scale.x", texInstruction->textureFetch.textureIndex); src->add("), 0"); // todo - lod } @@ -2412,8 +2334,8 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex // shadow sampler if (texDim == Latte::E_DIM::DIM_2D_ARRAY) { - // 3 coords + compare value (as vec4) - src->add("vec4("); + // 3 coords + compare value (as float4) + src->add("float4("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); @@ -2429,7 +2351,7 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex { debugBreakpoint(); } - src->add("vec4("); + src->add("float4("); src->addFmt("redcCUBEReverse({},", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0)); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->addFmt(")"); @@ -2442,22 +2364,22 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex { debugBreakpoint(); } - src->addFmt("vec3({},0.0,{})", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], -1, -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1)); + src->addFmt("float3({},0.0,{})", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], -1, -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1)); } else { - // 2 coords + compare value (as vec3) + // 2 coords + compare value (as float3) if (texInstruction->textureFetch.srcSel[0] >= 4 && texInstruction->textureFetch.srcSel[1] >= 4) { debugBreakpoint(); } - src->addFmt("vec3({}, {})", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1)); + src->addFmt("float3({}, {})", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1)); } } else if( texDim == Latte::E_DIM::DIM_3D || texDim == Latte::E_DIM::DIM_2D_ARRAY ) { // 3 coords - src->add("vec3("); + src->add("float3("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); @@ -2470,7 +2392,7 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex // 2 coords + faceId cemu_assert_debug(texInstruction->textureFetch.srcSel[0] < 4); cemu_assert_debug(texInstruction->textureFetch.srcSel[1] < 4); - src->add("vec4("); + src->add("float4("); src->addFmt("redcCUBEReverse({},", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0)); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(")"); @@ -2484,14 +2406,14 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex else { // 2 coords - src->add("vec2("); + src->add("float2("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); // avoid truncate to effectively round downwards on texel edges if (ActiveSettings::ForceSamplerRoundToPrecision()) - src->addFmt("+ vec2(1.0)/vec2(textureSize(tex{}, 0))/512.0", texInstruction->textureFetch.textureIndex); + src->addFmt("+ float2(1.0)/float2(textureSize(tex{}, 0))/512.0", texInstruction->textureFetch.textureIndex); } // lod or lod bias parameter if( texOpcode == GPU7_TEX_INST_SAMPLE_L || texOpcode == GPU7_TEX_INST_SAMPLE_LB || texOpcode == GPU7_TEX_INST_SAMPLE_C_L) @@ -2547,9 +2469,9 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex if( offsetComponentCount == 1 ) src->addFmt(",{}", texInstruction->textureFetch.offsetX/2); else if( offsetComponentCount == 2 ) - src->addFmt(",ivec2({},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2); + src->addFmt(",int2({},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2); else if( offsetComponentCount == 3 ) - src->addFmt(",ivec3({},{},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2); + src->addFmt(",int3({},{},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2); } } // lod bias @@ -2661,17 +2583,17 @@ static void _emitTEXGetTextureResInfoCode(LatteDecompilerShaderContext* shaderCo auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex]; if (texDim == Latte::E_DIM::DIM_1D) - src->addFmt(" = ivec4(textureSize(tex{}, 0),1,1,1).", texInstruction->textureFetch.textureIndex); + src->addFmt(" = int4(textureSize(tex{}, 0),1,1,1).", texInstruction->textureFetch.textureIndex); else if (texDim == Latte::E_DIM::DIM_1D_ARRAY) - src->addFmt(" = ivec4(textureSize(tex{}, 0),1,1).", texInstruction->textureFetch.textureIndex); + src->addFmt(" = int4(textureSize(tex{}, 0),1,1).", texInstruction->textureFetch.textureIndex); else if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA) - src->addFmt(" = ivec4(textureSize(tex{}, 0),1,1).", texInstruction->textureFetch.textureIndex); + src->addFmt(" = int4(textureSize(tex{}, 0),1,1).", texInstruction->textureFetch.textureIndex); else if (texDim == Latte::E_DIM::DIM_2D_ARRAY) - src->addFmt(" = ivec4(textureSize(tex{}, 0),1).", texInstruction->textureFetch.textureIndex); + src->addFmt(" = int4(textureSize(tex{}, 0),1).", texInstruction->textureFetch.textureIndex); else { cemu_assert_debug(false); - src->addFmt(" = ivec4(textureSize(tex{}, 0),1,1).", texInstruction->textureFetch.textureIndex); + src->addFmt(" = int4(textureSize(tex{}, 0),1,1).", texInstruction->textureFetch.textureIndex); } for(sint32 f=0; f<4; f++) @@ -2725,16 +2647,16 @@ static void _emitTEXGetCompTexLodCode(LatteDecompilerShaderContext* shaderContex { // 3 coordinates if(shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) - src->addFmt("vec4(textureQueryLod(tex{}, {}.{}{}{}),0.0,0.0)", texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]); + src->addFmt("float4(textureQueryLod(tex{}, {}.{}{}{}),0.0,0.0)", texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]); else - src->addFmt("vec4(textureQueryLod(tex{}, intBitsToFloat({}.{}{}{})),0.0,0.0)", texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]); + src->addFmt("float4(textureQueryLod(tex{}, as_type({}.{}{}{})),0.0,0.0)", texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]); } else { if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) - src->addFmt("vec4(textureQueryLod(tex{}, {}.{}{}),0.0,0.0)", texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]); + src->addFmt("float4(textureQueryLod(tex{}, {}.{}{}),0.0,0.0)", texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]); else - src->addFmt("vec4(textureQueryLod(tex{}, intBitsToFloat({}.{}{})),0.0,0.0)", texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]); + src->addFmt("float4(textureQueryLod(tex{}, as_type({}.{}{})),0.0,0.0)", texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]); debugBreakpoint(); } @@ -2768,7 +2690,7 @@ static void _emitTEXSetCubemapIndexCode(LatteDecompilerShaderContext* shaderCont const char* resultElemTable[4] = {"x","y","z","w"}; if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) - src->addFmt(" = intBitsToFloat(R{}i.{});" _CRLF, texInstruction->srcGpr, resultElemTable[texInstruction->textureFetch.srcSel[0]]); + src->addFmt(" = as_type(R{}i.{});" _CRLF, texInstruction->srcGpr, resultElemTable[texInstruction->textureFetch.srcSel[0]]); else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->addFmt(" = R{}f.{};" _CRLF, texInstruction->srcGpr, resultElemTable[texInstruction->textureFetch.srcSel[0]]); else @@ -2942,7 +2864,7 @@ static void _emitTEXVFetchCode(LatteDecompilerShaderContext* shaderContext, Latt src->add(" = "); if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) - src->add("floatBitsToInt("); + src->add("as_type("); else src->add("("); @@ -2951,7 +2873,7 @@ static void _emitTEXVFetchCode(LatteDecompilerShaderContext* shaderContext, Latt if( shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT ) src->addFmt("{}.{}", _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]]); else - src->addFmt("floatBitsToInt({}.{})", _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]]); + src->addFmt("as_type({}.{})", _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]]); src->add("]."); @@ -2983,7 +2905,7 @@ static void _emitTEXReadMemCode(LatteDecompilerShaderContext* shaderContext, Lat src->add(" = "); if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) - src->add("floatBitsToInt("); + src->add("as_type("); else src->add("("); @@ -2999,13 +2921,13 @@ static void _emitTEXReadMemCode(LatteDecompilerShaderContext* shaderContext, Lat { readCount = 2; // todo - src->add("vec2(0.0,0.0)"); + src->add("float2(0.0,0.0)"); } else if (texInstruction->memRead.format == FMT_32_32_32_FLOAT) { readCount = 3; // todo - src->add("vec3(0.0,0.0,0.0)"); + src->add("float3(0.0,0.0,0.0)"); } else { @@ -3068,14 +2990,14 @@ static void _emitExportGPRReadCode(LatteDecompilerShaderContext* shaderContext, if(numOutputs == 1) src->add("float("); else - src->addFmt("vec{}(", numOutputs); + src->addFmt("float{}(", numOutputs); } else if (requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) { if (numOutputs == 1) src->add("int("); else - src->addFmt("ivec{}(", numOutputs); + src->addFmt("int{}(", numOutputs); } else cemu_assert_unimplemented(); @@ -3149,17 +3071,17 @@ static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDe if (hasAnyViewportScaleDisabled) { - src->add("vec4 finalPos = "); + src->add("float4 finalPos = "); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, 0); src->add(";" _CRLF); - src->add("finalPos.xy = finalPos.xy * uf_windowSpaceToClipSpaceTransform - vec2(1.0,1.0);"); - src->add("SET_POSITION(finalPos);"); + src->add("finalPos.xy = finalPos.xy * supportBuffer.windowSpaceToClipSpaceTransform - float2(1.0,1.0);"); + src->add("out.position = finalPos;"); } else { - src->add("SET_POSITION("); + src->add("out.position = "); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, 0); - src->add(");" _CRLF); + src->add(";" _CRLF); } } else if (cfInstruction->exportType == 1 && cfInstruction->exportArrayBase == GPU7_DECOMPILER_CF_EXPORT_POINT_SIZE ) @@ -3181,7 +3103,7 @@ static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDe uint32 vsSemanticId = _getVertexShaderOutParamSemanticId(shaderContext->contextRegisters, paramIndex); if (vsSemanticId != 0xFF) { - src->addFmt("passParameterSem{} = ", vsSemanticId); + src->addFmt("out.passParameterSem{} = ", vsSemanticId); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, 0); src->add(";" _CRLF); } @@ -3235,7 +3157,7 @@ static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDe src->add(">="); break; } - src->add(" uf_alphaTestRef"); + src->add(" supportBuffer.alphaTestRef"); src->add(") == false) discard;" _CRLF); } // pixel color output @@ -3395,13 +3317,13 @@ static void _emitCFRingWriteCode(LatteDecompilerShaderContext* shaderContext, La if (parameterExportType == 1 && parameterExportBase == GPU7_DECOMPILER_CF_EXPORT_BASE_POSITION) { src->add("{" _CRLF); - src->addFmt("vec4 pos = vec4(0.0,0.0,0.0,1.0);" _CRLF); + src->addFmt("float4 pos = float4(0.0,0.0,0.0,1.0);" _CRLF); src->addFmt("pos."); _emitXYZWByMask(src, cfInstruction->memWriteCompMask); src->addFmt(" = "); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, burstIndex); src->add(";" _CRLF); - src->add("SET_POSITION(pos);" _CRLF); + src->add("out.position = pos;" _CRLF); src->add("}" _CRLF); } else if (parameterExportType == 2 && parameterExportBase < 16) @@ -3645,7 +3567,7 @@ void LatteDecompiler_emitClauseCodeMSL(LatteDecompilerShaderContext* shaderConte src->addFmt("if( {} == true ) {{" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1)); // write point size if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false) - src->add("gl_PointSize = uf_pointSize;" _CRLF); + src->add("gl_PointSize = supportBuffer.pointSize;" _CRLF); // emit vertex src->add("EmitVertex();" _CRLF); // increment transform feedback pointer @@ -3681,11 +3603,11 @@ void LatteDecompiler_emitHelperFunctions(LatteDecompilerShaderContext* shaderCon { if( shaderContext->analyzer.hasRedcCUBE ) { - fCStr_shaderSource->add("void redcCUBE(vec4 src0, vec4 src1, out vec3 stm, out int faceId)\r\n" + fCStr_shaderSource->add("void redcCUBE(float4 src0, float4 src1, out float3 stm, out int faceId)\r\n" "{\r\n" "// stm -> x .. s, y .. t, z .. MajorAxis*2.0\r\n" - "vec3 inputCoord = normalize(vec3(src1.y, src1.x, src0.x));\r\n" + "float3 inputCoord = normalize(float3(src1.y, src1.x, src0.x));\r\n" "float rx = inputCoord.x;\r\n" "float ry = inputCoord.y;\r\n" @@ -3693,7 +3615,7 @@ void LatteDecompiler_emitHelperFunctions(LatteDecompilerShaderContext* shaderCon "if( abs(rx) > abs(ry) && abs(rx) > abs(rz) )\r\n" "{\r\n" "stm.z = rx*2.0;\r\n" - "stm.xy = vec2(ry,rz); \r\n" + "stm.xy = float2(ry,rz); \r\n" "if( rx >= 0.0 )\r\n" "{\r\n" "faceId = 0;\r\n" @@ -3706,7 +3628,7 @@ void LatteDecompiler_emitHelperFunctions(LatteDecompilerShaderContext* shaderCon "else if( abs(ry) > abs(rx) && abs(ry) > abs(rz) )\r\n" "{\r\n" "stm.z = ry*2.0;\r\n" - "stm.xy = vec2(rx,rz); \r\n" + "stm.xy = float2(rx,rz); \r\n" "if( ry >= 0.0 )\r\n" "{\r\n" "faceId = 2;\r\n" @@ -3719,7 +3641,7 @@ void LatteDecompiler_emitHelperFunctions(LatteDecompilerShaderContext* shaderCon "else //if( abs(rz) > abs(ry) && abs(rz) > abs(rx) )\r\n" "{\r\n" "stm.z = rz*2.0;\r\n" - "stm.xy = vec2(rx,ry); \r\n" + "stm.xy = float2(rx,ry); \r\n" "if( rz >= 0.0 )\r\n" "{\r\n" "faceId = 4;\r\n" @@ -3734,39 +3656,39 @@ void LatteDecompiler_emitHelperFunctions(LatteDecompilerShaderContext* shaderCon if( shaderContext->analyzer.hasCubeMapTexture ) { - fCStr_shaderSource->add("vec3 redcCUBEReverse(vec2 st, int faceId)\r\n" + fCStr_shaderSource->add("float3 redcCUBEReverse(float2 st, int faceId)\r\n" "{\r\n" "st.yx = st.xy;\r\n" - "vec3 v;\r\n" + "float3 v;\r\n" "float majorAxis = 1.0;\r\n" "if( faceId == 0 )\r\n" "{\r\n" - "v.yz = (st-vec2(1.5))*(majorAxis*2.0);\r\n" + "v.yz = (st-float2(1.5))*(majorAxis*2.0);\r\n" "v.x = 1.0;\r\n" "}\r\n" "else if( faceId == 1 )\r\n" "{\r\n" - "v.yz = (st-vec2(1.5))*(majorAxis*2.0);\r\n" + "v.yz = (st-float2(1.5))*(majorAxis*2.0);\r\n" "v.x = -1.0;\r\n" "}\r\n" "else if( faceId == 2 )\r\n" "{\r\n" - "v.xz = (st-vec2(1.5))*(majorAxis*2.0);\r\n" + "v.xz = (st-float2(1.5))*(majorAxis*2.0);\r\n" "v.y = 1.0;\r\n" "}\r\n" "else if( faceId == 3 )\r\n" "{\r\n" - "v.xz = (st-vec2(1.5))*(majorAxis*2.0);\r\n" + "v.xz = (st-float2(1.5))*(majorAxis*2.0);\r\n" "v.y = -1.0;\r\n" "}\r\n" "else if( faceId == 4 )\r\n" "{\r\n" - "v.xy = (st-vec2(1.5))*(majorAxis*2.0);\r\n" + "v.xy = (st-float2(1.5))*(majorAxis*2.0);\r\n" "v.z = 1.0;\r\n" "}\r\n" "else\r\n" "{\r\n" - "v.xy = (st-vec2(1.5))*(majorAxis*2.0);\r\n" + "v.xy = (st-float2(1.5))*(majorAxis*2.0);\r\n" "v.z = -1.0;\r\n" "}\r\n" @@ -3779,10 +3701,10 @@ void LatteDecompiler_emitHelperFunctions(LatteDecompilerShaderContext* shaderCon "int clampFI32(int v)\r\n" "{\r\n" "if( v == 0x7FFFFFFF )\r\n" - " return floatBitsToInt(1.0);\r\n" + " return as_type(1.0);\r\n" "else if( v == 0xFFFFFFFF )\r\n" - " return floatBitsToInt(0.0);\r\n" - "return floatBitsToInt(clamp(intBitsToFloat(v), 0.0, 1.0));\r\n" + " return as_type(0.0);\r\n" + "return as_type(clamp(as_type(v), 0.0, 1.0));\r\n" "}\r\n"); // mul non-ieee way (0*NaN/INF => 0.0) if (shaderContext->options->strictMul) @@ -3791,7 +3713,7 @@ void LatteDecompiler_emitHelperFunctions(LatteDecompilerShaderContext* shaderCon //fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ return mix(a*b,0.0,a==0.0||b==0.0); }" STR_LINEBREAK); //fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ return mix(vec2(a*b,0.0),vec2(0.0,0.0),(equal(vec2(a),vec2(0.0,0.0))||equal(vec2(b),vec2(0.0,0.0)))).x; }" STR_LINEBREAK); //fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ if( a == 0.0 || b == 0.0 ) return 0.0; return a*b; }" STR_LINEBREAK); - //fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){float r = a*b;r = intBitsToFloat(floatBitsToInt(r)&(((floatBitsToInt(a) != 0) && (floatBitsToInt(b) != 0))?0xFFFFFFFF:0));return r;}" STR_LINEBREAK); works + //fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){float r = a*b;r = as_type(floatBitsToInt(r)&(((floatBitsToInt(a) != 0) && (floatBitsToInt(b) != 0))?0xFFFFFFFF:0));return r;}" STR_LINEBREAK); works // for "min" it used to be: float mul_nonIEEE(float a, float b){ return min(a*b,min(abs(a)*3.40282347E+38F,abs(b)*3.40282347E+38F)); } @@ -3836,9 +3758,9 @@ static void LatteDecompiler_emitAttributeImport(LatteDecompilerShaderContext* sh LatteDecompiler_emitAttributeDecodeMSL(shaderContext->shader, src, &attrib); if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) - src->addFmt("{} = ivec4(", _getRegisterVarName(shaderContext, registerIndex)); + src->addFmt("{} = int4(", _getRegisterVarName(shaderContext, registerIndex)); else - src->addFmt("{} = vec4(", _getRegisterVarName(shaderContext, registerIndex)); + src->addFmt("{} = float4(", _getRegisterVarName(shaderContext, registerIndex)); for (sint32 f = 0; f < 4; f++) { uint8 ds = attrib.ds[f]; @@ -3874,23 +3796,28 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, src->addFmt("// usesIntegerValues: {}" _CRLF, shaderContext->analyzer.usesIntegerValues?"true":"false"); src->addFmt(_CRLF); #endif + // include metal standard library + src->add("#include " _CRLF); + src->add("using namespace metal;" _CRLF); // header part (definitions for inputs and outputs) LatteDecompiler::emitHeader(shaderContext); // helper functions LatteDecompiler_emitHelperFunctions(shaderContext, src); + const char* outputTypeName; switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: - src->add("VertexOut"); + outputTypeName = "VertexOut"; break; case LatteConst::ShaderType::Pixel: - src->add("FragmentOut"); + outputTypeName = "FragmentOut"; break; } // start of main - src->add(" main0("); + src->addFmt("{} main0(", outputTypeName); LatteDecompiler::emitInputs(shaderContext); src->add(") {" _CRLF); + src->addFmt("{} out;" _CRLF, outputTypeName); // variable definition if (shaderContext->typeTracker.useArrayGPRs == false) { @@ -3997,7 +3924,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, cemu_assert_debug((shaderContext->output->streamoutBufferStride[i]&3) == 0); if (shader->shaderType == LatteConst::ShaderType::Vertex) // vertex shader - src->addFmt("int sbBase{} = uf_streamoutBufferBase{}/4 + (vid + uf_verticesPerInstance * iid)*{};" _CRLF, i, i, shaderContext->output->streamoutBufferStride[i] / 4); + src->addFmt("int sbBase{} = supportBuffer.streamoutBufferBase{}/4 + (vid + supportBuffer.verticesPerInstance * iid)*{};" _CRLF, i, i, shaderContext->output->streamoutBufferStride[i] / 4); else // geometry shader { uint32 gsOutPrimType = shaderContext->contextRegisters[mmVGT_GS_OUT_PRIM_TYPE]; @@ -4006,7 +3933,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, cemu_assert_debug(gsOutPrimType == 0); // currently we only properly handle GS output primitive points - src->addFmt("int sbBase{} = uf_streamoutBufferBase{}/4 + (gl_PrimitiveIDIn * {})*{};" _CRLF, i, i, maxVerticesInGS, shaderContext->output->streamoutBufferStride[i] / 4); + src->addFmt("int sbBase{} = supportBuffer.streamoutBufferBase{}/4 + (gl_PrimitiveIDIn * {})*{};" _CRLF, i, i, maxVerticesInGS, shaderContext->output->streamoutBufferStride[i] / 4); } } @@ -4019,7 +3946,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) src->addFmt("{} = int4(vid, 0, 0, iid);" _CRLF, _getRegisterVarName(shaderContext, 0)); else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) - src->addFmt("{} = as_type(float4(vid, 0, 0, iid));" _CRLF, _getRegisterVarName(shaderContext, 0)); // TODO: is this correct? + src->addFmt("{} = float4(vid, 0, 0, iid);" _CRLF, _getRegisterVarName(shaderContext, 0)); // TODO: as_type(float4(vid, 0, 0, iid))? else cemu_assert_unimplemented(); } @@ -4097,9 +4024,9 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, { // import from vertex shader if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) - src->addFmt("{} = as_type(passParameterSem{});" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId); + src->addFmt("{} = as_type(in.passParameterSem{});" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId); else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) - src->addFmt("{} = passParameterSem{};" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId); + src->addFmt("{} = in.passParameterSem{};" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId); else cemu_assert_unimplemented(); } @@ -4112,7 +4039,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, if (frontFace_allBits) cemu_assert_debug(false); if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) - src->addFmt("{}.{} = as_type(gl_FrontFacing?1.0:0.0);" _CRLF, _getRegisterVarName(shaderContext, frontFace_regIndex), _getElementStrByIndex(frontFace_chan)); + src->addFmt("{}.{} = as_type(frontFacing?1.0:0.0);" _CRLF, _getRegisterVarName(shaderContext, frontFace_regIndex), _getElementStrByIndex(frontFace_chan)); else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->addFmt("{}.{} = frontFacing ? 1.0 : 0.0;" _CRLF, _getRegisterVarName(shaderContext, frontFace_regIndex), _getElementStrByIndex(frontFace_chan)); else @@ -4128,8 +4055,10 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false) { if (shader->shaderType == LatteConst::ShaderType::Vertex && shaderContext->options->usesGeometryShader == false) - src->add("out.pointSize = uf_pointSize;" _CRLF); + src->add("out.pointSize = supportBuffer.pointSize;" _CRLF); } + // return + src->add("return out;" _CRLF); // end of shader main src->add("}" _CRLF); src->shrink_to_fit(); diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLAttrDecoder.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLAttrDecoder.cpp index 8219646a..cb90e45d 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLAttrDecoder.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLAttrDecoder.cpp @@ -12,50 +12,50 @@ static void _readLittleEndianAttributeU32x4(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex) { - src->addFmt("attrDecoder = attrDataSem{};" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder = in.attrDataSem{};" _CRLF, attributeInputIndex); } static void _readLittleEndianAttributeU32x3(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex) { - src->addFmt("attrDecoder = uint4(attrDataSem{}.xyz,0);" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder = uint4(in.attrDataSem{}.xyz,0);" _CRLF, attributeInputIndex); } static void _readLittleEndianAttributeU32x2(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex) { - src->addFmt("attrDecoder = uint4(attrDataSem{}.xy,0,0);" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder = uint4(in.attrDataSem{}.xy,0,0);" _CRLF, attributeInputIndex); } static void _readLittleEndianAttributeU32x1(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex) { - src->addFmt("attrDecoder = uint4(attrDataSem{}.x,0,0,0);" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder = uint4(in.attrDataSem{}.x,0,0,0);" _CRLF, attributeInputIndex); } static void _readLittleEndianAttributeU16x2(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex) { - src->addFmt("attrDecoder = uint4(attrDataSem{}.xy,0,0);" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder = uint4(in.attrDataSem{}.xy,0,0);" _CRLF, attributeInputIndex); } static void _readLittleEndianAttributeU16x4(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex) { - src->addFmt("attrDecoder = attrDataSem{};" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder = in.attrDataSem{};" _CRLF, attributeInputIndex); } static void _readBigEndianAttributeU32x4(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex) { - src->addFmt("attrDecoder = attrDataSem{};" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder = in.attrDataSem{};" _CRLF, attributeInputIndex); src->add("attrDecoder = (attrDecoder>>24)|((attrDecoder>>8)&0xFF00)|((attrDecoder<<8)&0xFF0000)|((attrDecoder<<24));" _CRLF); } static void _readBigEndianAttributeU32x3(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex) { - src->addFmt("attrDecoder.xyz = attrDataSem{}.xyz;" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xyz = in.attrDataSem{}.xyz;" _CRLF, attributeInputIndex); src->add("attrDecoder.xyz = (attrDecoder.xyz>>24)|((attrDecoder.xyz>>8)&0xFF00)|((attrDecoder.xyz<<8)&0xFF0000)|((attrDecoder.xyz<<24));" _CRLF); src->add("attrDecoder.w = 0;" _CRLF); } static void _readBigEndianAttributeU32x2(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex) { - src->addFmt("attrDecoder.xy = attrDataSem{}.xy;" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xy = in.attrDataSem{}.xy;" _CRLF, attributeInputIndex); src->add("attrDecoder.xy = (attrDecoder.xy>>24)|((attrDecoder.xy>>8)&0xFF00)|((attrDecoder.xy<<8)&0xFF0000)|((attrDecoder.xy<<24));" _CRLF); src->add("attrDecoder.z = 0;" _CRLF); src->add("attrDecoder.w = 0;" _CRLF); @@ -63,7 +63,7 @@ static void _readBigEndianAttributeU32x2(LatteDecompilerShader* shaderContext, S static void _readBigEndianAttributeU32x1(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex) { - src->addFmt("attrDecoder.x = attrDataSem{}.x;" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.x = in.attrDataSem{}.x;" _CRLF, attributeInputIndex); src->add("attrDecoder.x = (attrDecoder.x>>24)|((attrDecoder.x>>8)&0xFF00)|((attrDecoder.x<<8)&0xFF0000)|((attrDecoder.x<<24));" _CRLF); src->add("attrDecoder.y = 0;" _CRLF); src->add("attrDecoder.z = 0;" _CRLF); @@ -72,7 +72,7 @@ static void _readBigEndianAttributeU32x1(LatteDecompilerShader* shaderContext, S static void _readBigEndianAttributeU16x1(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex) { - src->addFmt("attrDecoder.xy = attrDataSem{}.xy;" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xy = in.attrDataSem{}.xy;" _CRLF, attributeInputIndex); src->add("attrDecoder.x = ((attrDecoder.x>>8)&0xFF)|((attrDecoder.x<<8)&0xFF00);" _CRLF); src->add("attrDecoder.y = 0;" _CRLF); src->add("attrDecoder.z = 0;" _CRLF); @@ -81,7 +81,7 @@ static void _readBigEndianAttributeU16x1(LatteDecompilerShader* shaderContext, S static void _readBigEndianAttributeU16x2(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex) { - src->addFmt("attrDecoder.xy = attrDataSem{}.xy;" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xy = in.attrDataSem{}.xy;" _CRLF, attributeInputIndex); src->add("attrDecoder.xy = ((attrDecoder.xy>>8)&0xFF)|((attrDecoder.xy<<8)&0xFF00);" _CRLF); src->add("attrDecoder.z = 0;" _CRLF); src->add("attrDecoder.w = 0;" _CRLF); @@ -89,7 +89,7 @@ static void _readBigEndianAttributeU16x2(LatteDecompilerShader* shaderContext, S static void _readBigEndianAttributeU16x4(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex) { - src->addFmt("attrDecoder.xyzw = attrDataSem{}.xyzw;" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.xyzw;" _CRLF, attributeInputIndex); src->add("attrDecoder = ((attrDecoder>>8)&0xFF)|((attrDecoder<<8)&0xFF00);" _CRLF); } @@ -167,12 +167,12 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 0 && attrib->isSigned == 0 ) { // seen in Minecraft Wii U Edition - src->addFmt("attrDecoder.xyzw = as_type(vec4(attrDataSem{}.wzyx)/255.0);" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xyzw = as_type(float4(in.attrDataSem{}.wzyx)/255.0);" _CRLF, attributeInputIndex); } else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 0 && attrib->isSigned != 0 ) { // seen in Minecraft Wii U Edition - src->addFmt("attrDecoder.xyzw = attrDataSem{}.wzyx;" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.wzyx;" _CRLF, attributeInputIndex); src->add("if( (attrDecoder.x&0x80) != 0 ) attrDecoder.x |= 0xFFFFFF00;" _CRLF); src->add("if( (attrDecoder.y&0x80) != 0 ) attrDecoder.y |= 0xFFFFFF00;" _CRLF); src->add("if( (attrDecoder.z&0x80) != 0 ) attrDecoder.z |= 0xFFFFFF00;" _CRLF); @@ -185,12 +185,12 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 1 && attrib->isSigned == 0 ) { // seen in Minecraft Wii U Edition - src->addFmt("attrDecoder.xyzw = attrDataSem{}.wzyx;" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.wzyx;" _CRLF, attributeInputIndex); } else if (attrib->format == FMT_8_8_8_8 && attrib->nfa == 2 && attrib->isSigned == 0) { // seen in Ben 10 Omniverse - src->addFmt("attrDecoder.xyzw = as_type(vec4(attrDataSem{}.wzyx));" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xyzw = as_type(float4(in.attrDataSem{}.wzyx));" _CRLF, attributeInputIndex); } else { @@ -275,11 +275,11 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext } else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 0 && attrib->isSigned == 0 ) { - src->addFmt("attrDecoder.xyzw = as_type(float4(attrDataSem{}.xyzw)/255.0);" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xyzw = as_type(float4(in.attrDataSem{}.xyzw)/255.0);" _CRLF, attributeInputIndex); } else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 0 && attrib->isSigned != 0 ) { - src->addFmt("attrDecoder.xyzw = attrDataSem{}.xyzw;" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.xyzw;" _CRLF, attributeInputIndex); src->add("if( (attrDecoder.x&0x80) != 0 ) attrDecoder.x |= 0xFFFFFF00;" _CRLF); src->add("if( (attrDecoder.y&0x80) != 0 ) attrDecoder.y |= 0xFFFFFF00;" _CRLF); src->add("if( (attrDecoder.z&0x80) != 0 ) attrDecoder.z |= 0xFFFFFF00;" _CRLF); @@ -291,12 +291,12 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext } else if (attrib->format == FMT_8_8_8_8 && attrib->nfa == 1 && attrib->isSigned == 0) { - src->addFmt("attrDecoder.xyzw = attrDataSem{}.xyzw;" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.xyzw;" _CRLF, attributeInputIndex); } else if (attrib->format == FMT_8_8_8_8 && attrib->nfa == 1 && attrib->isSigned != 0) { // seen in Sonic Lost World - src->addFmt("attrDecoder.xyzw = attrDataSem{}.xyzw;" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.xyzw;" _CRLF, attributeInputIndex); src->add("if( (attrDecoder.x&0x80) != 0 ) attrDecoder.x |= 0xFFFFFF00;" _CRLF); src->add("if( (attrDecoder.y&0x80) != 0 ) attrDecoder.y |= 0xFFFFFF00;" _CRLF); src->add("if( (attrDecoder.z&0x80) != 0 ) attrDecoder.z |= 0xFFFFFF00;" _CRLF); @@ -305,19 +305,19 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 2 && attrib->isSigned == 0 ) { // seen in One Piece - src->addFmt("attrDecoder.xyzw = as_type(float4(attrDataSem{}.xyzw));" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xyzw = as_type(float4(in.attrDataSem{}.xyzw));" _CRLF, attributeInputIndex); } else if (attrib->format == FMT_8_8 && attrib->nfa == 0 && attrib->isSigned == 0) { if( (attrib->offset&3) == 2 && LatteGPUState.glVendor == GLVENDOR_AMD && g_renderer->GetType() == RendererAPI::OpenGL ) { // AMD workaround - src->addFmt("attrDecoder.xy = as_type(float2(attrDataSem{}.zw)/255.0);" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xy = as_type(float2(in.attrDataSem{}.zw)/255.0);" _CRLF, attributeInputIndex); src->add("attrDecoder.zw = uint2(0);" _CRLF); } else { - src->addFmt("attrDecoder.xy = as_type(float2(attrDataSem{}.xy)/255.0);" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xy = as_type(float2(in.attrDataSem{}.xy)/255.0);" _CRLF, attributeInputIndex); src->add("attrDecoder.zw = uint2(0);" _CRLF); } } @@ -327,12 +327,12 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext if ((attrib->offset & 3) == 2 && LatteGPUState.glVendor == GLVENDOR_AMD && g_renderer->GetType() == RendererAPI::OpenGL) { // AMD workaround - src->addFmt("attrDecoder.xy = as_type(float2(attrDataSem{}.zw));" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xy = as_type(float2(in.attrDataSem{}.zw));" _CRLF, attributeInputIndex); src->add("attrDecoder.zw = uint2(0);" _CRLF); } else { - src->addFmt("attrDecoder.xy = as_type(float2(attrDataSem{}.xy));" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xy = as_type(float2(in.attrDataSem{}.xy));" _CRLF, attributeInputIndex); src->add("attrDecoder.zw = uint2(0);" _CRLF); } } @@ -341,7 +341,7 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext if ((attrib->offset & 3) == 2 && LatteGPUState.glVendor == GLVENDOR_AMD && g_renderer->GetType() == RendererAPI::OpenGL) { // AMD workaround - src->addFmt("attrDecoder.xy = attrDataSem{}.zw;" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xy = in.attrDataSem{}.zw;" _CRLF, attributeInputIndex); src->add("if( (attrDecoder.x&0x80) != 0 ) attrDecoder.x |= 0xFFFFFF00;" _CRLF); src->add("if( (attrDecoder.y&0x80) != 0 ) attrDecoder.y |= 0xFFFFFF00;" _CRLF); src->add("attrDecoder.x = as_type(max(float(int(attrDecoder.x))/127.0,-1.0));" _CRLF); @@ -350,7 +350,7 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext } else { - src->addFmt("attrDecoder.xy = attrDataSem{}.xy;" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xy = in.attrDataSem{}.xy;" _CRLF, attributeInputIndex); src->add("if( (attrDecoder.x&0x80) != 0 ) attrDecoder.x |= 0xFFFFFF00;" _CRLF); src->add("if( (attrDecoder.y&0x80) != 0 ) attrDecoder.y |= 0xFFFFFF00;" _CRLF); src->add("attrDecoder.x = as_type(max(float(int(attrDecoder.x))/127.0,-1.0));" _CRLF); @@ -363,22 +363,22 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext if ((attrib->offset & 3) == 2 && LatteGPUState.glVendor == GLVENDOR_AMD && g_renderer->GetType() == RendererAPI::OpenGL) { // AMD workaround - src->addFmt("attrDecoder.xyzw = uint4(attrDataSem{}.zw,0,0);" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xyzw = uint4(in.attrDataSem{}.zw,0,0);" _CRLF, attributeInputIndex); } else { - src->addFmt("attrDecoder.xyzw = uint4(attrDataSem{}.xy,0,0);" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xyzw = uint4(in.attrDataSem{}.xy,0,0);" _CRLF, attributeInputIndex); } } else if( attrib->format == FMT_8 && attrib->nfa == 0 && attrib->isSigned == 0 ) { // seen in Pikmin 3 - src->addFmt("attrDecoder.x = as_type(float(attrDataSem{}.x)/255.0);" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.x = as_type(float(in.attrDataSem{}.x)/255.0);" _CRLF, attributeInputIndex); src->add("attrDecoder.yzw = uint3(0);" _CRLF); } else if( attrib->format == FMT_8 && attrib->nfa == 1 && attrib->isSigned == 0 ) { - src->addFmt("attrDecoder.xyzw = uint4(attrDataSem{}.x,0,0,0);" _CRLF, attributeInputIndex); + src->addFmt("attrDecoder.xyzw = uint4(in.attrDataSem{}.x,0,0,0);" _CRLF, attributeInputIndex); } else { diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index 39798dc5..45c88631 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -9,7 +9,7 @@ namespace LatteDecompiler LatteDecompilerShaderResourceMapping& resourceMapping = decompilerContext->output->resourceMappingGL; auto& uniformOffsets = decompilerContext->output->uniformOffsetsVK; - src->add("struct DefualtUniforms {" _CRLF); + src->add("struct SupportBuffer {" _CRLF); sint32 uniformCurrentOffset = 0; auto shader = decompilerContext->shader; @@ -19,11 +19,11 @@ namespace LatteDecompiler // uniform registers or buffers are accessed statically with predictable offsets // this allows us to remap the used entries into a more compact array if (shaderType == LatteConst::ShaderType::Vertex) - src->addFmt("ivec4 uf_remappedVS[{}];" _CRLF, (sint32)shader->list_remappedUniformEntries.size()); + src->addFmt("int4 remappedVS[{}];" _CRLF, (sint32)shader->list_remappedUniformEntries.size()); else if (shaderType == LatteConst::ShaderType::Pixel) - src->addFmt("ivec4 uf_remappedPS[{}];" _CRLF, (sint32)shader->list_remappedUniformEntries.size()); + src->addFmt("int4 remappedPS[{}];" _CRLF, (sint32)shader->list_remappedUniformEntries.size()); else if (shaderType == LatteConst::ShaderType::Geometry) - src->addFmt("ivec4 uf_remappedGS[{}];" _CRLF, (sint32)shader->list_remappedUniformEntries.size()); + src->addFmt("int4 remappedGS[{}];" _CRLF, (sint32)shader->list_remappedUniformEntries.size()); else debugBreakpoint(); uniformOffsets.offset_remapped = uniformCurrentOffset; @@ -34,11 +34,11 @@ namespace LatteDecompiler uint32 cfileSize = decompilerContext->analyzer.uniformRegisterAccessTracker.DetermineSize(decompilerContext->shaderBaseHash, 256); // full or partial uniform register file has to be present if (shaderType == LatteConst::ShaderType::Vertex) - src->addFmt("ivec4 uf_uniformRegisterVS[{}];" _CRLF, cfileSize); + src->addFmt("int4 uniformRegisterVS[{}];" _CRLF, cfileSize); else if (shaderType == LatteConst::ShaderType::Pixel) - src->addFmt("ivec4 uf_uniformRegisterPS[{}];" _CRLF, cfileSize); + src->addFmt("int4 uniformRegisterPS[{}];" _CRLF, cfileSize); else if (shaderType == LatteConst::ShaderType::Geometry) - src->addFmt("ivec4 uf_uniformRegisterGS[{}];" _CRLF, cfileSize); + src->addFmt("int4 uniformRegisterGS[{}];" _CRLF, cfileSize); uniformOffsets.offset_uniformRegister = uniformCurrentOffset; uniformOffsets.count_uniformRegister = cfileSize; uniformCurrentOffset += 16 * cfileSize; @@ -53,7 +53,7 @@ namespace LatteDecompiler { // aka GX2 special state 0 uniformCurrentOffset = (uniformCurrentOffset + 7)&~7; - src->add("float2 uf_windowSpaceToClipSpaceTransform;" _CRLF); + src->add("float2 windowSpaceToClipSpaceTransform;" _CRLF); uniformOffsets.offset_windowSpaceToClipSpaceTransform = uniformCurrentOffset; uniformCurrentOffset += 8; } @@ -61,7 +61,7 @@ namespace LatteDecompiler if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel && alphaTestEnable) { uniformCurrentOffset = (uniformCurrentOffset + 3)&~3; - src->add("float uf_alphaTestRef;" _CRLF); + src->add("float alphaTestRef;" _CRLF); uniformOffsets.offset_alphaTestRef = uniformCurrentOffset; uniformCurrentOffset += 4; } @@ -71,16 +71,16 @@ namespace LatteDecompiler decompilerContext->shaderType == LatteConst::ShaderType::Geometry) { uniformCurrentOffset = (uniformCurrentOffset + 3)&~3; - src->add("float uf_pointSize;" _CRLF); + src->add("float pointSize;" _CRLF); uniformOffsets.offset_pointSize = uniformCurrentOffset; uniformCurrentOffset += 4; } } - // define uf_fragCoordScale which holds the xy scale for render target resolution vs effective resolution + // define fragCoordScale which holds the xy scale for render target resolution vs effective resolution if (shader->shaderType == LatteConst::ShaderType::Pixel) { uniformCurrentOffset = (uniformCurrentOffset + 7)&~7; - src->add("vec2 uf_fragCoordScale;" _CRLF); + src->add("float2 fragCoordScale;" _CRLF); uniformOffsets.offset_fragCoordScale = uniformCurrentOffset; uniformCurrentOffset += 8; } @@ -90,30 +90,30 @@ namespace LatteDecompiler if (decompilerContext->analyzer.texUnitUsesTexelCoordinates.test(t) == false) continue; uniformCurrentOffset = (uniformCurrentOffset + 7) & ~7; - src->addFmt("vec2 uf_tex{}Scale;" _CRLF, t); + src->addFmt("float2 tex{}Scale;" _CRLF, t); uniformOffsets.offset_texScale[t] = uniformCurrentOffset; uniformCurrentOffset += 8; } - // define uf_verticesPerInstance + uf_streamoutBufferBaseX + // define verticesPerInstance + streamoutBufferBaseX if (decompilerContext->analyzer.useSSBOForStreamout && (shader->shaderType == LatteConst::ShaderType::Vertex && decompilerContext->options->usesGeometryShader == false) || (shader->shaderType == LatteConst::ShaderType::Geometry) ) { - src->add("int uf_verticesPerInstance;" _CRLF); + src->add("int verticesPerInstance;" _CRLF); uniformOffsets.offset_verticesPerInstance = uniformCurrentOffset; uniformCurrentOffset += 4; for (uint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++) { if (decompilerContext->output->streamoutBufferWriteMask[i]) { - src->addFmt("int uf_streamoutBufferBase{};" _CRLF, i); + src->addFmt("int streamoutBufferBase{};" _CRLF, i); uniformOffsets.offset_streamoutBufferBase[i] = uniformCurrentOffset; uniformCurrentOffset += 4; } } } - src->add("}" _CRLF _CRLF); + src->add("};" _CRLF _CRLF); uniformOffsets.offset_endOfBlock = uniformCurrentOffset; } @@ -173,7 +173,7 @@ namespace LatteDecompiler cemu_assert_debug(decompilerContext->output->resourceMappingVK.attributeMapping[i] >= 0); cemu_assert_debug(decompilerContext->output->resourceMappingGL.attributeMapping[i] == decompilerContext->output->resourceMappingVK.attributeMapping[i]); - src->addFmt("ATTR_LAYOUT({}, {}) in uvec4 attrDataSem{};" _CRLF, (sint32)decompilerContext->output->resourceMappingVK.setIndex, (sint32)decompilerContext->output->resourceMappingVK.attributeMapping[i], i); + src->addFmt("uint4 attrDataSem{} [[attribute({})]];" _CRLF, i, (sint32)decompilerContext->output->resourceMappingVK.attributeMapping[i]); } } src->add("};" _CRLF _CRLF); @@ -367,7 +367,17 @@ namespace LatteDecompiler break; } - src->add(" in [[stage_in]], DefaultVariables defaultVars [[buffer(29)]]"); + src->add(" in [[stage_in]], constant SupportBuffer& supportBuffer [[buffer(29)]]"); + switch (decompilerContext->shaderType) + { + case LatteConst::ShaderType::Vertex: + src->add(", uint vid [[vertex_id]]"); + src->add(", uint iid [[instance_id]]"); + break; + case LatteConst::ShaderType::Pixel: + src->add(", bool frontFacing [[front_facing]]"); + break; + } // uniform buffers _emitUniformBufferDefinitions(decompilerContext); // textures diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp index 1b140590..bf6cab24 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp @@ -85,5 +85,5 @@ LatteTextureView* LatteTextureMtl::CreateView(Latte::E_DIM dim, Latte::E_GX2SURF void LatteTextureMtl::AllocateOnHost() { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index fa3b03f2..a5b7e3a6 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -34,19 +34,19 @@ void MetalRenderer::Initialize() void MetalRenderer::Shutdown() { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } bool MetalRenderer::IsPadWindowActive() { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); return false; } bool MetalRenderer::GetVRAMInfo(int& usageInMB, int& totalInMB) const { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); usageInMB = 1024; totalInMB = 1024; @@ -56,12 +56,12 @@ bool MetalRenderer::GetVRAMInfo(int& usageInMB, int& totalInMB) const void MetalRenderer::ClearColorbuffer(bool padView) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } void MetalRenderer::DrawEmptyFrame(bool mainWindow) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } void MetalRenderer::SwapBuffers(bool swapTV, bool swapDRC) @@ -83,7 +83,7 @@ void MetalRenderer::DrawBackbufferQuad(LatteTextureView* texView, RendererOutput sint32 imageX, sint32 imageY, sint32 imageWidth, sint32 imageHeight, bool padView, bool clearBackground) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } bool MetalRenderer::BeginFrame(bool mainWindow) @@ -96,68 +96,68 @@ bool MetalRenderer::BeginFrame(bool mainWindow) void MetalRenderer::Flush(bool waitIdle) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } void MetalRenderer::NotifyLatteCommandProcessorIdle() { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } void MetalRenderer::AppendOverlayDebugInfo() { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } void MetalRenderer::renderTarget_setViewport(float x, float y, float width, float height, float nearZ, float farZ, bool halfZ) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } void MetalRenderer::renderTarget_setScissor(sint32 scissorX, sint32 scissorY, sint32 scissorWidth, sint32 scissorHeight) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } LatteCachedFBO* MetalRenderer::rendertarget_createCachedFBO(uint64 key) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); return nullptr; } void MetalRenderer::rendertarget_deleteCachedFBO(LatteCachedFBO* fbo) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } void MetalRenderer::rendertarget_bindFramebufferObject(LatteCachedFBO* cfbo) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } void* MetalRenderer::texture_acquireTextureUploadBuffer(uint32 size) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); return nullptr; } void MetalRenderer::texture_releaseTextureUploadBuffer(uint8* mem) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } TextureDecoder* MetalRenderer::texture_chooseDecodedFormat(Latte::E_GX2SURFFMT format, bool isDepth, Latte::E_DIM dim, uint32 width, uint32 height) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); return nullptr; } void MetalRenderer::texture_clearSlice(LatteTexture* hostTexture, sint32 sliceIndex, sint32 mipIndex) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, sint32 height, sint32 depth, void* pixelData, sint32 sliceIndex, sint32 mipIndex, uint32 compressedImageSize) @@ -171,12 +171,12 @@ void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, s void MetalRenderer::texture_clearColorSlice(LatteTexture* hostTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } void MetalRenderer::texture_clearDepthSlice(LatteTexture* hostTexture, uint32 sliceIndex, sint32 mipIndex, bool clearDepth, bool clearStencil, float depthValue, uint32 stencilValue) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } LatteTexture* MetalRenderer::texture_createTextureEx(Latte::E_DIM dim, MPTR physAddress, MPTR physMipAddress, Latte::E_GX2SURFFMT format, uint32 width, uint32 height, uint32 depth, uint32 pitch, uint32 mipLevels, uint32 swizzle, Latte::E_HWTILEMODE tileMode, bool isDepth) @@ -186,102 +186,101 @@ LatteTexture* MetalRenderer::texture_createTextureEx(Latte::E_DIM dim, MPTR phys void MetalRenderer::texture_setLatteTexture(LatteTextureView* textureView, uint32 textureUnit) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } void MetalRenderer::texture_copyImageSubData(LatteTexture* src, sint32 srcMip, sint32 effectiveSrcX, sint32 effectiveSrcY, sint32 srcSlice, LatteTexture* dst, sint32 dstMip, sint32 effectiveDstX, sint32 effectiveDstY, sint32 dstSlice, sint32 effectiveCopyWidth, sint32 effectiveCopyHeight, sint32 srcDepth) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } LatteTextureReadbackInfo* MetalRenderer::texture_createReadback(LatteTextureView* textureView) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); return nullptr; } void MetalRenderer::surfaceCopy_copySurfaceWithFormatConversion(LatteTexture* sourceTexture, sint32 srcMip, sint32 srcSlice, LatteTexture* destinationTexture, sint32 dstMip, sint32 dstSlice, sint32 width, sint32 height) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } void MetalRenderer::bufferCache_init(const sint32 bufferSize) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } void MetalRenderer::bufferCache_upload(uint8* buffer, sint32 size, uint32 bufferOffset) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } void MetalRenderer::bufferCache_copy(uint32 srcOffset, uint32 dstOffset, uint32 size) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } void MetalRenderer::bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uint32 dstOffset, uint32 size) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, uint32 size) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } void MetalRenderer::buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } RendererShader* MetalRenderer::shader_create(RendererShader::ShaderType type, uint64 baseHash, uint64 auxHash, const std::string& source, bool isGameShader, bool isGfxPackShader) { - //return new RendererShaderMtl(this, type, baseHash, auxHash, isGameShader, isGfxPackShader, source); - return nullptr; + return new RendererShaderMtl(this, type, baseHash, auxHash, isGameShader, isGfxPackShader, source); } void MetalRenderer::streamout_setupXfbBuffer(uint32 bufferIndex, sint32 ringBufferOffset, uint32 rangeAddr, uint32 rangeSize) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } void MetalRenderer::streamout_begin() { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } void MetalRenderer::streamout_rendererFinishDrawcall() { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } void MetalRenderer::draw_beginSequence() { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); LatteSHRC_UpdateActiveShaders(); } void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 instanceCount, uint32 count, MPTR indexDataMPTR, Latte::LATTE_VGT_DMA_INDEX_TYPE::E_INDEX_TYPE indexType, bool isFirst) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } void MetalRenderer::draw_endSequence() { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); return nullptr; } void MetalRenderer::indexData_uploadIndexMemory(uint32 offset, uint32 size) { - cemuLog_logDebug(LogType::Force, "not implemented"); + cemuLog_log(LogType::MetalLogging, "not implemented"); } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 49e11be9..d43cc2be 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -39,7 +39,7 @@ public: void SwapBuffers(bool swapTV, bool swapDRC) override; void HandleScreenshotRequest(LatteTextureView* texView, bool padView) override { - cemuLog_logDebug(LogType::Force, "Screenshots are not yet supported on Metal"); + cemuLog_log(LogType::MetalLogging, "Screenshots are not yet supported on Metal"); } void DrawBackbufferQuad(LatteTextureView* texView, RendererOutputShader* shader, bool useLinearTexFilter, @@ -53,27 +53,27 @@ public: // imgui bool ImguiBegin(bool mainWindow) override { - cemuLog_logDebug(LogType::Force, "Imgui is not yet supported on Metal"); + cemuLog_log(LogType::MetalLogging, "Imgui is not yet supported on Metal"); return false; }; void ImguiEnd() override { - cemuLog_logDebug(LogType::Force, "Imgui is not yet supported on Metal"); + cemuLog_log(LogType::MetalLogging, "Imgui is not yet supported on Metal"); }; ImTextureID GenerateTexture(const std::vector& data, const Vector2i& size) override { - cemuLog_logDebug(LogType::Force, "Imgui is not yet supported on Metal"); + cemuLog_log(LogType::MetalLogging, "Imgui is not yet supported on Metal"); return nullptr; }; void DeleteTexture(ImTextureID id) override { - cemuLog_logDebug(LogType::Force, "Imgui is not yet supported on Metal"); + cemuLog_log(LogType::MetalLogging, "Imgui is not yet supported on Metal"); }; void DeleteFontTextures() override { - cemuLog_logDebug(LogType::Force, "Imgui is not yet supported on Metal"); + cemuLog_log(LogType::MetalLogging, "Imgui is not yet supported on Metal"); }; void AppendOverlayDebugInfo() override; @@ -135,21 +135,21 @@ public: // occlusion queries LatteQueryObject* occlusionQuery_create() override { - cemuLog_logDebug(LogType::Force, "Occlusion queries are not yet supported on Metal"); + cemuLog_log(LogType::MetalLogging, "Occlusion queries are not yet supported on Metal"); return nullptr; } void occlusionQuery_destroy(LatteQueryObject* queryObj) override { - cemuLog_logDebug(LogType::Force, "Occlusion queries are not yet supported on Metal"); + cemuLog_log(LogType::MetalLogging, "Occlusion queries are not yet supported on Metal"); } void occlusionQuery_flush() override { - cemuLog_logDebug(LogType::Force, "Occlusion queries are not yet supported on Metal"); + cemuLog_log(LogType::MetalLogging, "Occlusion queries are not yet supported on Metal"); } void occlusionQuery_updateState() override { - cemuLog_logDebug(LogType::Force, "Occlusion queries are not yet supported on Metal"); + cemuLog_log(LogType::MetalLogging, "Occlusion queries are not yet supported on Metal"); } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp index 50033d5c..28b38612 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -9,11 +9,12 @@ RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type MTL::Library* library = mtlRenderer->GetDevice()->newLibrary(NS::String::string(mslCode.c_str(), NS::ASCIIStringEncoding), nullptr, &error); if (error) { - cemuLog_log(LogType::MetalLogging, "Failed to create library (error: {}) -> source:\n{}", error->localizedDescription()->utf8String(), mslCode.c_str()); + printf("Failed to create library (error: %s) -> source:\n%s", error->localizedDescription()->utf8String(), mslCode.c_str()); error->release(); return; } m_function = library->newFunction(NS::String::string("main0", NS::ASCIIStringEncoding)); + library->release(); } RendererShaderMtl::~RendererShaderMtl() diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h index 030bbff0..e440d4dc 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h @@ -29,10 +29,21 @@ public: return m_function; } - // TODO: implement these - sint32 GetUniformLocation(const char* name) override { return 0; } - void SetUniform2fv(sint32 location, void* data, sint32 count) override {} - void SetUniform4iv(sint32 location, void* data, sint32 count) override {} + sint32 GetUniformLocation(const char* name) override + { + cemu_assert_suspicious(); + return 0; + } + + void SetUniform2fv(sint32 location, void* data, sint32 count) override + { + cemu_assert_suspicious(); + } + + void SetUniform4iv(sint32 location, void* data, sint32 count) override + { + cemu_assert_suspicious(); + } // TODO: implement this void PreponeCompilation(bool isRenderThread) override {}