fix: shader errors and shader crashes

This commit is contained in:
Samuliak 2024-07-26 18:56:30 +02:00
parent aac9b123a5
commit af3ce80b7c
10 changed files with 252 additions and 302 deletions

View File

@ -26,7 +26,7 @@ bool gxShader_checkIfSuccessfullyLinked(GLuint glProgram)
void LatteShader_prepareSeparableUniforms(LatteDecompilerShader* shader)
{
if (g_renderer->GetType() == RendererAPI::Vulkan)
if (g_renderer->GetType() != RendererAPI::OpenGL)
return;
auto shaderGL = (RendererShaderGL*)shader->shader;

View File

@ -628,11 +628,11 @@ static void _emitUniformAccessCode(LatteDecompilerShaderContext* shaderContext,
cemu_assert_debug(remappedUniformEntry);
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, requiredType);
if(shaderContext->shader->shaderType == LatteConst::ShaderType::Vertex )
src->addFmt("uf_remappedVS[{}]", remappedUniformEntry->mappedIndex);
src->addFmt("supportBuffer.remappedVS[{}]", remappedUniformEntry->mappedIndex);
else if(shaderContext->shader->shaderType == LatteConst::ShaderType::Pixel )
src->addFmt("uf_remappedPS[{}]", remappedUniformEntry->mappedIndex);
src->addFmt("supportBuffer.remappedPS[{}]", remappedUniformEntry->mappedIndex);
else if(shaderContext->shader->shaderType == LatteConst::ShaderType::Geometry )
src->addFmt("uf_remappedGS[{}]", remappedUniformEntry->mappedIndex);
src->addFmt("supportBuffer.remappedGS[{}]", remappedUniformEntry->mappedIndex);
else
debugBreakpoint();
_appendChannelAccess(src, aluInstruction->sourceOperand[operandIndex].chan);
@ -643,11 +643,11 @@ static void _emitUniformAccessCode(LatteDecompilerShaderContext* shaderContext,
// uniform registers are accessed with unpredictable (dynamic) offset
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, requiredType);
if(shaderContext->shader->shaderType == LatteConst::ShaderType::Vertex )
src->add("uf_uniformRegisterVS[");
src->add("supportBuffer.uniformRegisterVS[");
else if (shaderContext->shader->shaderType == LatteConst::ShaderType::Pixel)
src->add("uf_uniformRegisterPS[");
src->add("supportBuffer.uniformRegisterPS[");
else if(shaderContext->shader->shaderType == LatteConst::ShaderType::Geometry )
src->add("uf_uniformRegisterGS[");
src->add("supportBuffer.uniformRegisterGS[");
else
debugBreakpoint();
_emitUniformAccessIndexCode(shaderContext, aluInstruction, operandIndex);
@ -802,7 +802,7 @@ static void _emitOperandInputCode(LatteDecompilerShaderContext* shaderContext, L
if( currentRegisterElementType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
{
// need to convert (not cast) from int bits to float
src->add("intBitsToFloat(");
src->add("as_type<float>(");
}
else if( currentRegisterElementType == LATTE_DECOMPILER_DTYPE_FLOAT )
{
@ -872,7 +872,7 @@ static void _emitOperandInputCode(LatteDecompilerShaderContext* shaderContext, L
src->add(_FormatFloatAsConstant(*(float*)&constVal));
}
else
src->addFmt("intBitsToFloat(0x{:08x})", constVal);
src->addFmt("as_type<float>(0x{:08x})", constVal);
}
}
else if( GPU7_ALU_SRC_IS_CFILE(aluInstruction->sourceOperand[operandIndex].sel) )
@ -1026,7 +1026,7 @@ static void _emitALUOP2InstructionCode(LatteDecompilerShaderContext* shaderConte
src->add(" = ");
if( outputType != LATTE_DECOMPILER_DTYPE_SIGNED_INT )
debugBreakpoint(); // todo
src->add("floatBitsToInt(tempResultf)");
src->add("as_type<int>(tempResultf)");
src->add(";" _CRLF);
}
else if( aluInstruction->opcode == ALU_OP2_INST_MOVA_INT )
@ -1123,9 +1123,9 @@ static void _emitALUOP2InstructionCode(LatteDecompilerShaderContext* shaderConte
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(");" _CRLF);
// INF becomes 0.0
src->add("if( isinf(tempResultf) == true && (floatBitsToInt(tempResultf)&0x80000000) == 0 ) tempResultf = 0.0;" _CRLF);
src->add("if( isinf(tempResultf) == true && (as_type<int>(tempResultf)&0x80000000) == 0 ) tempResultf = 0.0;" _CRLF);
// -INF becomes -0.0
src->add("else if( isinf(tempResultf) == true && (floatBitsToInt(tempResultf)&0x80000000) != 0 ) tempResultf = -0.0;" _CRLF);
src->add("else if( isinf(tempResultf) == true && (as_type<int>(tempResultf)&0x80000000) != 0 ) tempResultf = -0.0;" _CRLF);
// assign result to output
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
src->add(" = ");
@ -1145,14 +1145,14 @@ static void _emitALUOP2InstructionCode(LatteDecompilerShaderContext* shaderConte
if (aluInstruction->opcode == ALU_OP2_INST_RECIPSQRT_CLAMPED)
{
// note: if( -INF < 0.0 ) does not resolve to true
src->add("if( isinf(tempResultf) == true && (floatBitsToInt(tempResultf)&0x80000000) != 0 ) tempResultf = -3.40282347E+38F;" _CRLF);
src->add("else if( isinf(tempResultf) == true && (floatBitsToInt(tempResultf)&0x80000000) == 0 ) tempResultf = 3.40282347E+38F;" _CRLF);
src->add("if( isinf(tempResultf) == true && (as_type<int>(tempResultf)&0x80000000) != 0 ) tempResultf = -3.40282347E+38F;" _CRLF);
src->add("else if( isinf(tempResultf) == true && (as_type<int>(tempResultf)&0x80000000) == 0 ) tempResultf = 3.40282347E+38F;" _CRLF);
}
else if (aluInstruction->opcode == ALU_OP2_INST_RECIPSQRT_FF)
{
// untested (BotW bombs)
src->add("if( isinf(tempResultf) == true && (floatBitsToInt(tempResultf)&0x80000000) != 0 ) tempResultf = -0.0;" _CRLF);
src->add("else if( isinf(tempResultf) == true && (floatBitsToInt(tempResultf)&0x80000000) == 0 ) tempResultf = 0.0;" _CRLF);
src->add("if( isinf(tempResultf) == true && (as_type<int>(tempResultf)&0x80000000) != 0 ) tempResultf = -0.0;" _CRLF);
src->add("else if( isinf(tempResultf) == true && (as_type<int>(tempResultf)&0x80000000) == 0 ) tempResultf = 0.0;" _CRLF);
}
// assign result to output
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
@ -1704,8 +1704,8 @@ static void _emitALUReductionInstructionCode(LatteDecompilerShaderContext* shade
src->add(" = ");
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
// dot(vec4(op0),vec4(op1))
src->add("dot(vec4(");
// dot(float4(op0),float4(op1))
src->add("dot(float4(");
_emitOperandInputCode(shaderContext, aluRedcInstruction[0], 0, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(",");
_emitOperandInputCode(shaderContext, aluRedcInstruction[1], 0, LATTE_DECOMPILER_DTYPE_FLOAT);
@ -1713,7 +1713,7 @@ static void _emitALUReductionInstructionCode(LatteDecompilerShaderContext* shade
_emitOperandInputCode(shaderContext, aluRedcInstruction[2], 0, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(",");
_emitOperandInputCode(shaderContext, aluRedcInstruction[3], 0, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add("),vec4(");
src->add("),float4(");
_emitOperandInputCode(shaderContext, aluRedcInstruction[0], 1, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(",");
_emitOperandInputCode(shaderContext, aluRedcInstruction[1], 1, LATTE_DECOMPILER_DTYPE_FLOAT);
@ -1730,7 +1730,7 @@ static void _emitALUReductionInstructionCode(LatteDecompilerShaderContext* shade
{
/*
* How the CUBE instruction works (guessed mostly, based on DirectX/OpenGL spec):
Input: vec4, 3d direction vector (can be unnormalized) + w component (which can be ignored, since it only scales the vector but does not affect the direction)
Input: float4, 3d direction vector (can be unnormalized) + w component (which can be ignored, since it only scales the vector but does not affect the direction)
First we figure out the major axis (closest axis-aligned vector). There are six possible vectors:
+rx 0
@ -1758,7 +1758,7 @@ static void _emitALUReductionInstructionCode(LatteDecompilerShaderContext* shade
sint32 outputType;
src->add("redcCUBE(");
src->add("vec4(");
src->add("float4(");
_emitOperandInputCode(shaderContext, aluRedcInstruction[0], 0, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(",");
_emitOperandInputCode(shaderContext, aluRedcInstruction[1], 0, LATTE_DECOMPILER_DTYPE_FLOAT);
@ -1767,7 +1767,7 @@ static void _emitALUReductionInstructionCode(LatteDecompilerShaderContext* shade
src->add(",");
_emitOperandInputCode(shaderContext, aluRedcInstruction[3], 0, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add("),");
src->add("vec4(");
src->add("float4(");
_emitOperandInputCode(shaderContext, aluRedcInstruction[0], 1, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(",");
_emitOperandInputCode(shaderContext, aluRedcInstruction[1], 1, LATTE_DECOMPILER_DTYPE_FLOAT);
@ -1887,12 +1887,12 @@ bool _isPVUsedInNextGroup(LatteDecompilerCFInstruction* cfInstruction, sint32 st
}
*/
static void _emitVec3(LatteDecompilerShaderContext* shaderContext, uint32 dataType, LatteDecompilerALUInstruction* aluInst0, sint32 opIdx0, LatteDecompilerALUInstruction* aluInst1, sint32 opIdx1, LatteDecompilerALUInstruction* aluInst2, sint32 opIdx2)
static void _emitFloat3(LatteDecompilerShaderContext* shaderContext, uint32 dataType, LatteDecompilerALUInstruction* aluInst0, sint32 opIdx0, LatteDecompilerALUInstruction* aluInst1, sint32 opIdx1, LatteDecompilerALUInstruction* aluInst2, sint32 opIdx2)
{
StringBuf* src = shaderContext->shaderSource;
if (dataType == LATTE_DECOMPILER_DTYPE_FLOAT)
{
src->add("vec3(");
src->add("float3(");
_emitOperandInputCode(shaderContext, aluInst0, opIdx0, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(",");
_emitOperandInputCode(shaderContext, aluInst1, opIdx1, LATTE_DECOMPILER_DTYPE_FLOAT);
@ -1902,7 +1902,7 @@ static void _emitVec3(LatteDecompilerShaderContext* shaderContext, uint32 dataTy
}
else if (dataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
{
src->add("ivec3(");
src->add("int3(");
_emitOperandInputCode(shaderContext, aluInst0, opIdx0, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
src->add(",");
_emitOperandInputCode(shaderContext, aluInst1, opIdx1, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
@ -2002,7 +2002,7 @@ static void _emitALUClauseCode(LatteDecompilerShaderContext* shaderContext, Latt
{
_emitInstructionOutputVariableName(shaderContext, &aluInstruction);
src->add(" = ");
src->add("floatBitsToInt(intBitsToFloat(");
src->add("as_type<int>(");
_emitInstructionOutputVariableName(shaderContext, &aluInstruction);
src->add(")");
if( aluInstruction.omod == 1 )
@ -2099,9 +2099,9 @@ static void _emitTEXSampleCoordInputComponent(LatteDecompilerShaderContext* shad
if(interpretSrcAsType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
{
if( elementSel == 4 )
src->add("floatBitsToInt(0.0)");
src->add("as_type<int>(0.0)");
else if( elementSel == 5 )
src->add("floatBitsToInt(1.0)");
src->add("as_type<int>(1.0)");
}
else if(interpretSrcAsType == LATTE_DECOMPILER_DTYPE_FLOAT )
{
@ -2116,7 +2116,7 @@ static const char* _texGprAccessElemTable[8] = {"x","y","z","w","_","_","_","_"}
static char* _getTexGPRAccess(LatteDecompilerShaderContext* shaderContext, sint32 gprIndex, uint32 dataType, sint8 selX, sint8 selY, sint8 selZ, sint8 selW, char* tempBuffer)
{
// intBitsToFloat(R{}i.w)
// as_type<float>(R{}i.w)
*tempBuffer = '\0';
uint8 elemCount = (selX > 0 ? 1 : 0) + (selY > 0 ? 1 : 0) + (selZ > 0 ? 1 : 0) + (selW > 0 ? 1 : 0);
if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
@ -2124,7 +2124,7 @@ static char* _getTexGPRAccess(LatteDecompilerShaderContext* shaderContext, sint3
if (dataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
; // no conversion
else if (dataType == LATTE_DECOMPILER_DTYPE_FLOAT)
strcat(tempBuffer, "intBitsToFloat(");
strcat(tempBuffer, "as_type<float>(");
else
cemu_assert_unimplemented();
strcat(tempBuffer, _getRegisterVarName(shaderContext, gprIndex));
@ -2230,16 +2230,16 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
if(numWrittenElements == 1)
src->add(" = int(");
else
shaderContext->shaderSource->addFmt(" = ivec{}(", numWrittenElements);
shaderContext->shaderSource->addFmt(" = int{}(", numWrittenElements);
}
else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
src->add(" = uintBitsToFloat(");
src->add(" = as_type<float>(");
}
else
{
// float samplers
if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
src->add(" = floatBitsToInt(");
src->add(" = as_type<int>(");
else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
src->add(" = (");
}
@ -2256,104 +2256,26 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
if (numWrittenElements == 1)
shaderContext->shaderSource->add("0.0");
else
shaderContext->shaderSource->addFmt("vec{}(0.0)", numWrittenElements);
shaderContext->shaderSource->addFmt("float{}(0.0)", numWrittenElements);
shaderContext->shaderSource->add(");" _CRLF);
return;
}
if (texOpcode == GPU7_TEX_INST_SAMPLE && (texInstruction->textureFetch.unnormalized[0] && texInstruction->textureFetch.unnormalized[1] && texInstruction->textureFetch.unnormalized[2] && texInstruction->textureFetch.unnormalized[3]) )
src->addFmt("tex{}.", texInstruction->textureFetch.textureIndex);
if ((texOpcode == GPU7_TEX_INST_SAMPLE && (texInstruction->textureFetch.unnormalized[0] && texInstruction->textureFetch.unnormalized[1] && texInstruction->textureFetch.unnormalized[2] && texInstruction->textureFetch.unnormalized[3])) ||
texOpcode == GPU7_TEX_INST_LD)
{
// texture is likely a RECT
if (hasOffset)
cemu_assert_unimplemented();
src->add("texelFetch(");
src->add("read(");
unnormalizationHandled = true;
useTexelCoordinates = true;
}
else if( texOpcode == GPU7_TEX_INST_FETCH4 )
{
if( hasOffset )
cemu_assert_unimplemented();
src->add("textureGather(");
}
else if( texOpcode == GPU7_TEX_INST_LD )
{
if( hasOffset )
cemu_assert_unimplemented();
src->add("texelFetch(");
unnormalizationHandled = true;
useTexelCoordinates = true;
}
else if( texOpcode == GPU7_TEX_INST_SAMPLE_L )
{
// sample with LOD value set in gpr.w (replaces computed LOD value)
if( hasOffset )
src->add("textureLodOffset(");
else
src->add("textureLod(");
}
else if (texOpcode == GPU7_TEX_INST_SAMPLE_LZ)
{
// sample with LOD set to 0.0 (replaces computed LOD value)
if (hasOffset)
src->add("textureLodOffset(");
else
src->add("textureLod(");
}
else if (texOpcode == GPU7_TEX_INST_SAMPLE_LB)
{
// sample with LOD biased
// note: AMD doc says LOD bias is calculated from instruction LOD_BIAS field. But it appears that LOD bias is taken from input register. Might actually be both?
if (hasOffset)
src->add("textureOffset(");
else
src->add("texture(");
}
else if (texOpcode == GPU7_TEX_INST_SAMPLE)
{
if (hasOffset)
src->add("textureOffset(");
else
src->add("texture(");
}
else if (texOpcode == GPU7_TEX_INST_SAMPLE_C_L)
{
// sample with LOD value set in gpr.w (replaces computed LOD value)
if (hasOffset)
src->add("textureLodOffset(");
else
src->add("textureLod(");
}
else if (texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ)
{
// sample with LOD set to 0.0 (replaces computed LOD value)
if (hasOffset)
src->add("textureLodOffset(");
else
src->add("textureLod(");
}
else if (texOpcode == GPU7_TEX_INST_SAMPLE_C)
{
if (hasOffset)
src->add("textureOffset(");
else
src->add("texture(");
}
else if (texOpcode == GPU7_TEX_INST_SAMPLE_G)
{
if (hasOffset)
cemu_assert_unimplemented();
src->add("textureGrad(");
}
else
{
if( hasOffset )
cemu_assert_unimplemented();
cemu_assert_unimplemented();
src->add("texture(");
src->addFmt("sample(samplr{}, ", texInstruction->textureFetch.textureIndex);
}
src->addFmt("tex{}, ", texInstruction->textureFetch.textureIndex);
// for textureGather() add shift (todo: depends on rounding mode set in sampler registers?)
if (texOpcode == GPU7_TEX_INST_FETCH4)
@ -2370,7 +2292,7 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
// Vulkan uses truncate mode when point sampling (min and mag are both nearest); otherwise it uses rounding
// adding a small fixed bias is enough to avoid vendor-specific cases where small inaccuracies cause the number to get rounded down due to truncation
src->addFmt("vec2(0.0001) + ");
src->addFmt("float2(0.0001) + ");
}
}
@ -2380,15 +2302,15 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
// handle integer coordinates for texelFetch
if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA)
{
src->add("ivec2(");
src->add("vec2(");
src->add("int2(");
src->add("float2(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, texCoordDataType);
src->addFmt(", ");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, texCoordDataType);
src->addFmt(")*uf_tex{}Scale", texInstruction->textureFetch.textureIndex); // close vec2 and scale
src->addFmt(")*supportBuffer.tex{}Scale", texInstruction->textureFetch.textureIndex); // close float2 and scale
src->add("), 0"); // close ivec2 and lod param
src->add("), 0"); // close int2 and lod param
// todo - lod
}
else if (texDim == Latte::E_DIM::DIM_1D)
@ -2397,7 +2319,7 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
src->add("int(");
src->add("float(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, (texOpcode == GPU7_TEX_INST_LD) ? LATTE_DECOMPILER_DTYPE_SIGNED_INT : LATTE_DECOMPILER_DTYPE_FLOAT);
src->addFmt(")*uf_tex{}Scale.x", texInstruction->textureFetch.textureIndex);
src->addFmt(")*supportBuffer.tex{}Scale.x", texInstruction->textureFetch.textureIndex);
src->add("), 0");
// todo - lod
}
@ -2412,8 +2334,8 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
// shadow sampler
if (texDim == Latte::E_DIM::DIM_2D_ARRAY)
{
// 3 coords + compare value (as vec4)
src->add("vec4(");
// 3 coords + compare value (as float4)
src->add("float4(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(",");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
@ -2429,7 +2351,7 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
{
debugBreakpoint();
}
src->add("vec4(");
src->add("float4(");
src->addFmt("redcCUBEReverse({},", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0));
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
src->addFmt(")");
@ -2442,22 +2364,22 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
{
debugBreakpoint();
}
src->addFmt("vec3({},0.0,{})", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], -1, -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1));
src->addFmt("float3({},0.0,{})", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], -1, -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1));
}
else
{
// 2 coords + compare value (as vec3)
// 2 coords + compare value (as float3)
if (texInstruction->textureFetch.srcSel[0] >= 4 && texInstruction->textureFetch.srcSel[1] >= 4)
{
debugBreakpoint();
}
src->addFmt("vec3({}, {})", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1));
src->addFmt("float3({}, {})", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1));
}
}
else if( texDim == Latte::E_DIM::DIM_3D || texDim == Latte::E_DIM::DIM_2D_ARRAY )
{
// 3 coords
src->add("vec3(");
src->add("float3(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(",");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
@ -2470,7 +2392,7 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
// 2 coords + faceId
cemu_assert_debug(texInstruction->textureFetch.srcSel[0] < 4);
cemu_assert_debug(texInstruction->textureFetch.srcSel[1] < 4);
src->add("vec4(");
src->add("float4(");
src->addFmt("redcCUBEReverse({},", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0));
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
src->add(")");
@ -2484,14 +2406,14 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
else
{
// 2 coords
src->add("vec2(");
src->add("float2(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(",");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(")");
// avoid truncate to effectively round downwards on texel edges
if (ActiveSettings::ForceSamplerRoundToPrecision())
src->addFmt("+ vec2(1.0)/vec2(textureSize(tex{}, 0))/512.0", texInstruction->textureFetch.textureIndex);
src->addFmt("+ float2(1.0)/float2(textureSize(tex{}, 0))/512.0", texInstruction->textureFetch.textureIndex);
}
// lod or lod bias parameter
if( texOpcode == GPU7_TEX_INST_SAMPLE_L || texOpcode == GPU7_TEX_INST_SAMPLE_LB || texOpcode == GPU7_TEX_INST_SAMPLE_C_L)
@ -2547,9 +2469,9 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
if( offsetComponentCount == 1 )
src->addFmt(",{}", texInstruction->textureFetch.offsetX/2);
else if( offsetComponentCount == 2 )
src->addFmt(",ivec2({},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2);
src->addFmt(",int2({},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2);
else if( offsetComponentCount == 3 )
src->addFmt(",ivec3({},{},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2);
src->addFmt(",int3({},{},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2);
}
}
// lod bias
@ -2661,17 +2583,17 @@ static void _emitTEXGetTextureResInfoCode(LatteDecompilerShaderContext* shaderCo
auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex];
if (texDim == Latte::E_DIM::DIM_1D)
src->addFmt(" = ivec4(textureSize(tex{}, 0),1,1,1).", texInstruction->textureFetch.textureIndex);
src->addFmt(" = int4(textureSize(tex{}, 0),1,1,1).", texInstruction->textureFetch.textureIndex);
else if (texDim == Latte::E_DIM::DIM_1D_ARRAY)
src->addFmt(" = ivec4(textureSize(tex{}, 0),1,1).", texInstruction->textureFetch.textureIndex);
src->addFmt(" = int4(textureSize(tex{}, 0),1,1).", texInstruction->textureFetch.textureIndex);
else if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA)
src->addFmt(" = ivec4(textureSize(tex{}, 0),1,1).", texInstruction->textureFetch.textureIndex);
src->addFmt(" = int4(textureSize(tex{}, 0),1,1).", texInstruction->textureFetch.textureIndex);
else if (texDim == Latte::E_DIM::DIM_2D_ARRAY)
src->addFmt(" = ivec4(textureSize(tex{}, 0),1).", texInstruction->textureFetch.textureIndex);
src->addFmt(" = int4(textureSize(tex{}, 0),1).", texInstruction->textureFetch.textureIndex);
else
{
cemu_assert_debug(false);
src->addFmt(" = ivec4(textureSize(tex{}, 0),1,1).", texInstruction->textureFetch.textureIndex);
src->addFmt(" = int4(textureSize(tex{}, 0),1,1).", texInstruction->textureFetch.textureIndex);
}
for(sint32 f=0; f<4; f++)
@ -2725,16 +2647,16 @@ static void _emitTEXGetCompTexLodCode(LatteDecompilerShaderContext* shaderContex
{
// 3 coordinates
if(shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
src->addFmt("vec4(textureQueryLod(tex{}, {}.{}{}{}),0.0,0.0)", texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]);
src->addFmt("float4(textureQueryLod(tex{}, {}.{}{}{}),0.0,0.0)", texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]);
else
src->addFmt("vec4(textureQueryLod(tex{}, intBitsToFloat({}.{}{}{})),0.0,0.0)", texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]);
src->addFmt("float4(textureQueryLod(tex{}, as_type<float>({}.{}{}{})),0.0,0.0)", texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]);
}
else
{
if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
src->addFmt("vec4(textureQueryLod(tex{}, {}.{}{}),0.0,0.0)", texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]);
src->addFmt("float4(textureQueryLod(tex{}, {}.{}{}),0.0,0.0)", texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]);
else
src->addFmt("vec4(textureQueryLod(tex{}, intBitsToFloat({}.{}{})),0.0,0.0)", texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]);
src->addFmt("float4(textureQueryLod(tex{}, as_type<float>({}.{}{})),0.0,0.0)", texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]);
debugBreakpoint();
}
@ -2768,7 +2690,7 @@ static void _emitTEXSetCubemapIndexCode(LatteDecompilerShaderContext* shaderCont
const char* resultElemTable[4] = {"x","y","z","w"};
if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
src->addFmt(" = intBitsToFloat(R{}i.{});" _CRLF, texInstruction->srcGpr, resultElemTable[texInstruction->textureFetch.srcSel[0]]);
src->addFmt(" = as_type<float>(R{}i.{});" _CRLF, texInstruction->srcGpr, resultElemTable[texInstruction->textureFetch.srcSel[0]]);
else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
src->addFmt(" = R{}f.{};" _CRLF, texInstruction->srcGpr, resultElemTable[texInstruction->textureFetch.srcSel[0]]);
else
@ -2942,7 +2864,7 @@ static void _emitTEXVFetchCode(LatteDecompilerShaderContext* shaderContext, Latt
src->add(" = ");
if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
src->add("floatBitsToInt(");
src->add("as_type<int>(");
else
src->add("(");
@ -2951,7 +2873,7 @@ static void _emitTEXVFetchCode(LatteDecompilerShaderContext* shaderContext, Latt
if( shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
src->addFmt("{}.{}", _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]]);
else
src->addFmt("floatBitsToInt({}.{})", _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]]);
src->addFmt("as_type<int>({}.{})", _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]]);
src->add("].");
@ -2983,7 +2905,7 @@ static void _emitTEXReadMemCode(LatteDecompilerShaderContext* shaderContext, Lat
src->add(" = ");
if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
src->add("floatBitsToInt(");
src->add("as_type<int>(");
else
src->add("(");
@ -2999,13 +2921,13 @@ static void _emitTEXReadMemCode(LatteDecompilerShaderContext* shaderContext, Lat
{
readCount = 2;
// todo
src->add("vec2(0.0,0.0)");
src->add("float2(0.0,0.0)");
}
else if (texInstruction->memRead.format == FMT_32_32_32_FLOAT)
{
readCount = 3;
// todo
src->add("vec3(0.0,0.0,0.0)");
src->add("float3(0.0,0.0,0.0)");
}
else
{
@ -3068,14 +2990,14 @@ static void _emitExportGPRReadCode(LatteDecompilerShaderContext* shaderContext,
if(numOutputs == 1)
src->add("float(");
else
src->addFmt("vec{}(", numOutputs);
src->addFmt("float{}(", numOutputs);
}
else if (requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
{
if (numOutputs == 1)
src->add("int(");
else
src->addFmt("ivec{}(", numOutputs);
src->addFmt("int{}(", numOutputs);
}
else
cemu_assert_unimplemented();
@ -3149,17 +3071,17 @@ static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDe
if (hasAnyViewportScaleDisabled)
{
src->add("vec4 finalPos = ");
src->add("float4 finalPos = ");
_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, 0);
src->add(";" _CRLF);
src->add("finalPos.xy = finalPos.xy * uf_windowSpaceToClipSpaceTransform - vec2(1.0,1.0);");
src->add("SET_POSITION(finalPos);");
src->add("finalPos.xy = finalPos.xy * supportBuffer.windowSpaceToClipSpaceTransform - float2(1.0,1.0);");
src->add("out.position = finalPos;");
}
else
{
src->add("SET_POSITION(");
src->add("out.position = ");
_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, 0);
src->add(");" _CRLF);
src->add(";" _CRLF);
}
}
else if (cfInstruction->exportType == 1 && cfInstruction->exportArrayBase == GPU7_DECOMPILER_CF_EXPORT_POINT_SIZE )
@ -3181,7 +3103,7 @@ static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDe
uint32 vsSemanticId = _getVertexShaderOutParamSemanticId(shaderContext->contextRegisters, paramIndex);
if (vsSemanticId != 0xFF)
{
src->addFmt("passParameterSem{} = ", vsSemanticId);
src->addFmt("out.passParameterSem{} = ", vsSemanticId);
_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, 0);
src->add(";" _CRLF);
}
@ -3235,7 +3157,7 @@ static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDe
src->add(">=");
break;
}
src->add(" uf_alphaTestRef");
src->add(" supportBuffer.alphaTestRef");
src->add(") == false) discard;" _CRLF);
}
// pixel color output
@ -3395,13 +3317,13 @@ static void _emitCFRingWriteCode(LatteDecompilerShaderContext* shaderContext, La
if (parameterExportType == 1 && parameterExportBase == GPU7_DECOMPILER_CF_EXPORT_BASE_POSITION)
{
src->add("{" _CRLF);
src->addFmt("vec4 pos = vec4(0.0,0.0,0.0,1.0);" _CRLF);
src->addFmt("float4 pos = float4(0.0,0.0,0.0,1.0);" _CRLF);
src->addFmt("pos.");
_emitXYZWByMask(src, cfInstruction->memWriteCompMask);
src->addFmt(" = ");
_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, burstIndex);
src->add(";" _CRLF);
src->add("SET_POSITION(pos);" _CRLF);
src->add("out.position = pos;" _CRLF);
src->add("}" _CRLF);
}
else if (parameterExportType == 2 && parameterExportBase < 16)
@ -3645,7 +3567,7 @@ void LatteDecompiler_emitClauseCodeMSL(LatteDecompilerShaderContext* shaderConte
src->addFmt("if( {} == true ) {{" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1));
// write point size
if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false)
src->add("gl_PointSize = uf_pointSize;" _CRLF);
src->add("gl_PointSize = supportBuffer.pointSize;" _CRLF);
// emit vertex
src->add("EmitVertex();" _CRLF);
// increment transform feedback pointer
@ -3681,11 +3603,11 @@ void LatteDecompiler_emitHelperFunctions(LatteDecompilerShaderContext* shaderCon
{
if( shaderContext->analyzer.hasRedcCUBE )
{
fCStr_shaderSource->add("void redcCUBE(vec4 src0, vec4 src1, out vec3 stm, out int faceId)\r\n"
fCStr_shaderSource->add("void redcCUBE(float4 src0, float4 src1, out float3 stm, out int faceId)\r\n"
"{\r\n"
"// stm -> x .. s, y .. t, z .. MajorAxis*2.0\r\n"
"vec3 inputCoord = normalize(vec3(src1.y, src1.x, src0.x));\r\n"
"float3 inputCoord = normalize(float3(src1.y, src1.x, src0.x));\r\n"
"float rx = inputCoord.x;\r\n"
"float ry = inputCoord.y;\r\n"
@ -3693,7 +3615,7 @@ void LatteDecompiler_emitHelperFunctions(LatteDecompilerShaderContext* shaderCon
"if( abs(rx) > abs(ry) && abs(rx) > abs(rz) )\r\n"
"{\r\n"
"stm.z = rx*2.0;\r\n"
"stm.xy = vec2(ry,rz); \r\n"
"stm.xy = float2(ry,rz); \r\n"
"if( rx >= 0.0 )\r\n"
"{\r\n"
"faceId = 0;\r\n"
@ -3706,7 +3628,7 @@ void LatteDecompiler_emitHelperFunctions(LatteDecompilerShaderContext* shaderCon
"else if( abs(ry) > abs(rx) && abs(ry) > abs(rz) )\r\n"
"{\r\n"
"stm.z = ry*2.0;\r\n"
"stm.xy = vec2(rx,rz); \r\n"
"stm.xy = float2(rx,rz); \r\n"
"if( ry >= 0.0 )\r\n"
"{\r\n"
"faceId = 2;\r\n"
@ -3719,7 +3641,7 @@ void LatteDecompiler_emitHelperFunctions(LatteDecompilerShaderContext* shaderCon
"else //if( abs(rz) > abs(ry) && abs(rz) > abs(rx) )\r\n"
"{\r\n"
"stm.z = rz*2.0;\r\n"
"stm.xy = vec2(rx,ry); \r\n"
"stm.xy = float2(rx,ry); \r\n"
"if( rz >= 0.0 )\r\n"
"{\r\n"
"faceId = 4;\r\n"
@ -3734,39 +3656,39 @@ void LatteDecompiler_emitHelperFunctions(LatteDecompilerShaderContext* shaderCon
if( shaderContext->analyzer.hasCubeMapTexture )
{
fCStr_shaderSource->add("vec3 redcCUBEReverse(vec2 st, int faceId)\r\n"
fCStr_shaderSource->add("float3 redcCUBEReverse(float2 st, int faceId)\r\n"
"{\r\n"
"st.yx = st.xy;\r\n"
"vec3 v;\r\n"
"float3 v;\r\n"
"float majorAxis = 1.0;\r\n"
"if( faceId == 0 )\r\n"
"{\r\n"
"v.yz = (st-vec2(1.5))*(majorAxis*2.0);\r\n"
"v.yz = (st-float2(1.5))*(majorAxis*2.0);\r\n"
"v.x = 1.0;\r\n"
"}\r\n"
"else if( faceId == 1 )\r\n"
"{\r\n"
"v.yz = (st-vec2(1.5))*(majorAxis*2.0);\r\n"
"v.yz = (st-float2(1.5))*(majorAxis*2.0);\r\n"
"v.x = -1.0;\r\n"
"}\r\n"
"else if( faceId == 2 )\r\n"
"{\r\n"
"v.xz = (st-vec2(1.5))*(majorAxis*2.0);\r\n"
"v.xz = (st-float2(1.5))*(majorAxis*2.0);\r\n"
"v.y = 1.0;\r\n"
"}\r\n"
"else if( faceId == 3 )\r\n"
"{\r\n"
"v.xz = (st-vec2(1.5))*(majorAxis*2.0);\r\n"
"v.xz = (st-float2(1.5))*(majorAxis*2.0);\r\n"
"v.y = -1.0;\r\n"
"}\r\n"
"else if( faceId == 4 )\r\n"
"{\r\n"
"v.xy = (st-vec2(1.5))*(majorAxis*2.0);\r\n"
"v.xy = (st-float2(1.5))*(majorAxis*2.0);\r\n"
"v.z = 1.0;\r\n"
"}\r\n"
"else\r\n"
"{\r\n"
"v.xy = (st-vec2(1.5))*(majorAxis*2.0);\r\n"
"v.xy = (st-float2(1.5))*(majorAxis*2.0);\r\n"
"v.z = -1.0;\r\n"
"}\r\n"
@ -3779,10 +3701,10 @@ void LatteDecompiler_emitHelperFunctions(LatteDecompilerShaderContext* shaderCon
"int clampFI32(int v)\r\n"
"{\r\n"
"if( v == 0x7FFFFFFF )\r\n"
" return floatBitsToInt(1.0);\r\n"
" return as_type<int>(1.0);\r\n"
"else if( v == 0xFFFFFFFF )\r\n"
" return floatBitsToInt(0.0);\r\n"
"return floatBitsToInt(clamp(intBitsToFloat(v), 0.0, 1.0));\r\n"
" return as_type<int>(0.0);\r\n"
"return as_type<int>(clamp(as_type<float>(v), 0.0, 1.0));\r\n"
"}\r\n");
// mul non-ieee way (0*NaN/INF => 0.0)
if (shaderContext->options->strictMul)
@ -3791,7 +3713,7 @@ void LatteDecompiler_emitHelperFunctions(LatteDecompilerShaderContext* shaderCon
//fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ return mix(a*b,0.0,a==0.0||b==0.0); }" STR_LINEBREAK);
//fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ return mix(vec2(a*b,0.0),vec2(0.0,0.0),(equal(vec2(a),vec2(0.0,0.0))||equal(vec2(b),vec2(0.0,0.0)))).x; }" STR_LINEBREAK);
//fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ if( a == 0.0 || b == 0.0 ) return 0.0; return a*b; }" STR_LINEBREAK);
//fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){float r = a*b;r = intBitsToFloat(floatBitsToInt(r)&(((floatBitsToInt(a) != 0) && (floatBitsToInt(b) != 0))?0xFFFFFFFF:0));return r;}" STR_LINEBREAK); works
//fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){float r = a*b;r = as_type<float>(floatBitsToInt(r)&(((floatBitsToInt(a) != 0) && (floatBitsToInt(b) != 0))?0xFFFFFFFF:0));return r;}" STR_LINEBREAK); works
// for "min" it used to be: float mul_nonIEEE(float a, float b){ return min(a*b,min(abs(a)*3.40282347E+38F,abs(b)*3.40282347E+38F)); }
@ -3836,9 +3758,9 @@ static void LatteDecompiler_emitAttributeImport(LatteDecompilerShaderContext* sh
LatteDecompiler_emitAttributeDecodeMSL(shaderContext->shader, src, &attrib);
if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
src->addFmt("{} = ivec4(", _getRegisterVarName(shaderContext, registerIndex));
src->addFmt("{} = int4(", _getRegisterVarName(shaderContext, registerIndex));
else
src->addFmt("{} = vec4(", _getRegisterVarName(shaderContext, registerIndex));
src->addFmt("{} = float4(", _getRegisterVarName(shaderContext, registerIndex));
for (sint32 f = 0; f < 4; f++)
{
uint8 ds = attrib.ds[f];
@ -3874,23 +3796,28 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
src->addFmt("// usesIntegerValues: {}" _CRLF, shaderContext->analyzer.usesIntegerValues?"true":"false");
src->addFmt(_CRLF);
#endif
// include metal standard library
src->add("#include <metal_stdlib>" _CRLF);
src->add("using namespace metal;" _CRLF);
// header part (definitions for inputs and outputs)
LatteDecompiler::emitHeader(shaderContext);
// helper functions
LatteDecompiler_emitHelperFunctions(shaderContext, src);
const char* outputTypeName;
switch (shader->shaderType)
{
case LatteConst::ShaderType::Vertex:
src->add("VertexOut");
outputTypeName = "VertexOut";
break;
case LatteConst::ShaderType::Pixel:
src->add("FragmentOut");
outputTypeName = "FragmentOut";
break;
}
// start of main
src->add(" main0(");
src->addFmt("{} main0(", outputTypeName);
LatteDecompiler::emitInputs(shaderContext);
src->add(") {" _CRLF);
src->addFmt("{} out;" _CRLF, outputTypeName);
// variable definition
if (shaderContext->typeTracker.useArrayGPRs == false)
{
@ -3997,7 +3924,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
cemu_assert_debug((shaderContext->output->streamoutBufferStride[i]&3) == 0);
if (shader->shaderType == LatteConst::ShaderType::Vertex) // vertex shader
src->addFmt("int sbBase{} = uf_streamoutBufferBase{}/4 + (vid + uf_verticesPerInstance * iid)*{};" _CRLF, i, i, shaderContext->output->streamoutBufferStride[i] / 4);
src->addFmt("int sbBase{} = supportBuffer.streamoutBufferBase{}/4 + (vid + supportBuffer.verticesPerInstance * iid)*{};" _CRLF, i, i, shaderContext->output->streamoutBufferStride[i] / 4);
else // geometry shader
{
uint32 gsOutPrimType = shaderContext->contextRegisters[mmVGT_GS_OUT_PRIM_TYPE];
@ -4006,7 +3933,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
cemu_assert_debug(gsOutPrimType == 0); // currently we only properly handle GS output primitive points
src->addFmt("int sbBase{} = uf_streamoutBufferBase{}/4 + (gl_PrimitiveIDIn * {})*{};" _CRLF, i, i, maxVerticesInGS, shaderContext->output->streamoutBufferStride[i] / 4);
src->addFmt("int sbBase{} = supportBuffer.streamoutBufferBase{}/4 + (gl_PrimitiveIDIn * {})*{};" _CRLF, i, i, maxVerticesInGS, shaderContext->output->streamoutBufferStride[i] / 4);
}
}
@ -4019,7 +3946,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
src->addFmt("{} = int4(vid, 0, 0, iid);" _CRLF, _getRegisterVarName(shaderContext, 0));
else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
src->addFmt("{} = as_type<int4>(float4(vid, 0, 0, iid));" _CRLF, _getRegisterVarName(shaderContext, 0)); // TODO: is this correct?
src->addFmt("{} = float4(vid, 0, 0, iid);" _CRLF, _getRegisterVarName(shaderContext, 0)); // TODO: as_type<int4>(float4(vid, 0, 0, iid))?
else
cemu_assert_unimplemented();
}
@ -4097,9 +4024,9 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
{
// import from vertex shader
if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
src->addFmt("{} = as_type<int4>(passParameterSem{});" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId);
src->addFmt("{} = as_type<int4>(in.passParameterSem{});" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId);
else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
src->addFmt("{} = passParameterSem{};" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId);
src->addFmt("{} = in.passParameterSem{};" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId);
else
cemu_assert_unimplemented();
}
@ -4112,7 +4039,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
if (frontFace_allBits)
cemu_assert_debug(false);
if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
src->addFmt("{}.{} = as_type<int4>(gl_FrontFacing?1.0:0.0);" _CRLF, _getRegisterVarName(shaderContext, frontFace_regIndex), _getElementStrByIndex(frontFace_chan));
src->addFmt("{}.{} = as_type<int4>(frontFacing?1.0:0.0);" _CRLF, _getRegisterVarName(shaderContext, frontFace_regIndex), _getElementStrByIndex(frontFace_chan));
else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
src->addFmt("{}.{} = frontFacing ? 1.0 : 0.0;" _CRLF, _getRegisterVarName(shaderContext, frontFace_regIndex), _getElementStrByIndex(frontFace_chan));
else
@ -4128,8 +4055,10 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false)
{
if (shader->shaderType == LatteConst::ShaderType::Vertex && shaderContext->options->usesGeometryShader == false)
src->add("out.pointSize = uf_pointSize;" _CRLF);
src->add("out.pointSize = supportBuffer.pointSize;" _CRLF);
}
// return
src->add("return out;" _CRLF);
// end of shader main
src->add("}" _CRLF);
src->shrink_to_fit();

View File

@ -12,50 +12,50 @@
// Emits MSL that copies all four components of vertex attribute
// `attributeInputIndex` into attrDecoder without any conversion.
//   shaderContext       - not used by this helper
//   src                 - string buffer the generated shader code is appended to
//   attributeInputIndex - index substituted into the attrDataSem{} name
// Only the `in.`-qualified read is emitted: an unqualified `attrDataSem{}`
// assignment would be overwritten immediately by this one.
// NOTE(review): assumes the attribute is a member of the shader's `in` argument - confirm against emitInputs.
static void _readLittleEndianAttributeU32x4(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
	src->addFmt("attrDecoder = in.attrDataSem{};" _CRLF, attributeInputIndex);
}
// Emits MSL that loads the xyz components of vertex attribute
// `attributeInputIndex` into attrDecoder and sets w to 0.
//   shaderContext       - not used by this helper
//   src                 - string buffer the generated shader code is appended to
//   attributeInputIndex - index substituted into the attrDataSem{} name
// Only the `in.`-qualified read is emitted: an unqualified `attrDataSem{}`
// assignment would be overwritten immediately by this one.
static void _readLittleEndianAttributeU32x3(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
	src->addFmt("attrDecoder = uint4(in.attrDataSem{}.xyz,0);" _CRLF, attributeInputIndex);
}
// Emits MSL that loads the xy components of vertex attribute
// `attributeInputIndex` into attrDecoder and sets z and w to 0.
//   shaderContext       - not used by this helper
//   src                 - string buffer the generated shader code is appended to
//   attributeInputIndex - index substituted into the attrDataSem{} name
// Only the `in.`-qualified read is emitted: an unqualified `attrDataSem{}`
// assignment would be overwritten immediately by this one.
static void _readLittleEndianAttributeU32x2(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
	src->addFmt("attrDecoder = uint4(in.attrDataSem{}.xy,0,0);" _CRLF, attributeInputIndex);
}
// Emits MSL that loads the x component of vertex attribute
// `attributeInputIndex` into attrDecoder and sets y, z and w to 0.
//   shaderContext       - not used by this helper
//   src                 - string buffer the generated shader code is appended to
//   attributeInputIndex - index substituted into the attrDataSem{} name
// Only the `in.`-qualified read is emitted: an unqualified `attrDataSem{}`
// assignment would be overwritten immediately by this one.
static void _readLittleEndianAttributeU32x1(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
	src->addFmt("attrDecoder = uint4(in.attrDataSem{}.x,0,0,0);" _CRLF, attributeInputIndex);
}
// Emits MSL that loads the xy components of vertex attribute
// `attributeInputIndex` into attrDecoder and sets z and w to 0.
// The generated statement is the same as the one for the U32x2 variant.
//   shaderContext       - not used by this helper
//   src                 - string buffer the generated shader code is appended to
//   attributeInputIndex - index substituted into the attrDataSem{} name
// Only the `in.`-qualified read is emitted: an unqualified `attrDataSem{}`
// assignment would be overwritten immediately by this one.
static void _readLittleEndianAttributeU16x2(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
	src->addFmt("attrDecoder = uint4(in.attrDataSem{}.xy,0,0);" _CRLF, attributeInputIndex);
}
// Emits MSL that copies all four components of vertex attribute
// `attributeInputIndex` into attrDecoder without any conversion.
// The generated statement is the same as the one for the U32x4 variant.
//   shaderContext       - not used by this helper
//   src                 - string buffer the generated shader code is appended to
//   attributeInputIndex - index substituted into the attrDataSem{} name
// Only the `in.`-qualified read is emitted: an unqualified `attrDataSem{}`
// assignment would be overwritten immediately by this one.
static void _readLittleEndianAttributeU16x4(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
	src->addFmt("attrDecoder = in.attrDataSem{};" _CRLF, attributeInputIndex);
}
// Emits MSL that loads all four components of vertex attribute
// `attributeInputIndex` into attrDecoder and then reverses the byte order of
// each 32-bit component.
//   shaderContext       - not used by this helper
//   src                 - string buffer the generated shader code is appended to
//   attributeInputIndex - index substituted into the attrDataSem{} name
// Only the `in.`-qualified read is emitted: an unqualified `attrDataSem{}`
// assignment would be overwritten immediately by this one.
static void _readBigEndianAttributeU32x4(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
	src->addFmt("attrDecoder = in.attrDataSem{};" _CRLF, attributeInputIndex);
	// 32-bit byte swap: byte3->byte0, byte2->byte1, byte1->byte2, byte0->byte3
	src->add("attrDecoder = (attrDecoder>>24)|((attrDecoder>>8)&0xFF00)|((attrDecoder<<8)&0xFF0000)|((attrDecoder<<24));" _CRLF);
}
// Emits MSL that loads the xyz components of vertex attribute
// `attributeInputIndex` into attrDecoder, reverses the byte order of each of
// those 32-bit components, and sets w to 0.
//   shaderContext       - not used by this helper
//   src                 - string buffer the generated shader code is appended to
//   attributeInputIndex - index substituted into the attrDataSem{} name
// Only the `in.`-qualified read is emitted: an unqualified `attrDataSem{}`
// assignment would be overwritten immediately by this one.
static void _readBigEndianAttributeU32x3(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
	src->addFmt("attrDecoder.xyz = in.attrDataSem{}.xyz;" _CRLF, attributeInputIndex);
	// 32-bit byte swap applied to x, y and z only
	src->add("attrDecoder.xyz = (attrDecoder.xyz>>24)|((attrDecoder.xyz>>8)&0xFF00)|((attrDecoder.xyz<<8)&0xFF0000)|((attrDecoder.xyz<<24));" _CRLF);
	src->add("attrDecoder.w = 0;" _CRLF);
}
static void _readBigEndianAttributeU32x2(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
src->addFmt("attrDecoder.xy = attrDataSem{}.xy;" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.xy = in.attrDataSem{}.xy;" _CRLF, attributeInputIndex);
src->add("attrDecoder.xy = (attrDecoder.xy>>24)|((attrDecoder.xy>>8)&0xFF00)|((attrDecoder.xy<<8)&0xFF0000)|((attrDecoder.xy<<24));" _CRLF);
src->add("attrDecoder.z = 0;" _CRLF);
src->add("attrDecoder.w = 0;" _CRLF);
@ -63,7 +63,7 @@ static void _readBigEndianAttributeU32x2(LatteDecompilerShader* shaderContext, S
static void _readBigEndianAttributeU32x1(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
src->addFmt("attrDecoder.x = attrDataSem{}.x;" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.x = in.attrDataSem{}.x;" _CRLF, attributeInputIndex);
src->add("attrDecoder.x = (attrDecoder.x>>24)|((attrDecoder.x>>8)&0xFF00)|((attrDecoder.x<<8)&0xFF0000)|((attrDecoder.x<<24));" _CRLF);
src->add("attrDecoder.y = 0;" _CRLF);
src->add("attrDecoder.z = 0;" _CRLF);
@ -72,7 +72,7 @@ static void _readBigEndianAttributeU32x1(LatteDecompilerShader* shaderContext, S
static void _readBigEndianAttributeU16x1(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
src->addFmt("attrDecoder.xy = attrDataSem{}.xy;" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.xy = in.attrDataSem{}.xy;" _CRLF, attributeInputIndex);
src->add("attrDecoder.x = ((attrDecoder.x>>8)&0xFF)|((attrDecoder.x<<8)&0xFF00);" _CRLF);
src->add("attrDecoder.y = 0;" _CRLF);
src->add("attrDecoder.z = 0;" _CRLF);
@ -81,7 +81,7 @@ static void _readBigEndianAttributeU16x1(LatteDecompilerShader* shaderContext, S
static void _readBigEndianAttributeU16x2(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
src->addFmt("attrDecoder.xy = attrDataSem{}.xy;" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.xy = in.attrDataSem{}.xy;" _CRLF, attributeInputIndex);
src->add("attrDecoder.xy = ((attrDecoder.xy>>8)&0xFF)|((attrDecoder.xy<<8)&0xFF00);" _CRLF);
src->add("attrDecoder.z = 0;" _CRLF);
src->add("attrDecoder.w = 0;" _CRLF);
@ -89,7 +89,7 @@ static void _readBigEndianAttributeU16x2(LatteDecompilerShader* shaderContext, S
// Emits MSL that loads all four components of vertex attribute
// `attributeInputIndex` into attrDecoder and then swaps the two low bytes of
// each component (the result keeps only the low 16 bits).
//   shaderContext       - not used by this helper
//   src                 - string buffer the generated shader code is appended to
//   attributeInputIndex - index substituted into the attrDataSem{} name
// Only the `in.`-qualified read is emitted: an unqualified `attrDataSem{}`
// assignment would be overwritten immediately by this one.
static void _readBigEndianAttributeU16x4(LatteDecompilerShader* shaderContext, StringBuf* src, uint32 attributeInputIndex)
{
	src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.xyzw;" _CRLF, attributeInputIndex);
	// 16-bit byte swap: high byte moves to bits 0-7, low byte to bits 8-15
	src->add("attrDecoder = ((attrDecoder>>8)&0xFF)|((attrDecoder<<8)&0xFF00);" _CRLF);
}
@ -167,12 +167,12 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext
else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 0 && attrib->isSigned == 0 )
{
// seen in Minecraft Wii U Edition
src->addFmt("attrDecoder.xyzw = as_type<uint>(vec4(attrDataSem{}.wzyx)/255.0);" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.xyzw = as_type<uint>(float4(in.attrDataSem{}.wzyx)/255.0);" _CRLF, attributeInputIndex);
}
else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 0 && attrib->isSigned != 0 )
{
// seen in Minecraft Wii U Edition
src->addFmt("attrDecoder.xyzw = attrDataSem{}.wzyx;" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.wzyx;" _CRLF, attributeInputIndex);
src->add("if( (attrDecoder.x&0x80) != 0 ) attrDecoder.x |= 0xFFFFFF00;" _CRLF);
src->add("if( (attrDecoder.y&0x80) != 0 ) attrDecoder.y |= 0xFFFFFF00;" _CRLF);
src->add("if( (attrDecoder.z&0x80) != 0 ) attrDecoder.z |= 0xFFFFFF00;" _CRLF);
@ -185,12 +185,12 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext
else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 1 && attrib->isSigned == 0 )
{
// seen in Minecraft Wii U Edition
src->addFmt("attrDecoder.xyzw = attrDataSem{}.wzyx;" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.wzyx;" _CRLF, attributeInputIndex);
}
else if (attrib->format == FMT_8_8_8_8 && attrib->nfa == 2 && attrib->isSigned == 0)
{
// seen in Ben 10 Omniverse
src->addFmt("attrDecoder.xyzw = as_type<uint>(vec4(attrDataSem{}.wzyx));" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.xyzw = as_type<uint>(float4(in.attrDataSem{}.wzyx));" _CRLF, attributeInputIndex);
}
else
{
@ -275,11 +275,11 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext
}
else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 0 && attrib->isSigned == 0 )
{
src->addFmt("attrDecoder.xyzw = as_type<uint4>(float4(attrDataSem{}.xyzw)/255.0);" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.xyzw = as_type<uint4>(float4(in.attrDataSem{}.xyzw)/255.0);" _CRLF, attributeInputIndex);
}
else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 0 && attrib->isSigned != 0 )
{
src->addFmt("attrDecoder.xyzw = attrDataSem{}.xyzw;" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.xyzw;" _CRLF, attributeInputIndex);
src->add("if( (attrDecoder.x&0x80) != 0 ) attrDecoder.x |= 0xFFFFFF00;" _CRLF);
src->add("if( (attrDecoder.y&0x80) != 0 ) attrDecoder.y |= 0xFFFFFF00;" _CRLF);
src->add("if( (attrDecoder.z&0x80) != 0 ) attrDecoder.z |= 0xFFFFFF00;" _CRLF);
@ -291,12 +291,12 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext
}
else if (attrib->format == FMT_8_8_8_8 && attrib->nfa == 1 && attrib->isSigned == 0)
{
src->addFmt("attrDecoder.xyzw = attrDataSem{}.xyzw;" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.xyzw;" _CRLF, attributeInputIndex);
}
else if (attrib->format == FMT_8_8_8_8 && attrib->nfa == 1 && attrib->isSigned != 0)
{
// seen in Sonic Lost World
src->addFmt("attrDecoder.xyzw = attrDataSem{}.xyzw;" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.xyzw = in.attrDataSem{}.xyzw;" _CRLF, attributeInputIndex);
src->add("if( (attrDecoder.x&0x80) != 0 ) attrDecoder.x |= 0xFFFFFF00;" _CRLF);
src->add("if( (attrDecoder.y&0x80) != 0 ) attrDecoder.y |= 0xFFFFFF00;" _CRLF);
src->add("if( (attrDecoder.z&0x80) != 0 ) attrDecoder.z |= 0xFFFFFF00;" _CRLF);
@ -305,19 +305,19 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext
else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 2 && attrib->isSigned == 0 )
{
// seen in One Piece
src->addFmt("attrDecoder.xyzw = as_type<int4>(float4(attrDataSem{}.xyzw));" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.xyzw = as_type<int4>(float4(in.attrDataSem{}.xyzw));" _CRLF, attributeInputIndex);
}
else if (attrib->format == FMT_8_8 && attrib->nfa == 0 && attrib->isSigned == 0)
{
if( (attrib->offset&3) == 2 && LatteGPUState.glVendor == GLVENDOR_AMD && g_renderer->GetType() == RendererAPI::OpenGL )
{
// AMD workaround
src->addFmt("attrDecoder.xy = as_type<uint2>(float2(attrDataSem{}.zw)/255.0);" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.xy = as_type<uint2>(float2(in.attrDataSem{}.zw)/255.0);" _CRLF, attributeInputIndex);
src->add("attrDecoder.zw = uint2(0);" _CRLF);
}
else
{
src->addFmt("attrDecoder.xy = as_type<uint2>(float2(attrDataSem{}.xy)/255.0);" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.xy = as_type<uint2>(float2(in.attrDataSem{}.xy)/255.0);" _CRLF, attributeInputIndex);
src->add("attrDecoder.zw = uint2(0);" _CRLF);
}
}
@ -327,12 +327,12 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext
if ((attrib->offset & 3) == 2 && LatteGPUState.glVendor == GLVENDOR_AMD && g_renderer->GetType() == RendererAPI::OpenGL)
{
// AMD workaround
src->addFmt("attrDecoder.xy = as_type<uint2>(float2(attrDataSem{}.zw));" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.xy = as_type<uint2>(float2(in.attrDataSem{}.zw));" _CRLF, attributeInputIndex);
src->add("attrDecoder.zw = uint2(0);" _CRLF);
}
else
{
src->addFmt("attrDecoder.xy = as_type<uint2>(float2(attrDataSem{}.xy));" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.xy = as_type<uint2>(float2(in.attrDataSem{}.xy));" _CRLF, attributeInputIndex);
src->add("attrDecoder.zw = uint2(0);" _CRLF);
}
}
@ -341,7 +341,7 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext
if ((attrib->offset & 3) == 2 && LatteGPUState.glVendor == GLVENDOR_AMD && g_renderer->GetType() == RendererAPI::OpenGL)
{
// AMD workaround
src->addFmt("attrDecoder.xy = attrDataSem{}.zw;" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.xy = in.attrDataSem{}.zw;" _CRLF, attributeInputIndex);
src->add("if( (attrDecoder.x&0x80) != 0 ) attrDecoder.x |= 0xFFFFFF00;" _CRLF);
src->add("if( (attrDecoder.y&0x80) != 0 ) attrDecoder.y |= 0xFFFFFF00;" _CRLF);
src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/127.0,-1.0));" _CRLF);
@ -350,7 +350,7 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext
}
else
{
src->addFmt("attrDecoder.xy = attrDataSem{}.xy;" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.xy = in.attrDataSem{}.xy;" _CRLF, attributeInputIndex);
src->add("if( (attrDecoder.x&0x80) != 0 ) attrDecoder.x |= 0xFFFFFF00;" _CRLF);
src->add("if( (attrDecoder.y&0x80) != 0 ) attrDecoder.y |= 0xFFFFFF00;" _CRLF);
src->add("attrDecoder.x = as_type<uint>(max(float(int(attrDecoder.x))/127.0,-1.0));" _CRLF);
@ -363,22 +363,22 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext
if ((attrib->offset & 3) == 2 && LatteGPUState.glVendor == GLVENDOR_AMD && g_renderer->GetType() == RendererAPI::OpenGL)
{
// AMD workaround
src->addFmt("attrDecoder.xyzw = uint4(attrDataSem{}.zw,0,0);" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.xyzw = uint4(in.attrDataSem{}.zw,0,0);" _CRLF, attributeInputIndex);
}
else
{
src->addFmt("attrDecoder.xyzw = uint4(attrDataSem{}.xy,0,0);" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.xyzw = uint4(in.attrDataSem{}.xy,0,0);" _CRLF, attributeInputIndex);
}
}
else if( attrib->format == FMT_8 && attrib->nfa == 0 && attrib->isSigned == 0 )
{
// seen in Pikmin 3
src->addFmt("attrDecoder.x = as_type<uint>(float(attrDataSem{}.x)/255.0);" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.x = as_type<uint>(float(in.attrDataSem{}.x)/255.0);" _CRLF, attributeInputIndex);
src->add("attrDecoder.yzw = uint3(0);" _CRLF);
}
else if( attrib->format == FMT_8 && attrib->nfa == 1 && attrib->isSigned == 0 )
{
src->addFmt("attrDecoder.xyzw = uint4(attrDataSem{}.x,0,0,0);" _CRLF, attributeInputIndex);
src->addFmt("attrDecoder.xyzw = uint4(in.attrDataSem{}.x,0,0,0);" _CRLF, attributeInputIndex);
}
else
{

View File

@ -9,7 +9,7 @@ namespace LatteDecompiler
LatteDecompilerShaderResourceMapping& resourceMapping = decompilerContext->output->resourceMappingGL;
auto& uniformOffsets = decompilerContext->output->uniformOffsetsVK;
src->add("struct DefualtUniforms {" _CRLF);
src->add("struct SupportBuffer {" _CRLF);
sint32 uniformCurrentOffset = 0;
auto shader = decompilerContext->shader;
@ -19,11 +19,11 @@ namespace LatteDecompiler
// uniform registers or buffers are accessed statically with predictable offsets
// this allows us to remap the used entries into a more compact array
if (shaderType == LatteConst::ShaderType::Vertex)
src->addFmt("ivec4 uf_remappedVS[{}];" _CRLF, (sint32)shader->list_remappedUniformEntries.size());
src->addFmt("int4 remappedVS[{}];" _CRLF, (sint32)shader->list_remappedUniformEntries.size());
else if (shaderType == LatteConst::ShaderType::Pixel)
src->addFmt("ivec4 uf_remappedPS[{}];" _CRLF, (sint32)shader->list_remappedUniformEntries.size());
src->addFmt("int4 remappedPS[{}];" _CRLF, (sint32)shader->list_remappedUniformEntries.size());
else if (shaderType == LatteConst::ShaderType::Geometry)
src->addFmt("ivec4 uf_remappedGS[{}];" _CRLF, (sint32)shader->list_remappedUniformEntries.size());
src->addFmt("int4 remappedGS[{}];" _CRLF, (sint32)shader->list_remappedUniformEntries.size());
else
debugBreakpoint();
uniformOffsets.offset_remapped = uniformCurrentOffset;
@ -34,11 +34,11 @@ namespace LatteDecompiler
uint32 cfileSize = decompilerContext->analyzer.uniformRegisterAccessTracker.DetermineSize(decompilerContext->shaderBaseHash, 256);
// full or partial uniform register file has to be present
if (shaderType == LatteConst::ShaderType::Vertex)
src->addFmt("ivec4 uf_uniformRegisterVS[{}];" _CRLF, cfileSize);
src->addFmt("int4 uniformRegisterVS[{}];" _CRLF, cfileSize);
else if (shaderType == LatteConst::ShaderType::Pixel)
src->addFmt("ivec4 uf_uniformRegisterPS[{}];" _CRLF, cfileSize);
src->addFmt("int4 uniformRegisterPS[{}];" _CRLF, cfileSize);
else if (shaderType == LatteConst::ShaderType::Geometry)
src->addFmt("ivec4 uf_uniformRegisterGS[{}];" _CRLF, cfileSize);
src->addFmt("int4 uniformRegisterGS[{}];" _CRLF, cfileSize);
uniformOffsets.offset_uniformRegister = uniformCurrentOffset;
uniformOffsets.count_uniformRegister = cfileSize;
uniformCurrentOffset += 16 * cfileSize;
@ -53,7 +53,7 @@ namespace LatteDecompiler
{
// aka GX2 special state 0
uniformCurrentOffset = (uniformCurrentOffset + 7)&~7;
src->add("float2 uf_windowSpaceToClipSpaceTransform;" _CRLF);
src->add("float2 windowSpaceToClipSpaceTransform;" _CRLF);
uniformOffsets.offset_windowSpaceToClipSpaceTransform = uniformCurrentOffset;
uniformCurrentOffset += 8;
}
@ -61,7 +61,7 @@ namespace LatteDecompiler
if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel && alphaTestEnable)
{
uniformCurrentOffset = (uniformCurrentOffset + 3)&~3;
src->add("float uf_alphaTestRef;" _CRLF);
src->add("float alphaTestRef;" _CRLF);
uniformOffsets.offset_alphaTestRef = uniformCurrentOffset;
uniformCurrentOffset += 4;
}
@ -71,16 +71,16 @@ namespace LatteDecompiler
decompilerContext->shaderType == LatteConst::ShaderType::Geometry)
{
uniformCurrentOffset = (uniformCurrentOffset + 3)&~3;
src->add("float uf_pointSize;" _CRLF);
src->add("float pointSize;" _CRLF);
uniformOffsets.offset_pointSize = uniformCurrentOffset;
uniformCurrentOffset += 4;
}
}
// define uf_fragCoordScale which holds the xy scale for render target resolution vs effective resolution
// define fragCoordScale which holds the xy scale for render target resolution vs effective resolution
if (shader->shaderType == LatteConst::ShaderType::Pixel)
{
uniformCurrentOffset = (uniformCurrentOffset + 7)&~7;
src->add("vec2 uf_fragCoordScale;" _CRLF);
src->add("float2 fragCoordScale;" _CRLF);
uniformOffsets.offset_fragCoordScale = uniformCurrentOffset;
uniformCurrentOffset += 8;
}
@ -90,30 +90,30 @@ namespace LatteDecompiler
if (decompilerContext->analyzer.texUnitUsesTexelCoordinates.test(t) == false)
continue;
uniformCurrentOffset = (uniformCurrentOffset + 7) & ~7;
src->addFmt("vec2 uf_tex{}Scale;" _CRLF, t);
src->addFmt("float2 tex{}Scale;" _CRLF, t);
uniformOffsets.offset_texScale[t] = uniformCurrentOffset;
uniformCurrentOffset += 8;
}
// define uf_verticesPerInstance + uf_streamoutBufferBaseX
// define verticesPerInstance + streamoutBufferBaseX
if (decompilerContext->analyzer.useSSBOForStreamout &&
(shader->shaderType == LatteConst::ShaderType::Vertex && decompilerContext->options->usesGeometryShader == false) ||
(shader->shaderType == LatteConst::ShaderType::Geometry) )
{
src->add("int uf_verticesPerInstance;" _CRLF);
src->add("int verticesPerInstance;" _CRLF);
uniformOffsets.offset_verticesPerInstance = uniformCurrentOffset;
uniformCurrentOffset += 4;
for (uint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
{
if (decompilerContext->output->streamoutBufferWriteMask[i])
{
src->addFmt("int uf_streamoutBufferBase{};" _CRLF, i);
src->addFmt("int streamoutBufferBase{};" _CRLF, i);
uniformOffsets.offset_streamoutBufferBase[i] = uniformCurrentOffset;
uniformCurrentOffset += 4;
}
}
}
src->add("}" _CRLF _CRLF);
src->add("};" _CRLF _CRLF);
uniformOffsets.offset_endOfBlock = uniformCurrentOffset;
}
@ -173,7 +173,7 @@ namespace LatteDecompiler
cemu_assert_debug(decompilerContext->output->resourceMappingVK.attributeMapping[i] >= 0);
cemu_assert_debug(decompilerContext->output->resourceMappingGL.attributeMapping[i] == decompilerContext->output->resourceMappingVK.attributeMapping[i]);
src->addFmt("ATTR_LAYOUT({}, {}) in uvec4 attrDataSem{};" _CRLF, (sint32)decompilerContext->output->resourceMappingVK.setIndex, (sint32)decompilerContext->output->resourceMappingVK.attributeMapping[i], i);
src->addFmt("uint4 attrDataSem{} [[attribute({})]];" _CRLF, i, (sint32)decompilerContext->output->resourceMappingVK.attributeMapping[i]);
}
}
src->add("};" _CRLF _CRLF);
@ -367,7 +367,17 @@ namespace LatteDecompiler
break;
}
src->add(" in [[stage_in]], DefaultVariables defaultVars [[buffer(29)]]");
src->add(" in [[stage_in]], constant SupportBuffer& supportBuffer [[buffer(29)]]");
switch (decompilerContext->shaderType)
{
case LatteConst::ShaderType::Vertex:
src->add(", uint vid [[vertex_id]]");
src->add(", uint iid [[instance_id]]");
break;
case LatteConst::ShaderType::Pixel:
src->add(", bool frontFacing [[front_facing]]");
break;
}
// uniform buffers
_emitUniformBufferDefinitions(decompilerContext);
// textures

View File

@ -85,5 +85,5 @@ LatteTextureView* LatteTextureMtl::CreateView(Latte::E_DIM dim, Latte::E_GX2SURF
// Stub: performs no allocation, only reports that the function is not implemented.
// The message is logged once, on the MetalLogging channel.
void LatteTextureMtl::AllocateOnHost()
{
	cemuLog_log(LogType::MetalLogging, "not implemented");
}

View File

@ -34,19 +34,19 @@ void MetalRenderer::Initialize()
// Stub: releases nothing, only reports that the function is not implemented.
// The message is logged once, on the MetalLogging channel.
void MetalRenderer::Shutdown()
{
	cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: reports that the function is not implemented (logged once, on the
// MetalLogging channel) and always returns false.
bool MetalRenderer::IsPadWindowActive()
{
	cemuLog_log(LogType::MetalLogging, "not implemented");

	return false;
}
bool MetalRenderer::GetVRAMInfo(int& usageInMB, int& totalInMB) const
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
usageInMB = 1024;
totalInMB = 1024;
@ -56,12 +56,12 @@ bool MetalRenderer::GetVRAMInfo(int& usageInMB, int& totalInMB) const
// Stub: `padView` is ignored and nothing is cleared; only reports that the
// function is not implemented (logged once, on the MetalLogging channel).
void MetalRenderer::ClearColorbuffer(bool padView)
{
	cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: `mainWindow` is ignored and nothing is drawn; only reports that the
// function is not implemented (logged once, on the MetalLogging channel).
void MetalRenderer::DrawEmptyFrame(bool mainWindow)
{
	cemuLog_log(LogType::MetalLogging, "not implemented");
}
void MetalRenderer::SwapBuffers(bool swapTV, bool swapDRC)
@ -83,7 +83,7 @@ void MetalRenderer::DrawBackbufferQuad(LatteTextureView* texView, RendererOutput
sint32 imageX, sint32 imageY, sint32 imageWidth, sint32 imageHeight,
bool padView, bool clearBackground)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
bool MetalRenderer::BeginFrame(bool mainWindow)
@ -96,68 +96,68 @@ bool MetalRenderer::BeginFrame(bool mainWindow)
// Stub: submits/waits for nothing; logs "not implemented". `waitIdle` is unused.
void MetalRenderer::Flush(bool waitIdle)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: takes no action on the notification; it only logs "not implemented".
void MetalRenderer::NotifyLatteCommandProcessorIdle()
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: appends nothing to the overlay; it only logs "not implemented".
void MetalRenderer::AppendOverlayDebugInfo()
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: no viewport state is applied; logs "not implemented". All parameters are unused.
void MetalRenderer::renderTarget_setViewport(float x, float y, float width, float height, float nearZ, float farZ, bool halfZ)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: no scissor state is applied; logs "not implemented". All parameters are unused.
void MetalRenderer::renderTarget_setScissor(sint32 scissorX, sint32 scissorY, sint32 scissorWidth, sint32 scissorHeight)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: creates no FBO; logs "not implemented" and returns nullptr. `key` is unused.
// Callers must be prepared for the null result.
LatteCachedFBO* MetalRenderer::rendertarget_createCachedFBO(uint64 key)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
return nullptr;
}
// Stub: `fbo` is neither inspected nor freed; logs "not implemented".
void MetalRenderer::rendertarget_deleteCachedFBO(LatteCachedFBO* fbo)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: binds nothing; logs "not implemented". `cfbo` is unused.
void MetalRenderer::rendertarget_bindFramebufferObject(LatteCachedFBO* cfbo)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: allocates no buffer; logs "not implemented" and returns nullptr. `size` is unused.
void* MetalRenderer::texture_acquireTextureUploadBuffer(uint32 size)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
return nullptr;
}
// Stub: `mem` is not released or touched; logs "not implemented".
void MetalRenderer::texture_releaseTextureUploadBuffer(uint8* mem)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: selects no decoder; logs "not implemented" and returns nullptr for every input.
TextureDecoder* MetalRenderer::texture_chooseDecodedFormat(Latte::E_GX2SURFFMT format, bool isDepth, Latte::E_DIM dim, uint32 width, uint32 height)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
return nullptr;
}
// Stub: clears nothing; logs "not implemented". All parameters are unused.
void MetalRenderer::texture_clearSlice(LatteTexture* hostTexture, sint32 sliceIndex, sint32 mipIndex)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, sint32 height, sint32 depth, void* pixelData, sint32 sliceIndex, sint32 mipIndex, uint32 compressedImageSize)
@ -171,12 +171,12 @@ void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, s
// Stub: clears nothing; logs "not implemented". The texture, slice/mip and r/g/b/a arguments are unused.
void MetalRenderer::texture_clearColorSlice(LatteTexture* hostTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: clears neither depth nor stencil; logs "not implemented". All parameters are unused.
void MetalRenderer::texture_clearDepthSlice(LatteTexture* hostTexture, uint32 sliceIndex, sint32 mipIndex, bool clearDepth, bool clearStencil, float depthValue, uint32 stencilValue)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
LatteTexture* MetalRenderer::texture_createTextureEx(Latte::E_DIM dim, MPTR physAddress, MPTR physMipAddress, Latte::E_GX2SURFFMT format, uint32 width, uint32 height, uint32 depth, uint32 pitch, uint32 mipLevels, uint32 swizzle, Latte::E_HWTILEMODE tileMode, bool isDepth)
@ -186,102 +186,101 @@ LatteTexture* MetalRenderer::texture_createTextureEx(Latte::E_DIM dim, MPTR phys
// Stub: binds no texture; logs "not implemented". `textureView` and `textureUnit` are unused.
void MetalRenderer::texture_setLatteTexture(LatteTextureView* textureView, uint32 textureUnit)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: copies nothing between `src` and `dst`; logs "not implemented". All parameters are unused.
void MetalRenderer::texture_copyImageSubData(LatteTexture* src, sint32 srcMip, sint32 effectiveSrcX, sint32 effectiveSrcY, sint32 srcSlice, LatteTexture* dst, sint32 dstMip, sint32 effectiveDstX, sint32 effectiveDstY, sint32 dstSlice, sint32 effectiveCopyWidth, sint32 effectiveCopyHeight, sint32 srcDepth)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: creates no readback object; logs "not implemented" and returns nullptr. `textureView` is unused.
LatteTextureReadbackInfo* MetalRenderer::texture_createReadback(LatteTextureView* textureView)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
return nullptr;
}
// Stub: performs no copy or format conversion; logs "not implemented". All parameters are unused.
void MetalRenderer::surfaceCopy_copySurfaceWithFormatConversion(LatteTexture* sourceTexture, sint32 srcMip, sint32 srcSlice, LatteTexture* destinationTexture, sint32 dstMip, sint32 dstSlice, sint32 width, sint32 height)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: allocates no buffer cache; logs "not implemented". `bufferSize` is unused.
void MetalRenderer::bufferCache_init(const sint32 bufferSize)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: uploads nothing; `buffer` is not read. Logs "not implemented".
void MetalRenderer::bufferCache_upload(uint8* buffer, sint32 size, uint32 bufferOffset)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: copies nothing; logs "not implemented". Offsets and size are unused.
void MetalRenderer::bufferCache_copy(uint32 srcOffset, uint32 dstOffset, uint32 size)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: copies nothing; logs "not implemented". Offsets and size are unused.
void MetalRenderer::bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uint32 dstOffset, uint32 size)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: binds no vertex buffer; logs "not implemented". All parameters are unused.
void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, uint32 size)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: binds no uniform buffer for any shader stage; logs "not implemented". All parameters are unused.
void MetalRenderer::buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Factory for renderer shader objects.
// As written here, `return nullptr;` executes first, so the `return new RendererShaderMtl(...)`
// below it is unreachable and callers always receive nullptr.
// NOTE(review): this text looks like a rendered diff with removed and added lines interleaved;
// the intended live statement is presumably the `return new RendererShaderMtl(...)` — confirm
// against the actual file before relying on either behaviour.
RendererShader* MetalRenderer::shader_create(RendererShader::ShaderType type, uint64 baseHash, uint64 auxHash, const std::string& source, bool isGameShader, bool isGfxPackShader)
{
//return new RendererShaderMtl(this, type, baseHash, auxHash, isGameShader, isGfxPackShader, source);
return nullptr;
return new RendererShaderMtl(this, type, baseHash, auxHash, isGameShader, isGfxPackShader, source);
}
// Stub: configures no stream-out buffer; logs "not implemented". All parameters are unused.
void MetalRenderer::streamout_setupXfbBuffer(uint32 bufferIndex, sint32 ringBufferOffset, uint32 rangeAddr, uint32 rangeSize)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: starts nothing; it only logs "not implemented".
void MetalRenderer::streamout_begin()
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: performs no post-draw stream-out work; it only logs "not implemented".
void MetalRenderer::streamout_rendererFinishDrawcall()
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Partially implemented: logs "not implemented", then calls LatteSHRC_UpdateActiveShaders().
// No other per-sequence state is set up here.
void MetalRenderer::draw_beginSequence()
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
LatteSHRC_UpdateActiveShaders();
}
// Stub: issues no draw call; logs "not implemented". All parameters are unused.
void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 instanceCount, uint32 count, MPTR indexDataMPTR, Latte::LATTE_VGT_DMA_INDEX_TYPE::E_INDEX_TYPE indexType, bool isFirst)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: finalizes nothing; it only logs "not implemented".
void MetalRenderer::draw_endSequence()
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}
// Stub: reserves no memory; logs "not implemented" and returns nullptr.
// The reference parameters `offset` and `bufferIndex` are left unmodified.
void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
return nullptr;
}
// Stub: uploads nothing; logs "not implemented". `offset` and `size` are unused.
void MetalRenderer::indexData_uploadIndexMemory(uint32 offset, uint32 size)
{
cemuLog_logDebug(LogType::Force, "not implemented");
cemuLog_log(LogType::MetalLogging, "not implemented");
}

View File

@ -39,7 +39,7 @@ public:
void SwapBuffers(bool swapTV, bool swapDRC) override;
// Stub: takes no screenshot; only logs that screenshots are unsupported. Both parameters are unused.
void HandleScreenshotRequest(LatteTextureView* texView, bool padView) override {
cemuLog_logDebug(LogType::Force, "Screenshots are not yet supported on Metal");
cemuLog_log(LogType::MetalLogging, "Screenshots are not yet supported on Metal");
}
void DrawBackbufferQuad(LatteTextureView* texView, RendererOutputShader* shader, bool useLinearTexFilter,
@ -53,27 +53,27 @@ public:
// imgui
// Stub: starts no ImGui frame; logs that ImGui is unsupported and always returns false.
bool ImguiBegin(bool mainWindow) override {
cemuLog_logDebug(LogType::Force, "Imgui is not yet supported on Metal");
cemuLog_log(LogType::MetalLogging, "Imgui is not yet supported on Metal");
return false;
};
// Stub: ends no ImGui frame; it only logs that ImGui is unsupported.
void ImguiEnd() override {
cemuLog_logDebug(LogType::Force, "Imgui is not yet supported on Metal");
cemuLog_log(LogType::MetalLogging, "Imgui is not yet supported on Metal");
};
// Stub: creates no texture; logs that ImGui is unsupported and returns nullptr.
// `data` and `size` are not read.
ImTextureID GenerateTexture(const std::vector<uint8>& data, const Vector2i& size) override {
cemuLog_logDebug(LogType::Force, "Imgui is not yet supported on Metal");
cemuLog_log(LogType::MetalLogging, "Imgui is not yet supported on Metal");
return nullptr;
};
// Stub: `id` is not released; it only logs that ImGui is unsupported.
void DeleteTexture(ImTextureID id) override {
cemuLog_logDebug(LogType::Force, "Imgui is not yet supported on Metal");
cemuLog_log(LogType::MetalLogging, "Imgui is not yet supported on Metal");
};
// Stub: deletes nothing; it only logs that ImGui is unsupported.
void DeleteFontTextures() override {
cemuLog_logDebug(LogType::Force, "Imgui is not yet supported on Metal");
cemuLog_log(LogType::MetalLogging, "Imgui is not yet supported on Metal");
};
void AppendOverlayDebugInfo() override;
@ -135,21 +135,21 @@ public:
// occlusion queries
// Stub: creates no query object; logs that occlusion queries are unsupported and returns nullptr.
LatteQueryObject* occlusionQuery_create() override {
cemuLog_logDebug(LogType::Force, "Occlusion queries are not yet supported on Metal");
cemuLog_log(LogType::MetalLogging, "Occlusion queries are not yet supported on Metal");
return nullptr;
}
// Stub: `queryObj` is not destroyed or touched; it only logs that occlusion queries are unsupported.
void occlusionQuery_destroy(LatteQueryObject* queryObj) override {
cemuLog_logDebug(LogType::Force, "Occlusion queries are not yet supported on Metal");
cemuLog_log(LogType::MetalLogging, "Occlusion queries are not yet supported on Metal");
}
// Stub: flushes nothing; it only logs that occlusion queries are unsupported.
void occlusionQuery_flush() override {
cemuLog_logDebug(LogType::Force, "Occlusion queries are not yet supported on Metal");
cemuLog_log(LogType::MetalLogging, "Occlusion queries are not yet supported on Metal");
}
// Stub: updates no query state; it only logs that occlusion queries are unsupported.
void occlusionQuery_updateState() override {
cemuLog_logDebug(LogType::Force, "Occlusion queries are not yet supported on Metal");
cemuLog_log(LogType::MetalLogging, "Occlusion queries are not yet supported on Metal");
}

View File

@ -9,11 +9,12 @@ RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type
MTL::Library* library = mtlRenderer->GetDevice()->newLibrary(NS::String::string(mslCode.c_str(), NS::ASCIIStringEncoding), nullptr, &error);
if (error)
{
cemuLog_log(LogType::MetalLogging, "Failed to create library (error: {}) -> source:\n{}", error->localizedDescription()->utf8String(), mslCode.c_str());
printf("Failed to create library (error: %s) -> source:\n%s", error->localizedDescription()->utf8String(), mslCode.c_str());
error->release();
return;
}
m_function = library->newFunction(NS::String::string("main0", NS::ASCIIStringEncoding));
library->release();
}
RendererShaderMtl::~RendererShaderMtl()

View File

@ -29,10 +29,21 @@ public:
return m_function;
}
// TODO: implement these
// Placeholder: ignores `name` and always returns location 0.
sint32 GetUniformLocation(const char* name) override { return 0; }
// Placeholder: empty body; `location`, `data` and `count` are ignored.
void SetUniform2fv(sint32 location, void* data, sint32 count) override {}
// Placeholder: empty body; `location`, `data` and `count` are ignored.
void SetUniform4iv(sint32 location, void* data, sint32 count) override {}
// Placeholder: calls cemu_assert_suspicious() (presumably to flag that this path is not
// expected to be used — confirm the macro's semantics), then returns 0 regardless of `name`.
sint32 GetUniformLocation(const char* name) override
{
cemu_assert_suspicious();
return 0;
}
// Placeholder: only calls cemu_assert_suspicious(); `location`, `data` and `count` are ignored
// and no uniform value is stored.
void SetUniform2fv(sint32 location, void* data, sint32 count) override
{
cemu_assert_suspicious();
}
// Placeholder: only calls cemu_assert_suspicious(); `location`, `data` and `count` are ignored
// and no uniform value is stored.
void SetUniform4iv(sint32 location, void* data, sint32 count) override
{
cemu_assert_suspicious();
}
// TODO: implement this
// Empty body: no compilation is triggered here and `isRenderThread` is ignored.
void PreponeCompilation(bool isRenderThread) override {}