Latte: Optimize shader decompiler output

This commit is contained in:
Exzap 2023-03-12 08:33:31 +01:00
parent 3acdd47eaf
commit 8ce3f834c4
3 changed files with 408 additions and 460 deletions

View File

@ -76,75 +76,6 @@ void _remapUniformAccess(LatteDecompilerShaderContext* shaderContext, bool isReg
list_uniformMapping.emplace_back(newMapping);
}
/*
* Checks for register collisions and marks the instructions accordingly
* startIndex is the first instruction of the group
* endIndex is inclusive the last instruction of the same group
*/
void _analyzeALUInstructionGroupForRegisterCollision(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction, sint32 startIndex, sint32 endIndex)
{
uint8 registerChannelWriteMask[(LATTE_NUM_GPR *4+7)/8] = {0};
struct
{
uint8 gprIndex;
uint8 channel;
}registerBackupEntries[5];
sint32 registerBackupCount = 0;
for(sint32 i=startIndex; i<=endIndex; i++)
{
LatteDecompilerALUInstruction& aluInstruction = cfInstruction->instructionsALU[i];
// ignore NOP instruction
if( aluInstruction.isOP3 == false && aluInstruction.opcode == ALU_OP2_INST_NOP )
continue;
if( aluInstruction.destElem > 3 )
debugBreakpoint();
registerChannelWriteMask[(aluInstruction.destGpr * 4 + aluInstruction.destElem) / 8] |= (1 << ((aluInstruction.destGpr * 4 + aluInstruction.destElem) % 8));
// check if any previously written register is read
for(sint32 f=0; f<3; f++)
{
if( GPU7_ALU_SRC_IS_GPR(aluInstruction.sourceOperand[f].sel) == false )
continue;
sint32 gprIndex = GPU7_ALU_SRC_GET_GPR_INDEX(aluInstruction.sourceOperand[f].sel);
if( aluInstruction.sourceOperand[f].chan > 3 )
debugBreakpoint();
if( (registerChannelWriteMask[(gprIndex*4+aluInstruction.sourceOperand[f].chan)/8]&(1<<((gprIndex*4+aluInstruction.sourceOperand[f].chan)%8))) != 0 )
{
// register is overwritten by same or previous instruction, mark register backup for this instruction
// check if this register already has a backup
bool hasBackup = false;
for(sint32 t=0; t<registerBackupCount; t++)
{
if( (sint32)registerBackupEntries[t].gprIndex == gprIndex && registerBackupEntries[t].channel == aluInstruction.sourceOperand[f].chan )
{
aluInstruction.sourceOperand[f].requiredRegisterBackup = true;
aluInstruction.sourceOperand[f].registerBackupIndex = t;
hasBackup = true;
break;
}
}
if( hasBackup == false )
{
// add new entry
if( registerBackupCount < sizeof(registerBackupEntries)/sizeof(registerBackupEntries[0]) )
{
// add entry
registerBackupEntries[registerBackupCount].gprIndex = gprIndex;
registerBackupEntries[registerBackupCount].channel = aluInstruction.sourceOperand[f].chan;
registerBackupCount++;
// mark operand for backup
aluInstruction.sourceOperand[f].requiredRegisterBackup = true;
aluInstruction.sourceOperand[f].registerBackupIndex = registerBackupCount-1;
}
else
debugBreakpoint();
}
}
}
}
}
/*
* Returns true if the instruction takes integer operands or returns a integer value
*/
@ -283,10 +214,10 @@ void LatteDecompiler_analyzeALUClause(LatteDecompilerShaderContext* shaderContex
for(auto& aluInstruction : cfInstruction->instructionsALU)
{
// ignore NOP instruction
if( aluInstruction.isOP3 == false && aluInstruction.opcode == ALU_OP2_INST_NOP )
if( !aluInstruction.isOP3 && aluInstruction.opcode == ALU_OP2_INST_NOP )
continue;
// check for CUBE instruction
if( aluInstruction.isOP3 == false && aluInstruction.opcode == ALU_OP2_INST_CUBE )
if( !aluInstruction.isOP3 && aluInstruction.opcode == ALU_OP2_INST_CUBE )
{
shaderContext->analyzer.hasRedcCUBE = true;
}
@ -305,7 +236,7 @@ void LatteDecompiler_analyzeALUClause(LatteDecompilerShaderContext* shaderContex
// relative register file accesses are tricky because the range of possible indices is unknown
// worst case we have to load the full file (256 * 16 byte entries)
// but here we track all access indices so the analyzer can make guesstimates about the actual size when there are relative accesses
// by tracking the accessed base indices the shader analyzer can determine bounds for the potentially accessed ranges
shaderContext->analyzer.uniformRegisterAccess = true;
if (aluInstruction.sourceOperand[f].rel)
@ -355,30 +286,9 @@ void LatteDecompiler_analyzeALUClause(LatteDecompilerShaderContext* shaderContex
}
}
if( aluInstruction.destRel != 0 )
{
shaderContext->analyzer.usesRelativeGPRWrite = true;
}
shaderContext->analyzer.gprUseMask[aluInstruction.destGpr/8] |= (1<<(aluInstruction.destGpr%8));
}
// check for register collisions inside instruction groups (registers that are overwritten while being read)
sint32 currentGroupIndex = 0;
sint32 currentGroupStartIndex = 0;
for(uint32 i=0; i<cfInstruction->instructionsALU.size(); i++)
{
LatteDecompilerALUInstruction& aluInstruction = cfInstruction->instructionsALU[i];
if( aluInstruction.instructionGroupIndex != currentGroupIndex )
{
cemu_assert_debug(i != 0); // first group cant end at first instruction
_analyzeALUInstructionGroupForRegisterCollision(shaderContext, cfInstruction, currentGroupStartIndex, i-1);
// start next group
currentGroupIndex = aluInstruction.instructionGroupIndex;
currentGroupStartIndex = i;
}
}
if( currentGroupStartIndex < (sint32)cfInstruction->instructionsALU.size() )
{
_analyzeALUInstructionGroupForRegisterCollision(shaderContext, cfInstruction, currentGroupStartIndex, (uint32)cfInstruction->instructionsALU.size()-1);
}
}
// analyze TEX CF instruction and all instructions within the TEX clause

View File

@ -25,9 +25,6 @@ struct LatteDecompilerALUInstruction
uint8 abs{};
uint8 neg{};
uint8 chan{};
// register backup information (used for instruction groups where the same register is read and written)
bool requiredRegisterBackup{};
uint8 registerBackupIndex{}; // index of the used register backup variable (at the beginning of the group the register value is copied to the temporary register with this index)
}sourceOperand[3];
union
{
@ -214,7 +211,7 @@ struct LatteDecompilerShaderContext
// emitter
bool hasUniformVarBlock;
sint32 currentBindingPointVK{};
struct ALUClauseTemporariesState* aluPVPSState{nullptr};
// misc
std::vector<LatteDecompilerSubroutineInfo> list_subroutines;
};