mirror of
https://github.com/cemu-project/Cemu.git
synced 2024-11-29 04:24:17 +01:00
Latte: Optimize shader decompiler output
This commit is contained in:
parent
3acdd47eaf
commit
8ce3f834c4
@ -76,75 +76,6 @@ void _remapUniformAccess(LatteDecompilerShaderContext* shaderContext, bool isReg
|
|||||||
list_uniformMapping.emplace_back(newMapping);
|
list_uniformMapping.emplace_back(newMapping);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Checks for register collisions and marks the instructions accordingly
|
|
||||||
* startIndex is the first instruction of the group
|
|
||||||
* endIndex is inclusive the last instruction of the same group
|
|
||||||
*/
|
|
||||||
void _analyzeALUInstructionGroupForRegisterCollision(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction, sint32 startIndex, sint32 endIndex)
|
|
||||||
{
|
|
||||||
uint8 registerChannelWriteMask[(LATTE_NUM_GPR *4+7)/8] = {0};
|
|
||||||
|
|
||||||
struct
|
|
||||||
{
|
|
||||||
uint8 gprIndex;
|
|
||||||
uint8 channel;
|
|
||||||
}registerBackupEntries[5];
|
|
||||||
sint32 registerBackupCount = 0;
|
|
||||||
|
|
||||||
for(sint32 i=startIndex; i<=endIndex; i++)
|
|
||||||
{
|
|
||||||
LatteDecompilerALUInstruction& aluInstruction = cfInstruction->instructionsALU[i];
|
|
||||||
// ignore NOP instruction
|
|
||||||
if( aluInstruction.isOP3 == false && aluInstruction.opcode == ALU_OP2_INST_NOP )
|
|
||||||
continue;
|
|
||||||
if( aluInstruction.destElem > 3 )
|
|
||||||
debugBreakpoint();
|
|
||||||
registerChannelWriteMask[(aluInstruction.destGpr * 4 + aluInstruction.destElem) / 8] |= (1 << ((aluInstruction.destGpr * 4 + aluInstruction.destElem) % 8));
|
|
||||||
// check if any previously written register is read
|
|
||||||
for(sint32 f=0; f<3; f++)
|
|
||||||
{
|
|
||||||
if( GPU7_ALU_SRC_IS_GPR(aluInstruction.sourceOperand[f].sel) == false )
|
|
||||||
continue;
|
|
||||||
sint32 gprIndex = GPU7_ALU_SRC_GET_GPR_INDEX(aluInstruction.sourceOperand[f].sel);
|
|
||||||
if( aluInstruction.sourceOperand[f].chan > 3 )
|
|
||||||
debugBreakpoint();
|
|
||||||
if( (registerChannelWriteMask[(gprIndex*4+aluInstruction.sourceOperand[f].chan)/8]&(1<<((gprIndex*4+aluInstruction.sourceOperand[f].chan)%8))) != 0 )
|
|
||||||
{
|
|
||||||
// register is overwritten by same or previous instruction, mark register backup for this instruction
|
|
||||||
// check if this register already has a backup
|
|
||||||
bool hasBackup = false;
|
|
||||||
for(sint32 t=0; t<registerBackupCount; t++)
|
|
||||||
{
|
|
||||||
if( (sint32)registerBackupEntries[t].gprIndex == gprIndex && registerBackupEntries[t].channel == aluInstruction.sourceOperand[f].chan )
|
|
||||||
{
|
|
||||||
aluInstruction.sourceOperand[f].requiredRegisterBackup = true;
|
|
||||||
aluInstruction.sourceOperand[f].registerBackupIndex = t;
|
|
||||||
hasBackup = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if( hasBackup == false )
|
|
||||||
{
|
|
||||||
// add new entry
|
|
||||||
if( registerBackupCount < sizeof(registerBackupEntries)/sizeof(registerBackupEntries[0]) )
|
|
||||||
{
|
|
||||||
// add entry
|
|
||||||
registerBackupEntries[registerBackupCount].gprIndex = gprIndex;
|
|
||||||
registerBackupEntries[registerBackupCount].channel = aluInstruction.sourceOperand[f].chan;
|
|
||||||
registerBackupCount++;
|
|
||||||
// mark operand for backup
|
|
||||||
aluInstruction.sourceOperand[f].requiredRegisterBackup = true;
|
|
||||||
aluInstruction.sourceOperand[f].registerBackupIndex = registerBackupCount-1;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
debugBreakpoint();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns true if the instruction takes integer operands or returns an integer value
|
* Returns true if the instruction takes integer operands or returns an integer value
|
||||||
*/
|
*/
|
||||||
@ -283,10 +214,10 @@ void LatteDecompiler_analyzeALUClause(LatteDecompilerShaderContext* shaderContex
|
|||||||
for(auto& aluInstruction : cfInstruction->instructionsALU)
|
for(auto& aluInstruction : cfInstruction->instructionsALU)
|
||||||
{
|
{
|
||||||
// ignore NOP instruction
|
// ignore NOP instruction
|
||||||
if( aluInstruction.isOP3 == false && aluInstruction.opcode == ALU_OP2_INST_NOP )
|
if( !aluInstruction.isOP3 && aluInstruction.opcode == ALU_OP2_INST_NOP )
|
||||||
continue;
|
continue;
|
||||||
// check for CUBE instruction
|
// check for CUBE instruction
|
||||||
if( aluInstruction.isOP3 == false && aluInstruction.opcode == ALU_OP2_INST_CUBE )
|
if( !aluInstruction.isOP3 && aluInstruction.opcode == ALU_OP2_INST_CUBE )
|
||||||
{
|
{
|
||||||
shaderContext->analyzer.hasRedcCUBE = true;
|
shaderContext->analyzer.hasRedcCUBE = true;
|
||||||
}
|
}
|
||||||
@ -305,7 +236,7 @@ void LatteDecompiler_analyzeALUClause(LatteDecompilerShaderContext* shaderContex
|
|||||||
|
|
||||||
// relative register file accesses are tricky because the range of possible indices is unknown
|
// relative register file accesses are tricky because the range of possible indices is unknown
|
||||||
// worst case we have to load the full file (256 * 16 byte entries)
|
// worst case we have to load the full file (256 * 16 byte entries)
|
||||||
// but here we track all access indices so the analyzer can make guesstimates about the actual size when there are relative accesses
|
// by tracking the accessed base indices the shader analyzer can determine bounds for the potentially accessed ranges
|
||||||
|
|
||||||
shaderContext->analyzer.uniformRegisterAccess = true;
|
shaderContext->analyzer.uniformRegisterAccess = true;
|
||||||
if (aluInstruction.sourceOperand[f].rel)
|
if (aluInstruction.sourceOperand[f].rel)
|
||||||
@ -355,30 +286,9 @@ void LatteDecompiler_analyzeALUClause(LatteDecompilerShaderContext* shaderContex
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if( aluInstruction.destRel != 0 )
|
if( aluInstruction.destRel != 0 )
|
||||||
{
|
|
||||||
shaderContext->analyzer.usesRelativeGPRWrite = true;
|
shaderContext->analyzer.usesRelativeGPRWrite = true;
|
||||||
}
|
|
||||||
shaderContext->analyzer.gprUseMask[aluInstruction.destGpr/8] |= (1<<(aluInstruction.destGpr%8));
|
shaderContext->analyzer.gprUseMask[aluInstruction.destGpr/8] |= (1<<(aluInstruction.destGpr%8));
|
||||||
}
|
}
|
||||||
// check for register collisions inside instruction groups (registers that are overwritten while being read)
|
|
||||||
sint32 currentGroupIndex = 0;
|
|
||||||
sint32 currentGroupStartIndex = 0;
|
|
||||||
for(uint32 i=0; i<cfInstruction->instructionsALU.size(); i++)
|
|
||||||
{
|
|
||||||
LatteDecompilerALUInstruction& aluInstruction = cfInstruction->instructionsALU[i];
|
|
||||||
if( aluInstruction.instructionGroupIndex != currentGroupIndex )
|
|
||||||
{
|
|
||||||
cemu_assert_debug(i != 0); // first group cant end at first instruction
|
|
||||||
_analyzeALUInstructionGroupForRegisterCollision(shaderContext, cfInstruction, currentGroupStartIndex, i-1);
|
|
||||||
// start next group
|
|
||||||
currentGroupIndex = aluInstruction.instructionGroupIndex;
|
|
||||||
currentGroupStartIndex = i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if( currentGroupStartIndex < (sint32)cfInstruction->instructionsALU.size() )
|
|
||||||
{
|
|
||||||
_analyzeALUInstructionGroupForRegisterCollision(shaderContext, cfInstruction, currentGroupStartIndex, (uint32)cfInstruction->instructionsALU.size()-1);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// analyze TEX CF instruction and all instructions within the TEX clause
|
// analyze TEX CF instruction and all instructions within the TEX clause
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -25,9 +25,6 @@ struct LatteDecompilerALUInstruction
|
|||||||
uint8 abs{};
|
uint8 abs{};
|
||||||
uint8 neg{};
|
uint8 neg{};
|
||||||
uint8 chan{};
|
uint8 chan{};
|
||||||
// register backup information (used for instruction groups where the same register is read and written)
|
|
||||||
bool requiredRegisterBackup{};
|
|
||||||
uint8 registerBackupIndex{}; // index of the used register backup variable (at the beginning of the group the register value is copied to the temporary register with this index)
|
|
||||||
}sourceOperand[3];
|
}sourceOperand[3];
|
||||||
union
|
union
|
||||||
{
|
{
|
||||||
@ -214,7 +211,7 @@ struct LatteDecompilerShaderContext
|
|||||||
// emitter
|
// emitter
|
||||||
bool hasUniformVarBlock;
|
bool hasUniformVarBlock;
|
||||||
sint32 currentBindingPointVK{};
|
sint32 currentBindingPointVK{};
|
||||||
|
struct ALUClauseTemporariesState* aluPVPSState{nullptr};
|
||||||
// misc
|
// misc
|
||||||
std::vector<LatteDecompilerSubroutineInfo> list_subroutines;
|
std::vector<LatteDecompilerSubroutineInfo> list_subroutines;
|
||||||
};
|
};
|
||||||
|
Loading…
Reference in New Issue
Block a user