PPCRec: Rework RLWIMI

This commit is contained in:
Exzap 2024-10-28 09:21:42 +01:00
parent f309d5d8a8
commit 099d1d4e1f
5 changed files with 29 additions and 55 deletions

View File

@ -1073,26 +1073,6 @@ bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction
else // XOR else // XOR
x64Gen_xor_reg64Low32_imm32(x64GenContext, regR, immS32); x64Gen_xor_reg64Low32_imm32(x64GenContext, regR, immS32);
} }
else if( imlInstruction->operation == PPCREC_IML_OP_RLWIMI )
{
// registerResult = ((registerOperand<<<SH)&mask) | (registerResult&~mask)
uint32 vImm = (uint32)imlInstruction->op_r_r_s32.immS32;
uint32 mb = (vImm>>0)&0xFF;
uint32 me = (vImm>>8)&0xFF;
uint32 sh = (vImm>>16)&0xFF;
uint32 mask = ppc_mask(mb, me);
// copy rS to temporary register
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, regA);
// rotate the temporary copy of rS
if( sh )
x64Gen_rol_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (uint8)sh&0x1F);
// AND destination register with inverted mask
x64Gen_and_reg64Low32_imm32(x64GenContext, regR, ~mask);
// AND temporary rS register with mask
x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, mask);
// OR result with temporary
x64Gen_or_reg64Low32_reg64Low32(x64GenContext, regR, REG_RESV_TEMP);
}
else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED ) else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED )
{ {
// registerResult = registerOperand * immS32 // registerResult = registerOperand * immS32

View File

@ -93,19 +93,8 @@ void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const
} }
else if (type == PPCREC_IML_TYPE_R_R_S32) else if (type == PPCREC_IML_TYPE_R_R_S32)
{ {
if (operation == PPCREC_IML_OP_RLWIMI) registersUsed->writtenGPR1 = op_r_r_s32.regR;
{ registersUsed->readGPR1 = op_r_r_s32.regA;
// result and operand register are both read, result is written
registersUsed->writtenGPR1 = op_r_r_s32.regR;
registersUsed->readGPR1 = op_r_r_s32.regR;
registersUsed->readGPR2 = op_r_r_s32.regA;
}
else
{
// result is write only and operand is read only
registersUsed->writtenGPR1 = op_r_r_s32.regR;
registersUsed->readGPR1 = op_r_r_s32.regA;
}
} }
else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY) else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY)
{ {

View File

@ -122,7 +122,6 @@ enum
PPCREC_IML_OP_RIGHT_SHIFT_U, // right shift operator (unsigned) PPCREC_IML_OP_RIGHT_SHIFT_U, // right shift operator (unsigned)
PPCREC_IML_OP_RIGHT_SHIFT_S, // right shift operator (signed) PPCREC_IML_OP_RIGHT_SHIFT_S, // right shift operator (signed)
// ppc // ppc
PPCREC_IML_OP_RLWIMI, // RLWIMI instruction (rotate, merge based on mask)
PPCREC_IML_OP_SLW, // SLW (shift based on register by up to 63 bits) PPCREC_IML_OP_SLW, // SLW (shift based on register by up to 63 bits)
PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits) PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits)
PPCREC_IML_OP_CNTLZW, PPCREC_IML_OP_CNTLZW,

View File

@ -290,7 +290,7 @@ void IMLRA_MergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange*
PPCRecRA_debugValidateSubrange(absorbedSubrange); PPCRecRA_debugValidateSubrange(absorbedSubrange);
if (subrange->imlSegment != absorbedSubrange->imlSegment) if (subrange->imlSegment != absorbedSubrange->imlSegment)
assert_dbg(); assert_dbg();
cemu_assert_debug(subrange->interval2.end == absorbedSubrange->interval2.start); cemu_assert_debug(subrange->interval.end == absorbedSubrange->interval.start);
if (subrange->subrangeBranchTaken || subrange->subrangeBranchNotTaken) if (subrange->subrangeBranchTaken || subrange->subrangeBranchNotTaken)
assert_dbg(); assert_dbg();
@ -375,23 +375,23 @@ void PPCRecRA_debugValidateSubrange(raLivenessRange* range)
if(range->subrangeBranchTaken || range->subrangeBranchNotTaken) if(range->subrangeBranchTaken || range->subrangeBranchNotTaken)
{ {
cemu_assert_debug(range->interval2.end.ConnectsToNextSegment()); cemu_assert_debug(range->interval.end.ConnectsToNextSegment());
} }
if(!range->previousRanges.empty()) if(!range->previousRanges.empty())
{ {
cemu_assert_debug(range->interval2.start.ConnectsToPreviousSegment()); cemu_assert_debug(range->interval.start.ConnectsToPreviousSegment());
} }
// validate locations // validate locations
if (!range->list_accessLocations.empty()) if (!range->list_accessLocations.empty())
{ {
cemu_assert_debug(range->list_accessLocations.front().pos >= range->interval2.start); cemu_assert_debug(range->list_accessLocations.front().pos >= range->interval.start);
cemu_assert_debug(range->list_accessLocations.back().pos <= range->interval2.end); cemu_assert_debug(range->list_accessLocations.back().pos <= range->interval.end);
} }
// validate fixed reg requirements // validate fixed reg requirements
if (!range->list_fixedRegRequirements.empty()) if (!range->list_fixedRegRequirements.empty())
{ {
cemu_assert_debug(range->list_fixedRegRequirements.front().pos >= range->interval2.start); cemu_assert_debug(range->list_fixedRegRequirements.front().pos >= range->interval.start);
cemu_assert_debug(range->list_fixedRegRequirements.back().pos <= range->interval2.end); cemu_assert_debug(range->list_fixedRegRequirements.back().pos <= range->interval.end);
for(sint32 i = 0; i < (sint32)range->list_fixedRegRequirements.size()-1; i++) for(sint32 i = 0; i < (sint32)range->list_fixedRegRequirements.size()-1; i++)
cemu_assert_debug(range->list_fixedRegRequirements[i].pos < range->list_fixedRegRequirements[i+1].pos); cemu_assert_debug(range->list_fixedRegRequirements[i].pos < range->list_fixedRegRequirements[i+1].pos);
} }
@ -423,12 +423,12 @@ void IMLRA_TrimRangeToUse(raLivenessRange* range)
range->interval.end = range->list_accessLocations.back().pos; range->interval.end = range->list_accessLocations.back().pos;
// extra checks // extra checks
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT
cemu_assert_debug(range->interval2.start <= range->interval2.end); cemu_assert_debug(range->interval.start <= range->interval.end);
for(auto& loc : range->list_accessLocations) for(auto& loc : range->list_accessLocations)
{ {
cemu_assert_debug(range->interval2.ContainsEdge(loc.pos)); cemu_assert_debug(range->interval.ContainsEdge(loc.pos));
} }
cemu_assert_debug(prevInterval.ContainsWholeInterval(range->interval2)); cemu_assert_debug(prevInterval.ContainsWholeInterval(range->interval));
#endif #endif
} }
@ -580,7 +580,7 @@ sint32 IMLRA_CalculateAdditionalCostAfterSplit(raLivenessRange* subrange, raInst
{ {
// validation // validation
#ifdef CEMU_DEBUG_ASSERT #ifdef CEMU_DEBUG_ASSERT
if (subrange->interval2.ExtendsIntoNextSegment()) if (subrange->interval.ExtendsIntoNextSegment())
assert_dbg(); assert_dbg();
#endif #endif
cemu_assert_debug(splitPosition.IsInstructionIndex()); cemu_assert_debug(splitPosition.IsInstructionIndex());

View File

@ -982,12 +982,12 @@ bool PPCRecompilerImlGen_DIVWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
bool PPCRecompilerImlGen_RLWINM(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) bool PPCRecompilerImlGen_RLWINM(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{ {
int rS, rA, SH, MB, ME; sint32 rS, rA, SH, MB, ME;
PPC_OPC_TEMPL_M(opcode, rS, rA, SH, MB, ME); PPC_OPC_TEMPL_M(opcode, rS, rA, SH, MB, ME);
uint32 mask = ppc_mask(MB, ME); uint32 mask = ppc_mask(MB, ME);
IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); IMLReg regA = _GetRegGPR(ppcImlGenContext, rA);
if( ME == (31-SH) && MB == 0 ) if( ME == (31-SH) && MB == 0 )
{ {
// SLWI // SLWI
@ -1015,16 +1015,22 @@ bool PPCRecompilerImlGen_RLWINM(ppcImlGenContext_t* ppcImlGenContext, uint32 opc
bool PPCRecompilerImlGen_RLWIMI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) bool PPCRecompilerImlGen_RLWIMI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode)
{ {
int rS, rA, SH, MB, ME; sint32 rS, rA, SH, MB, ME;
PPC_OPC_TEMPL_M(opcode, rS, rA, SH, MB, ME); PPC_OPC_TEMPL_M(opcode, rS, rA, SH, MB, ME);
IMLReg regS = _GetRegGPR(ppcImlGenContext, rS);
IMLReg regS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); IMLReg regR = _GetRegGPR(ppcImlGenContext, rA);
IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0);
// pack RLWIMI parameters into single integer uint32 mask = ppc_mask(MB, ME);
uint32 vImm = MB|(ME<<8)|(SH<<16); ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regTmp, regS);
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RLWIMI, regA, regS, (sint32)vImm); if (SH)
ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_LEFT_ROTATE, regTmp, SH);
if (mask != 0)
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regR, regR, (sint32)~mask);
if (mask != 0xFFFFFFFF)
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regTmp, regTmp, (sint32)mask);
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regR, regR, regTmp);
if (opcode & PPC_OPC_RC) if (opcode & PPC_OPC_RC)
PPCImlGen_UpdateCR0(ppcImlGenContext, regA); PPCImlGen_UpdateCR0(ppcImlGenContext, regR);
return true; return true;
} }