JitArm64: divwx - Optimize constant dividend

When the dividend is a constant known at JIT compile time, we can eliminate
some of the branching and precompute the result for the overflow case.
This commit is contained in:
JosJuice 2021-08-21 16:28:51 +02:00
parent 559de262a1
commit 09cdb076a3
3 changed files with 39 additions and 4 deletions

View File

@ -1373,6 +1373,10 @@ void ARM64XEmitter::CMP(ARM64Reg Rn, u32 imm, bool shift)
{
EncodeAddSubImmInst(1, true, shift, imm, Rn, Is64Bit(Rn) ? ARM64Reg::SP : ARM64Reg::WSP);
}
// CMN (immediate): emits the compare-negative counterpart of CMP.
// Mirrors CMP above; the only difference is the first argument passed to
// EncodeAddSubImmInst (0 here, 1 for CMP).
// NOTE(review): presumably 0 selects the add form and `true` requests flag
// setting, which would make this ADDS with a discarded result - confirm
// against EncodeAddSubImmInst.
void ARM64XEmitter::CMN(ARM64Reg Rn, u32 imm, bool shift)
{
  // Destination is picked to match the operand width of Rn. SP/WSP is what CMP
  // passes as well; presumably it stands in for register number 31 so the
  // result is thrown away - TODO confirm.
  const ARM64Reg discarded_dest = Is64Bit(Rn) ? ARM64Reg::SP : ARM64Reg::WSP;
  EncodeAddSubImmInst(0, true, shift, imm, Rn, discarded_dest);
}
// Data Processing (Immediate)
void ARM64XEmitter::MOVZ(ARM64Reg Rd, u32 imm, ShiftAmount pos)

View File

@ -1006,6 +1006,7 @@ public:
// Subtract-immediate emitters: Rd is the destination, Rn the source register
// and imm the immediate operand. `shift` defaults to false.
// NOTE(review): `shift` presumably selects the shifted (LSL #12) form of the
// immediate - confirm against EncodeAddSubImmInst.
void SUB(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
// Same parameters as SUB; presumably the flag-setting variant - TODO confirm.
void SUBS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
// Compare Rn with an immediate. Takes no destination register.
void CMP(ARM64Reg Rn, u32 imm, bool shift = false);
// Compare-negative counterpart of CMP, with the same parameter list and the
// same default for `shift`.
void CMN(ARM64Reg Rn, u32 imm, bool shift = false);
// Data Processing (Immediate)
// `pos` defaults to ShiftAmount::Shift0.
void MOVZ(ARM64Reg Rd, u32 imm, ShiftAmount pos = ShiftAmount::Shift0);

View File

@ -1327,6 +1327,36 @@ void JitArm64::divwx(UGeckoInstruction inst)
if (inst.Rc)
ComputeRC0(imm_d);
}
else if (gpr.IsImm(a))
{
const u32 dividend = gpr.GetImm(a);
gpr.BindToRegister(d, d == b);
ARM64Reg RB = gpr.R(b);
ARM64Reg RD = gpr.R(d);
FixupBranch overflow1 = CBZ(RB);
FixupBranch overflow2;
if (dividend == 0x80000000)
{
CMN(RB, 1);
overflow2 = B(CC_EQ);
}
SDIV(RD, gpr.R(a), RB);
FixupBranch done = B();
SetJumpTarget(overflow1);
if (dividend == 0x80000000)
SetJumpTarget(overflow2);
MOVI2R(RD, dividend & 0x80000000 ? 0xFFFFFFFF : 0);
SetJumpTarget(done);
if (inst.Rc)
ComputeRC0(RD);
}
else if (gpr.IsImm(b) && gpr.GetImm(b) != 0 && gpr.GetImm(b) != UINT32_C(0xFFFFFFFF))
{
ARM64Reg WA = gpr.GetReg();
@ -1352,16 +1382,16 @@ void JitArm64::divwx(UGeckoInstruction inst)
ARM64Reg RB = gpr.R(b);
ARM64Reg RD = gpr.R(d);
FixupBranch slow1 = CBZ(RB);
FixupBranch overflow1 = CBZ(RB);
MOVI2R(WA, -0x80000000LL);
CMP(RA, WA);
CCMN(RB, 1, 0, CC_EQ);
FixupBranch slow2 = B(CC_EQ);
FixupBranch overflow2 = B(CC_EQ);
SDIV(RD, RA, RB);
FixupBranch done = B();
SetJumpTarget(slow1);
SetJumpTarget(slow2);
SetJumpTarget(overflow1);
SetJumpTarget(overflow2);
ASR(RD, RA, 31);