Merge pull request #12801 from JosJuice/jitarm64-crxxx-opt

JitArm64: Optimize crXXX
This commit is contained in:
JMC47 2025-01-03 16:32:49 -05:00 committed by GitHub
commit 9b3b6bea9d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 197 additions and 156 deletions

View File

@ -355,8 +355,13 @@ protected:
Arm64Gen::ARM64Reg exit_address_after_return_reg = Arm64Gen::ARM64Reg::INVALID_REG); Arm64Gen::ARM64Reg exit_address_after_return_reg = Arm64Gen::ARM64Reg::INVALID_REG);
void WriteBLRExit(Arm64Gen::ARM64Reg dest); void WriteBLRExit(Arm64Gen::ARM64Reg dest);
Arm64Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set); void GetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg out);
void SetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg in, bool negate = false);
void ClearCRFieldBit(int field, int bit);
void SetCRFieldBit(int field, int bit);
void FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg); void FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg);
Arm64Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set);
void UpdateFPExceptionSummary(Arm64Gen::ARM64Reg fpscr); void UpdateFPExceptionSummary(Arm64Gen::ARM64Reg fpscr);
void UpdateRoundingMode(); void UpdateRoundingMode();

View File

@ -20,6 +20,144 @@
using namespace Arm64Gen; using namespace Arm64Gen;
void JitArm64::GetCRFieldBit(int field, int bit, ARM64Reg out)
{
ARM64Reg CR = gpr.CR(field);
ARM64Reg WCR = EncodeRegTo32(CR);
switch (bit)
{
case PowerPC::CR_SO_BIT: // check bit 59 set
UBFX(out, CR, PowerPC::CR_EMU_SO_BIT, 1);
break;
case PowerPC::CR_EQ_BIT: // check bits 31-0 == 0
CMP(WCR, ARM64Reg::WZR);
CSET(out, CC_EQ);
break;
case PowerPC::CR_GT_BIT: // check val > 0
CMP(CR, ARM64Reg::ZR);
CSET(out, CC_GT);
break;
case PowerPC::CR_LT_BIT: // check bit 62 set
UBFX(out, CR, PowerPC::CR_EMU_LT_BIT, 1);
break;
default:
ASSERT_MSG(DYNA_REC, false, "Invalid CR bit");
}
}
void JitArm64::SetCRFieldBit(int field, int bit, ARM64Reg in, bool negate)
{
gpr.BindCRToRegister(field, true);
ARM64Reg CR = gpr.CR(field);
if (bit != PowerPC::CR_GT_BIT)
FixGTBeforeSettingCRFieldBit(CR);
switch (bit)
{
case PowerPC::CR_SO_BIT: // set bit 59 to input
BFI(CR, in, PowerPC::CR_EMU_SO_BIT, 1);
if (negate)
EOR(CR, CR, LogicalImm(1ULL << PowerPC::CR_EMU_SO_BIT, GPRSize::B64));
break;
case PowerPC::CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input
AND(CR, CR, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64));
ORR(CR, CR, in);
if (!negate)
EOR(CR, CR, LogicalImm(1ULL << 0, GPRSize::B64));
break;
case PowerPC::CR_GT_BIT: // set bit 63 to !input
BFI(CR, in, 63, 1);
if (!negate)
EOR(CR, CR, LogicalImm(1ULL << 63, GPRSize::B64));
break;
case PowerPC::CR_LT_BIT: // set bit 62 to input
BFI(CR, in, PowerPC::CR_EMU_LT_BIT, 1);
if (negate)
EOR(CR, CR, LogicalImm(1ULL << PowerPC::CR_EMU_LT_BIT, GPRSize::B64));
break;
}
ORR(CR, CR, LogicalImm(1ULL << 32, GPRSize::B64));
}
void JitArm64::ClearCRFieldBit(int field, int bit)
{
gpr.BindCRToRegister(field, true);
ARM64Reg XA = gpr.CR(field);
switch (bit)
{
case PowerPC::CR_SO_BIT:
AND(XA, XA, LogicalImm(~(u64(1) << PowerPC::CR_EMU_SO_BIT), GPRSize::B64));
break;
case PowerPC::CR_EQ_BIT:
FixGTBeforeSettingCRFieldBit(XA);
ORR(XA, XA, LogicalImm(1, GPRSize::B64));
break;
case PowerPC::CR_GT_BIT:
ORR(XA, XA, LogicalImm(u64(1) << 63, GPRSize::B64));
break;
case PowerPC::CR_LT_BIT:
AND(XA, XA, LogicalImm(~(u64(1) << PowerPC::CR_EMU_LT_BIT), GPRSize::B64));
break;
}
}
void JitArm64::SetCRFieldBit(int field, int bit)
{
gpr.BindCRToRegister(field, true);
ARM64Reg XA = gpr.CR(field);
if (bit != PowerPC::CR_GT_BIT)
FixGTBeforeSettingCRFieldBit(XA);
switch (bit)
{
case PowerPC::CR_SO_BIT:
ORR(XA, XA, LogicalImm(u64(1) << PowerPC::CR_EMU_SO_BIT, GPRSize::B64));
break;
case PowerPC::CR_EQ_BIT:
AND(XA, XA, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64));
break;
case PowerPC::CR_GT_BIT:
AND(XA, XA, LogicalImm(~(u64(1) << 63), GPRSize::B64));
break;
case PowerPC::CR_LT_BIT:
ORR(XA, XA, LogicalImm(u64(1) << PowerPC::CR_EMU_LT_BIT, GPRSize::B64));
break;
}
ORR(XA, XA, LogicalImm(u64(1) << 32, GPRSize::B64));
}
// Emits code that rewrites an all-zero CR representation in `reg` to "bit 63 set", leaving
// every other value unchanged. A temporary register is taken from gpr for the duration.
void JitArm64::FixGTBeforeSettingCRFieldBit(ARM64Reg reg)
{
  // The internal representation treats GT as set only when the 64-bit value is > 0. Both
  // "bit 63 set" and "all bits zero" therefore mean GT is clear. If the value is all zeroes
  // and the caller is about to set a bit that has nothing to do with GT, the result would
  // become positive and GT would spuriously turn on. Pre-setting bit 63 in that one case
  // prevents it. Ugly, but required: real games break without this.
  auto scratch32 = gpr.GetScopedReg();
  ARM64Reg scratch64 = EncodeRegTo64(scratch32);

  // scratch64 = reg with bit 63 forced on.
  ORR(scratch64, reg, LogicalImm(1ULL << 63, GPRSize::B64));
  // Keep reg when it is nonzero, otherwise take the bit-63 variant.
  CMP(reg, ARM64Reg::ZR);
  CSEL(reg, reg, scratch64, CC_NEQ);
}
FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
{ {
ARM64Reg XA = gpr.CR(field); ARM64Reg XA = gpr.CR(field);
@ -42,19 +180,6 @@ FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
} }
} }
// Emits code that replaces an all-zero CR representation in `reg` with one that has bit 63
// set; any nonzero value is left as it is. Uses one temporary register obtained from gpr.
void JitArm64::FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg)
{
  // GT counts as clear whenever the internal 64-bit value is <= 0, i.e. when bit 63 is set or
  // when every bit is zero. Setting an unrelated bit on an all-zero value would make it
  // positive and so make GT read as set. Forcing bit 63 on beforehand avoids that. It is not
  // pretty, but actual games depend on it.
  auto tmp_w = gpr.GetScopedReg();
  ARM64Reg tmp_x = EncodeRegTo64(tmp_w);

  // tmp_x = reg | (1 << 63)
  ORR(tmp_x, reg, LogicalImm(1ULL << 63, GPRSize::B64));
  // reg = (reg != 0) ? reg : tmp_x
  CMP(reg, ARM64Reg::ZR);
  CSEL(reg, reg, tmp_x, CC_NEQ);
}
void JitArm64::UpdateFPExceptionSummary(ARM64Reg fpscr) void JitArm64::UpdateFPExceptionSummary(ARM64Reg fpscr)
{ {
auto WA = gpr.GetScopedReg(); auto WA = gpr.GetScopedReg();
@ -468,72 +593,47 @@ void JitArm64::crXXX(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff); JITDISABLE(bJITSystemRegistersOff);
// Special case: crclr if (inst.CRBA == inst.CRBB)
if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 193)
{ {
// Clear CR field bit switch (inst.SUBOP10)
int field = inst.CRBD >> 2;
int bit = 3 - (inst.CRBD & 3);
gpr.BindCRToRegister(field, true);
ARM64Reg XA = gpr.CR(field);
switch (bit)
{ {
case PowerPC::CR_SO_BIT: // crclr
AND(XA, XA, LogicalImm(~(u64(1) << PowerPC::CR_EMU_SO_BIT), GPRSize::B64)); case 129: // crandc: A && ~B => 0
break; case 193: // crxor: A ^ B => 0
{
case PowerPC::CR_EQ_BIT: ClearCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3));
FixGTBeforeSettingCRFieldBit(XA);
ORR(XA, XA, LogicalImm(1, GPRSize::B64));
break;
case PowerPC::CR_GT_BIT:
ORR(XA, XA, LogicalImm(u64(1) << 63, GPRSize::B64));
break;
case PowerPC::CR_LT_BIT:
AND(XA, XA, LogicalImm(~(u64(1) << PowerPC::CR_EMU_LT_BIT), GPRSize::B64));
break;
}
return; return;
} }
// crset
// Special case: crset case 289: // creqv: ~(A ^ B) => 1
if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 289) case 417: // crorc: A || ~B => 1
{ {
// SetCRFieldBit SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3));
int field = inst.CRBD >> 2;
int bit = 3 - (inst.CRBD & 3);
gpr.BindCRToRegister(field, true);
ARM64Reg XA = gpr.CR(field);
if (bit != PowerPC::CR_GT_BIT)
FixGTBeforeSettingCRFieldBit(XA);
switch (bit)
{
case PowerPC::CR_SO_BIT:
ORR(XA, XA, LogicalImm(u64(1) << PowerPC::CR_EMU_SO_BIT, GPRSize::B64));
break;
case PowerPC::CR_EQ_BIT:
AND(XA, XA, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64));
break;
case PowerPC::CR_GT_BIT:
AND(XA, XA, LogicalImm(~(u64(1) << 63), GPRSize::B64));
break;
case PowerPC::CR_LT_BIT:
ORR(XA, XA, LogicalImm(u64(1) << PowerPC::CR_EMU_LT_BIT, GPRSize::B64));
break;
}
ORR(XA, XA, LogicalImm(u64(1) << 32, GPRSize::B64));
return; return;
} }
case 257: // crand: A && B => A
case 449: // cror: A || B => A
{
auto WA = gpr.GetScopedReg();
ARM64Reg XA = EncodeRegTo64(WA);
GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA);
SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, false);
return;
}
case 33: // crnor: ~(A || B) => ~A
case 225: // crnand: ~(A && B) => ~A
{
auto WA = gpr.GetScopedReg();
ARM64Reg XA = EncodeRegTo64(WA);
GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA);
SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, true);
return;
}
}
}
// crnor or crnand
const bool negate_result = inst.SUBOP10 == 33 || inst.SUBOP10 == 225;
auto WA = gpr.GetScopedReg(); auto WA = gpr.GetScopedReg();
ARM64Reg XA = EncodeRegTo64(WA); ARM64Reg XA = EncodeRegTo64(WA);
@ -541,106 +641,42 @@ void JitArm64::crXXX(UGeckoInstruction inst)
auto WB = gpr.GetScopedReg(); auto WB = gpr.GetScopedReg();
ARM64Reg XB = EncodeRegTo64(WB); ARM64Reg XB = EncodeRegTo64(WB);
// creqv or crnand or crnor GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA);
bool negateA = inst.SUBOP10 == 289 || inst.SUBOP10 == 225 || inst.SUBOP10 == 33; GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB);
// crandc or crorc or crnand or crnor
bool negateB =
inst.SUBOP10 == 129 || inst.SUBOP10 == 417 || inst.SUBOP10 == 225 || inst.SUBOP10 == 33;
// GetCRFieldBit
for (int i = 0; i < 2; i++)
{
int field = i ? inst.CRBB >> 2 : inst.CRBA >> 2;
int bit = i ? 3 - (inst.CRBB & 3) : 3 - (inst.CRBA & 3);
ARM64Reg out = i ? XB : XA;
bool negate = i ? negateB : negateA;
ARM64Reg XC = gpr.CR(field);
ARM64Reg WC = EncodeRegTo32(XC);
switch (bit)
{
case PowerPC::CR_SO_BIT: // check bit 59 set
UBFX(out, XC, PowerPC::CR_EMU_SO_BIT, 1);
if (negate)
EOR(out, out, LogicalImm(1, GPRSize::B64));
break;
case PowerPC::CR_EQ_BIT: // check bits 31-0 == 0
CMP(WC, ARM64Reg::WZR);
CSET(out, negate ? CC_NEQ : CC_EQ);
break;
case PowerPC::CR_GT_BIT: // check val > 0
CMP(XC, ARM64Reg::ZR);
CSET(out, negate ? CC_LE : CC_GT);
break;
case PowerPC::CR_LT_BIT: // check bit 62 set
UBFX(out, XC, PowerPC::CR_EMU_LT_BIT, 1);
if (negate)
EOR(out, out, LogicalImm(1, GPRSize::B64));
break;
default:
ASSERT_MSG(DYNA_REC, false, "Invalid CR bit");
}
}
// Compute combined bit // Compute combined bit
switch (inst.SUBOP10) switch (inst.SUBOP10)
{ {
case 33: // crnor: ~(A || B) == (~A && ~B) case 225: // crnand: ~(A && B)
case 129: // crandc: A && ~B
case 257: // crand: A && B case 257: // crand: A && B
AND(XA, XA, XB); AND(XA, XA, XB);
break; break;
case 129: // crandc: A && ~B
BIC(XA, XA, XB);
break;
case 193: // crxor: A ^ B case 193: // crxor: A ^ B
case 289: // creqv: ~(A ^ B) = ~A ^ B
EOR(XA, XA, XB); EOR(XA, XA, XB);
break; break;
case 225: // crnand: ~(A && B) == (~A || ~B) case 289: // creqv: ~(A ^ B) = A ^ ~B
case 417: // crorc: A || ~B EON(XA, XA, XB);
break;
case 33: // crnor: ~(A || B)
case 449: // cror: A || B case 449: // cror: A || B
ORR(XA, XA, XB); ORR(XA, XA, XB);
break; break;
case 417: // crorc: A || ~B
ORN(XA, XA, XB);
break;
} }
} }
// Store result bit in CRBD // Store result bit in CRBD
int field = inst.CRBD >> 2; SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, negate_result);
int bit = 3 - (inst.CRBD & 3);
gpr.BindCRToRegister(field, true);
ARM64Reg CR = gpr.CR(field);
if (bit != PowerPC::CR_GT_BIT)
FixGTBeforeSettingCRFieldBit(CR);
switch (bit)
{
case PowerPC::CR_SO_BIT: // set bit 59 to input
BFI(CR, XA, PowerPC::CR_EMU_SO_BIT, 1);
break;
case PowerPC::CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input
AND(CR, CR, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64));
EOR(XA, XA, LogicalImm(1, GPRSize::B64));
ORR(CR, CR, XA);
break;
case PowerPC::CR_GT_BIT: // set bit 63 to !input
EOR(XA, XA, LogicalImm(1, GPRSize::B64));
BFI(CR, XA, 63, 1);
break;
case PowerPC::CR_LT_BIT: // set bit 62 to input
BFI(CR, XA, PowerPC::CR_EMU_LT_BIT, 1);
break;
}
ORR(CR, CR, LogicalImm(1ULL << 32, GPRSize::B64));
} }
void JitArm64::mfcr(UGeckoInstruction inst) void JitArm64::mfcr(UGeckoInstruction inst)