Merge pull request #12801 from JosJuice/jitarm64-crxxx-opt

JitArm64: Optimize crXXX
This commit is contained in:
JMC47 2025-01-03 16:32:49 -05:00 committed by GitHub
commit 9b3b6bea9d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 197 additions and 156 deletions

View File

@ -355,8 +355,13 @@ protected:
Arm64Gen::ARM64Reg exit_address_after_return_reg = Arm64Gen::ARM64Reg::INVALID_REG);
void WriteBLRExit(Arm64Gen::ARM64Reg dest);
Arm64Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set);
void GetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg out);
void SetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg in, bool negate = false);
void ClearCRFieldBit(int field, int bit);
void SetCRFieldBit(int field, int bit);
void FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg);
Arm64Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set);
void UpdateFPExceptionSummary(Arm64Gen::ARM64Reg fpscr);
void UpdateRoundingMode();

View File

@ -20,6 +20,144 @@
using namespace Arm64Gen;
void JitArm64::GetCRFieldBit(int field, int bit, ARM64Reg out)
{
ARM64Reg CR = gpr.CR(field);
ARM64Reg WCR = EncodeRegTo32(CR);
switch (bit)
{
case PowerPC::CR_SO_BIT: // check bit 59 set
UBFX(out, CR, PowerPC::CR_EMU_SO_BIT, 1);
break;
case PowerPC::CR_EQ_BIT: // check bits 31-0 == 0
CMP(WCR, ARM64Reg::WZR);
CSET(out, CC_EQ);
break;
case PowerPC::CR_GT_BIT: // check val > 0
CMP(CR, ARM64Reg::ZR);
CSET(out, CC_GT);
break;
case PowerPC::CR_LT_BIT: // check bit 62 set
UBFX(out, CR, PowerPC::CR_EMU_LT_BIT, 1);
break;
default:
ASSERT_MSG(DYNA_REC, false, "Invalid CR bit");
}
}
void JitArm64::SetCRFieldBit(int field, int bit, ARM64Reg in, bool negate)
{
gpr.BindCRToRegister(field, true);
ARM64Reg CR = gpr.CR(field);
if (bit != PowerPC::CR_GT_BIT)
FixGTBeforeSettingCRFieldBit(CR);
switch (bit)
{
case PowerPC::CR_SO_BIT: // set bit 59 to input
BFI(CR, in, PowerPC::CR_EMU_SO_BIT, 1);
if (negate)
EOR(CR, CR, LogicalImm(1ULL << PowerPC::CR_EMU_SO_BIT, GPRSize::B64));
break;
case PowerPC::CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input
AND(CR, CR, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64));
ORR(CR, CR, in);
if (!negate)
EOR(CR, CR, LogicalImm(1ULL << 0, GPRSize::B64));
break;
case PowerPC::CR_GT_BIT: // set bit 63 to !input
BFI(CR, in, 63, 1);
if (!negate)
EOR(CR, CR, LogicalImm(1ULL << 63, GPRSize::B64));
break;
case PowerPC::CR_LT_BIT: // set bit 62 to input
BFI(CR, in, PowerPC::CR_EMU_LT_BIT, 1);
if (negate)
EOR(CR, CR, LogicalImm(1ULL << PowerPC::CR_EMU_LT_BIT, GPRSize::B64));
break;
}
ORR(CR, CR, LogicalImm(1ULL << 32, GPRSize::B64));
}
void JitArm64::ClearCRFieldBit(int field, int bit)
{
gpr.BindCRToRegister(field, true);
ARM64Reg XA = gpr.CR(field);
switch (bit)
{
case PowerPC::CR_SO_BIT:
AND(XA, XA, LogicalImm(~(u64(1) << PowerPC::CR_EMU_SO_BIT), GPRSize::B64));
break;
case PowerPC::CR_EQ_BIT:
FixGTBeforeSettingCRFieldBit(XA);
ORR(XA, XA, LogicalImm(1, GPRSize::B64));
break;
case PowerPC::CR_GT_BIT:
ORR(XA, XA, LogicalImm(u64(1) << 63, GPRSize::B64));
break;
case PowerPC::CR_LT_BIT:
AND(XA, XA, LogicalImm(~(u64(1) << PowerPC::CR_EMU_LT_BIT), GPRSize::B64));
break;
}
}
void JitArm64::SetCRFieldBit(int field, int bit)
{
gpr.BindCRToRegister(field, true);
ARM64Reg XA = gpr.CR(field);
if (bit != PowerPC::CR_GT_BIT)
FixGTBeforeSettingCRFieldBit(XA);
switch (bit)
{
case PowerPC::CR_SO_BIT:
ORR(XA, XA, LogicalImm(u64(1) << PowerPC::CR_EMU_SO_BIT, GPRSize::B64));
break;
case PowerPC::CR_EQ_BIT:
AND(XA, XA, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64));
break;
case PowerPC::CR_GT_BIT:
AND(XA, XA, LogicalImm(~(u64(1) << 63), GPRSize::B64));
break;
case PowerPC::CR_LT_BIT:
ORR(XA, XA, LogicalImm(u64(1) << PowerPC::CR_EMU_LT_BIT, GPRSize::B64));
break;
}
ORR(XA, XA, LogicalImm(u64(1) << 32, GPRSize::B64));
}
void JitArm64::FixGTBeforeSettingCRFieldBit(ARM64Reg reg)
{
// GT is considered unset if the internal representation is <= 0, or in other words,
// if the internal representation either has bit 63 set or has all bits set to zero.
// If all bits are zero and we set some bit that's unrelated to GT, we need to set bit 63 so GT
// doesn't accidentally become considered set. Gross but necessary; this can break actual games.
auto WA = gpr.GetScopedReg();
ARM64Reg XA = EncodeRegTo64(WA);
ORR(XA, reg, LogicalImm(1ULL << 63, GPRSize::B64));
CMP(reg, ARM64Reg::ZR);
CSEL(reg, reg, XA, CC_NEQ);
}
FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
{
ARM64Reg XA = gpr.CR(field);
@ -42,19 +180,6 @@ FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set)
}
}
void JitArm64::FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg)
{
// GT is considered unset if the internal representation is <= 0, or in other words,
// if the internal representation either has bit 63 set or has all bits set to zero.
// If all bits are zero and we set some bit that's unrelated to GT, we need to set bit 63 so GT
// doesn't accidentally become considered set. Gross but necessary; this can break actual games.
auto WA = gpr.GetScopedReg();
ARM64Reg XA = EncodeRegTo64(WA);
ORR(XA, reg, LogicalImm(1ULL << 63, GPRSize::B64));
CMP(reg, ARM64Reg::ZR);
CSEL(reg, reg, XA, CC_NEQ);
}
void JitArm64::UpdateFPExceptionSummary(ARM64Reg fpscr)
{
auto WA = gpr.GetScopedReg();
@ -468,72 +593,47 @@ void JitArm64::crXXX(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITSystemRegistersOff);
// Special case: crclr
if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 193)
if (inst.CRBA == inst.CRBB)
{
// Clear CR field bit
int field = inst.CRBD >> 2;
int bit = 3 - (inst.CRBD & 3);
gpr.BindCRToRegister(field, true);
ARM64Reg XA = gpr.CR(field);
switch (bit)
switch (inst.SUBOP10)
{
case PowerPC::CR_SO_BIT:
AND(XA, XA, LogicalImm(~(u64(1) << PowerPC::CR_EMU_SO_BIT), GPRSize::B64));
break;
case PowerPC::CR_EQ_BIT:
FixGTBeforeSettingCRFieldBit(XA);
ORR(XA, XA, LogicalImm(1, GPRSize::B64));
break;
case PowerPC::CR_GT_BIT:
ORR(XA, XA, LogicalImm(u64(1) << 63, GPRSize::B64));
break;
case PowerPC::CR_LT_BIT:
AND(XA, XA, LogicalImm(~(u64(1) << PowerPC::CR_EMU_LT_BIT), GPRSize::B64));
break;
// crclr
case 129: // crandc: A && ~B => 0
case 193: // crxor: A ^ B => 0
{
ClearCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3));
return;
}
// crset
case 289: // creqv: ~(A ^ B) => 1
case 417: // crorc: A || ~B => 1
{
SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3));
return;
}
case 257: // crand: A && B => A
case 449: // cror: A || B => A
{
auto WA = gpr.GetScopedReg();
ARM64Reg XA = EncodeRegTo64(WA);
GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA);
SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, false);
return;
}
case 33: // crnor: ~(A || B) => ~A
case 225: // crnand: ~(A && B) => ~A
{
auto WA = gpr.GetScopedReg();
ARM64Reg XA = EncodeRegTo64(WA);
GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA);
SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, true);
return;
}
}
return;
}
// Special case: crset
if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 289)
{
// SetCRFieldBit
int field = inst.CRBD >> 2;
int bit = 3 - (inst.CRBD & 3);
gpr.BindCRToRegister(field, true);
ARM64Reg XA = gpr.CR(field);
if (bit != PowerPC::CR_GT_BIT)
FixGTBeforeSettingCRFieldBit(XA);
switch (bit)
{
case PowerPC::CR_SO_BIT:
ORR(XA, XA, LogicalImm(u64(1) << PowerPC::CR_EMU_SO_BIT, GPRSize::B64));
break;
case PowerPC::CR_EQ_BIT:
AND(XA, XA, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64));
break;
case PowerPC::CR_GT_BIT:
AND(XA, XA, LogicalImm(~(u64(1) << 63), GPRSize::B64));
break;
case PowerPC::CR_LT_BIT:
ORR(XA, XA, LogicalImm(u64(1) << PowerPC::CR_EMU_LT_BIT, GPRSize::B64));
break;
}
ORR(XA, XA, LogicalImm(u64(1) << 32, GPRSize::B64));
return;
}
// crnor or crnand
const bool negate_result = inst.SUBOP10 == 33 || inst.SUBOP10 == 225;
auto WA = gpr.GetScopedReg();
ARM64Reg XA = EncodeRegTo64(WA);
@ -541,106 +641,42 @@ void JitArm64::crXXX(UGeckoInstruction inst)
auto WB = gpr.GetScopedReg();
ARM64Reg XB = EncodeRegTo64(WB);
// creqv or crnand or crnor
bool negateA = inst.SUBOP10 == 289 || inst.SUBOP10 == 225 || inst.SUBOP10 == 33;
// crandc or crorc or crnand or crnor
bool negateB =
inst.SUBOP10 == 129 || inst.SUBOP10 == 417 || inst.SUBOP10 == 225 || inst.SUBOP10 == 33;
// GetCRFieldBit
for (int i = 0; i < 2; i++)
{
int field = i ? inst.CRBB >> 2 : inst.CRBA >> 2;
int bit = i ? 3 - (inst.CRBB & 3) : 3 - (inst.CRBA & 3);
ARM64Reg out = i ? XB : XA;
bool negate = i ? negateB : negateA;
ARM64Reg XC = gpr.CR(field);
ARM64Reg WC = EncodeRegTo32(XC);
switch (bit)
{
case PowerPC::CR_SO_BIT: // check bit 59 set
UBFX(out, XC, PowerPC::CR_EMU_SO_BIT, 1);
if (negate)
EOR(out, out, LogicalImm(1, GPRSize::B64));
break;
case PowerPC::CR_EQ_BIT: // check bits 31-0 == 0
CMP(WC, ARM64Reg::WZR);
CSET(out, negate ? CC_NEQ : CC_EQ);
break;
case PowerPC::CR_GT_BIT: // check val > 0
CMP(XC, ARM64Reg::ZR);
CSET(out, negate ? CC_LE : CC_GT);
break;
case PowerPC::CR_LT_BIT: // check bit 62 set
UBFX(out, XC, PowerPC::CR_EMU_LT_BIT, 1);
if (negate)
EOR(out, out, LogicalImm(1, GPRSize::B64));
break;
default:
ASSERT_MSG(DYNA_REC, false, "Invalid CR bit");
}
}
GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA);
GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB);
// Compute combined bit
switch (inst.SUBOP10)
{
case 33: // crnor: ~(A || B) == (~A && ~B)
case 129: // crandc: A && ~B
case 225: // crnand: ~(A && B)
case 257: // crand: A && B
AND(XA, XA, XB);
break;
case 129: // crandc: A && ~B
BIC(XA, XA, XB);
break;
case 193: // crxor: A ^ B
case 289: // creqv: ~(A ^ B) = ~A ^ B
EOR(XA, XA, XB);
break;
case 225: // crnand: ~(A && B) == (~A || ~B)
case 417: // crorc: A || ~B
case 289: // creqv: ~(A ^ B) = A ^ ~B
EON(XA, XA, XB);
break;
case 33: // crnor: ~(A || B)
case 449: // cror: A || B
ORR(XA, XA, XB);
break;
case 417: // crorc: A || ~B
ORN(XA, XA, XB);
break;
}
}
// Store result bit in CRBD
int field = inst.CRBD >> 2;
int bit = 3 - (inst.CRBD & 3);
gpr.BindCRToRegister(field, true);
ARM64Reg CR = gpr.CR(field);
if (bit != PowerPC::CR_GT_BIT)
FixGTBeforeSettingCRFieldBit(CR);
switch (bit)
{
case PowerPC::CR_SO_BIT: // set bit 59 to input
BFI(CR, XA, PowerPC::CR_EMU_SO_BIT, 1);
break;
case PowerPC::CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input
AND(CR, CR, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64));
EOR(XA, XA, LogicalImm(1, GPRSize::B64));
ORR(CR, CR, XA);
break;
case PowerPC::CR_GT_BIT: // set bit 63 to !input
EOR(XA, XA, LogicalImm(1, GPRSize::B64));
BFI(CR, XA, 63, 1);
break;
case PowerPC::CR_LT_BIT: // set bit 62 to input
BFI(CR, XA, PowerPC::CR_EMU_LT_BIT, 1);
break;
}
ORR(CR, CR, LogicalImm(1ULL << 32, GPRSize::B64));
SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, negate_result);
}
void JitArm64::mfcr(UGeckoInstruction inst)