JitArm64: Add negate parameter to SetCRFieldBit

Unlike on x64, inverting EQ or GT in SetCRFieldBit saves us one
instruction. Also unlike on x64, inverting SO or LT in GetCRFieldBit
requires an extra instruction (just like in SetCRFieldBit). Due to this,
replacing an invert in GetCRFieldBit with an invert in SetCRFieldBit
when possible is either equally good or better - never worse.
This commit is contained in:
JosJuice 2024-05-25 17:17:08 +02:00
parent 7fddd39d97
commit 9246bcad55
2 changed files with 24 additions and 19 deletions

View File

@ -356,7 +356,7 @@ protected:
void WriteBLRExit(Arm64Gen::ARM64Reg dest); void WriteBLRExit(Arm64Gen::ARM64Reg dest);
void GetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg out, bool negate = false); void GetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg out, bool negate = false);
void SetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg in); void SetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg in, bool negate = false);
void ClearCRFieldBit(int field, int bit); void ClearCRFieldBit(int field, int bit);
void SetCRFieldBit(int field, int bit); void SetCRFieldBit(int field, int bit);
void FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg); void FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg);

View File

@ -54,7 +54,7 @@ void JitArm64::GetCRFieldBit(int field, int bit, ARM64Reg out, bool negate)
} }
} }
void JitArm64::SetCRFieldBit(int field, int bit, ARM64Reg in) void JitArm64::SetCRFieldBit(int field, int bit, ARM64Reg in, bool negate)
{ {
gpr.BindCRToRegister(field, true); gpr.BindCRToRegister(field, true);
ARM64Reg CR = gpr.CR(field); ARM64Reg CR = gpr.CR(field);
@ -66,21 +66,27 @@ void JitArm64::SetCRFieldBit(int field, int bit, ARM64Reg in)
{ {
case PowerPC::CR_SO_BIT: // set bit 59 to input case PowerPC::CR_SO_BIT: // set bit 59 to input
BFI(CR, in, PowerPC::CR_EMU_SO_BIT, 1); BFI(CR, in, PowerPC::CR_EMU_SO_BIT, 1);
if (negate)
EOR(CR, CR, LogicalImm(1ULL << PowerPC::CR_EMU_SO_BIT, GPRSize::B64));
break; break;
case PowerPC::CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input case PowerPC::CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input
AND(CR, CR, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64)); AND(CR, CR, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64));
EOR(in, in, LogicalImm(1, GPRSize::B64));
ORR(CR, CR, in); ORR(CR, CR, in);
if (!negate)
EOR(CR, CR, LogicalImm(1ULL << 0, GPRSize::B64));
break; break;
case PowerPC::CR_GT_BIT: // set bit 63 to !input case PowerPC::CR_GT_BIT: // set bit 63 to !input
EOR(in, in, LogicalImm(1, GPRSize::B64));
BFI(CR, in, 63, 1); BFI(CR, in, 63, 1);
if (!negate)
EOR(CR, CR, LogicalImm(1ULL << 63, GPRSize::B64));
break; break;
case PowerPC::CR_LT_BIT: // set bit 62 to input case PowerPC::CR_LT_BIT: // set bit 62 to input
BFI(CR, in, PowerPC::CR_EMU_LT_BIT, 1); BFI(CR, in, PowerPC::CR_EMU_LT_BIT, 1);
if (negate)
EOR(CR, CR, LogicalImm(1ULL << PowerPC::CR_EMU_LT_BIT, GPRSize::B64));
break; break;
} }
@ -615,7 +621,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
auto WA = gpr.GetScopedReg(); auto WA = gpr.GetScopedReg();
ARM64Reg XA = EncodeRegTo64(WA); ARM64Reg XA = EncodeRegTo64(WA);
GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false); GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false);
SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA); SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, false);
return; return;
} }
case 33: // crnor: ~(A || B) => ~A case 33: // crnor: ~(A || B) => ~A
@ -623,43 +629,42 @@ void JitArm64::crXXX(UGeckoInstruction inst)
{ {
auto WA = gpr.GetScopedReg(); auto WA = gpr.GetScopedReg();
ARM64Reg XA = EncodeRegTo64(WA); ARM64Reg XA = EncodeRegTo64(WA);
GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, true); GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false);
SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA); SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, true);
return; return;
} }
} }
} }
// crandc or crorc
const bool negate_b = inst.SUBOP10 == 129 || inst.SUBOP10 == 417;
// crnor or crnand or creqv
const bool negate_result = inst.SUBOP10 == 33 || inst.SUBOP10 == 225 || inst.SUBOP10 == 289;
auto WA = gpr.GetScopedReg(); auto WA = gpr.GetScopedReg();
ARM64Reg XA = EncodeRegTo64(WA); ARM64Reg XA = EncodeRegTo64(WA);
{ {
auto WB = gpr.GetScopedReg(); auto WB = gpr.GetScopedReg();
ARM64Reg XB = EncodeRegTo64(WB); ARM64Reg XB = EncodeRegTo64(WB);
// creqv or crnand or crnor GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false);
bool negateA = inst.SUBOP10 == 289 || inst.SUBOP10 == 225 || inst.SUBOP10 == 33; GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB, negate_b);
// crandc or crorc or crnand or crnor
bool negateB =
inst.SUBOP10 == 129 || inst.SUBOP10 == 417 || inst.SUBOP10 == 225 || inst.SUBOP10 == 33;
GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, negateA);
GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB, negateB);
// Compute combined bit // Compute combined bit
switch (inst.SUBOP10) switch (inst.SUBOP10)
{ {
case 33: // crnor: ~(A || B) == (~A && ~B)
case 129: // crandc: A && ~B case 129: // crandc: A && ~B
case 225: // crnand: ~(A && B)
case 257: // crand: A && B case 257: // crand: A && B
AND(XA, XA, XB); AND(XA, XA, XB);
break; break;
case 193: // crxor: A ^ B case 193: // crxor: A ^ B
case 289: // creqv: ~(A ^ B) = ~A ^ B case 289: // creqv: ~(A ^ B)
EOR(XA, XA, XB); EOR(XA, XA, XB);
break; break;
case 225: // crnand: ~(A && B) == (~A || ~B) case 33: // crnor: ~(A || B)
case 417: // crorc: A || ~B case 417: // crorc: A || ~B
case 449: // cror: A || B case 449: // cror: A || B
ORR(XA, XA, XB); ORR(XA, XA, XB);
@ -668,7 +673,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
} }
// Store result bit in CRBD // Store result bit in CRBD
SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA); SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, negate_result);
} }
void JitArm64::mfcr(UGeckoInstruction inst) void JitArm64::mfcr(UGeckoInstruction inst)