From bbe271eec6735632a35aca7a87f38ad9800ab15c Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 25 May 2024 08:57:35 +0200 Subject: [PATCH 1/5] JitArm64: Refactor CR bit manipulation code This brings JitArm64 more in line with Jit64, and makes the next commit easier to implement. No functional change. --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 7 +- .../JitArm64/JitArm64_SystemRegisters.cpp | 279 +++++++++--------- 2 files changed, 147 insertions(+), 139 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 0c1ea0d647..6cfbdb6a47 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -355,8 +355,13 @@ protected: Arm64Gen::ARM64Reg exit_address_after_return_reg = Arm64Gen::ARM64Reg::INVALID_REG); void WriteBLRExit(Arm64Gen::ARM64Reg dest); - Arm64Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set); + void GetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg out, bool negate = false); + void SetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg in); + void ClearCRFieldBit(int field, int bit); + void SetCRFieldBit(int field, int bit); void FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg); + Arm64Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set); + void UpdateFPExceptionSummary(Arm64Gen::ARM64Reg fpscr); void UpdateRoundingMode(); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index df4ea4931a..666103467c 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -20,6 +20,142 @@ using namespace Arm64Gen; +void JitArm64::GetCRFieldBit(int field, int bit, ARM64Reg out, bool negate) +{ + ARM64Reg CR = gpr.CR(field); + ARM64Reg WCR = EncodeRegTo32(CR); + + switch (bit) + { + case PowerPC::CR_SO_BIT: // check bit 59 set + UBFX(out, CR, PowerPC::CR_EMU_SO_BIT, 1); + if (negate) + EOR(out, out, LogicalImm(1, GPRSize::B64)); + break; + + case PowerPC::CR_EQ_BIT: // check bits 31-0 == 0 + CMP(WCR, ARM64Reg::WZR); + CSET(out, negate ? CC_NEQ : CC_EQ); + break; + + case PowerPC::CR_GT_BIT: // check val > 0 + CMP(CR, ARM64Reg::ZR); + CSET(out, negate ? CC_LE : CC_GT); + break; + + case PowerPC::CR_LT_BIT: // check bit 62 set + UBFX(out, CR, PowerPC::CR_EMU_LT_BIT, 1); + if (negate) + EOR(out, out, LogicalImm(1, GPRSize::B64)); + break; + + default: + ASSERT_MSG(DYNA_REC, false, "Invalid CR bit"); + } +} + +void JitArm64::SetCRFieldBit(int field, int bit, ARM64Reg in) +{ + gpr.BindCRToRegister(field, true); + ARM64Reg CR = gpr.CR(field); + + if (bit != PowerPC::CR_GT_BIT) + FixGTBeforeSettingCRFieldBit(CR); + + switch (bit) + { + case PowerPC::CR_SO_BIT: // set bit 59 to input + BFI(CR, in, PowerPC::CR_EMU_SO_BIT, 1); + break; + + case PowerPC::CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input + AND(CR, CR, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64)); + EOR(in, in, LogicalImm(1, GPRSize::B64)); + ORR(CR, CR, in); + break; + + case PowerPC::CR_GT_BIT: // set bit 63 to !input + EOR(in, in, LogicalImm(1, GPRSize::B64)); + BFI(CR, in, 63, 1); + break; + + case PowerPC::CR_LT_BIT: // set bit 62 to input + BFI(CR, in, PowerPC::CR_EMU_LT_BIT, 1); + break; + } + + ORR(CR, CR, LogicalImm(1ULL << 32, GPRSize::B64)); +} + +void JitArm64::ClearCRFieldBit(int field, int bit) +{ + gpr.BindCRToRegister(field, true); + ARM64Reg XA = gpr.CR(field); + + switch (bit) + { + case PowerPC::CR_SO_BIT: + AND(XA, XA, LogicalImm(~(u64(1) << PowerPC::CR_EMU_SO_BIT), GPRSize::B64)); + break; + + case PowerPC::CR_EQ_BIT: + FixGTBeforeSettingCRFieldBit(XA); + ORR(XA, XA, LogicalImm(1, GPRSize::B64)); + break; + + case PowerPC::CR_GT_BIT: + ORR(XA, XA, LogicalImm(u64(1) << 63, GPRSize::B64)); + break; + + case PowerPC::CR_LT_BIT: + AND(XA, XA, LogicalImm(~(u64(1) << PowerPC::CR_EMU_LT_BIT), GPRSize::B64)); + break; + } +} + +void JitArm64::SetCRFieldBit(int field, int bit) +{ + gpr.BindCRToRegister(field, true); + ARM64Reg XA = gpr.CR(field); + + if (bit != PowerPC::CR_GT_BIT) + FixGTBeforeSettingCRFieldBit(XA); + + switch (bit) + { + case PowerPC::CR_SO_BIT: + ORR(XA, XA, LogicalImm(u64(1) << PowerPC::CR_EMU_SO_BIT, GPRSize::B64)); + break; + + case PowerPC::CR_EQ_BIT: + AND(XA, XA, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64)); + break; + + case PowerPC::CR_GT_BIT: + AND(XA, XA, LogicalImm(~(u64(1) << 63), GPRSize::B64)); + break; + + case PowerPC::CR_LT_BIT: + ORR(XA, XA, LogicalImm(u64(1) << PowerPC::CR_EMU_LT_BIT, GPRSize::B64)); + break; + } + + ORR(XA, XA, LogicalImm(u64(1) << 32, GPRSize::B64)); +} + +void JitArm64::FixGTBeforeSettingCRFieldBit(ARM64Reg reg) +{ + // GT is considered unset if the internal representation is <= 0, or in other words, + // if the internal representation either has bit 63 set or has all bits set to zero. + // If all bits are zero and we set some bit that's unrelated to GT, we need to set bit 63 so GT + // doesn't accidentally become considered set. Gross but necessary; this can break actual games. + auto WA = gpr.GetScopedReg(); + ARM64Reg XA = EncodeRegTo64(WA); + ORR(XA, reg, LogicalImm(1ULL << 63, GPRSize::B64)); + CMP(reg, ARM64Reg::ZR); + CSEL(reg, reg, XA, CC_NEQ); +} + FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) { ARM64Reg XA = gpr.CR(field); @@ -42,19 +178,6 @@ FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) } } -void JitArm64::FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg) -{ - // GT is considered unset if the internal representation is <= 0, or in other words, - // if the internal representation either has bit 63 set or has all bits set to zero. - // If all bits are zero and we set some bit that's unrelated to GT, we need to set bit 63 so GT - // doesn't accidentally become considered set. Gross but necessary; this can break actual games. - auto WA = gpr.GetScopedReg(); - ARM64Reg XA = EncodeRegTo64(WA); - ORR(XA, reg, LogicalImm(1ULL << 63, GPRSize::B64)); - CMP(reg, ARM64Reg::ZR); - CSEL(reg, reg, XA, CC_NEQ); -} - void JitArm64::UpdateFPExceptionSummary(ARM64Reg fpscr) { auto WA = gpr.GetScopedReg(); @@ -471,67 +594,14 @@ void JitArm64::crXXX(UGeckoInstruction inst) // Special case: crclr if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 193) { - // Clear CR field bit - int field = inst.CRBD >> 2; - int bit = 3 - (inst.CRBD & 3); - - gpr.BindCRToRegister(field, true); - ARM64Reg XA = gpr.CR(field); - switch (bit) - { - case PowerPC::CR_SO_BIT: - AND(XA, XA, LogicalImm(~(u64(1) << PowerPC::CR_EMU_SO_BIT), GPRSize::B64)); - break; - - case PowerPC::CR_EQ_BIT: - FixGTBeforeSettingCRFieldBit(XA); - ORR(XA, XA, LogicalImm(1, GPRSize::B64)); - break; - - case PowerPC::CR_GT_BIT: - ORR(XA, XA, LogicalImm(u64(1) << 63, GPRSize::B64)); - break; - - case PowerPC::CR_LT_BIT: - AND(XA, XA, LogicalImm(~(u64(1) << PowerPC::CR_EMU_LT_BIT), GPRSize::B64)); - break; - } + ClearCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3)); return; } // Special case: crset if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 289) { - // SetCRFieldBit - int field = inst.CRBD >> 2; - int bit = 3 - (inst.CRBD & 3); - - gpr.BindCRToRegister(field, true); - ARM64Reg XA = gpr.CR(field); - - if (bit != PowerPC::CR_GT_BIT) - FixGTBeforeSettingCRFieldBit(XA); - - switch (bit) - { - case PowerPC::CR_SO_BIT: - ORR(XA, XA, LogicalImm(u64(1) << PowerPC::CR_EMU_SO_BIT, GPRSize::B64)); - break; - - case PowerPC::CR_EQ_BIT: - AND(XA, XA, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64)); - break; - - case PowerPC::CR_GT_BIT: - AND(XA, XA, LogicalImm(~(u64(1) << 63), GPRSize::B64)); - break; - - case PowerPC::CR_LT_BIT: - ORR(XA, XA, LogicalImm(u64(1) << PowerPC::CR_EMU_LT_BIT, GPRSize::B64)); - break; - } - - ORR(XA, XA, LogicalImm(u64(1) << 32, GPRSize::B64)); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3)); return; } @@ -547,44 +617,8 @@ void JitArm64::crXXX(UGeckoInstruction inst) bool negateB = inst.SUBOP10 == 129 || inst.SUBOP10 == 417 || inst.SUBOP10 == 225 || inst.SUBOP10 == 33; - // GetCRFieldBit - for (int i = 0; i < 2; i++) - { - int field = i ? inst.CRBB >> 2 : inst.CRBA >> 2; - int bit = i ? 3 - (inst.CRBB & 3) : 3 - (inst.CRBA & 3); - ARM64Reg out = i ? XB : XA; - bool negate = i ? negateB : negateA; - - ARM64Reg XC = gpr.CR(field); - ARM64Reg WC = EncodeRegTo32(XC); - switch (bit) - { - case PowerPC::CR_SO_BIT: // check bit 59 set - UBFX(out, XC, PowerPC::CR_EMU_SO_BIT, 1); - if (negate) - EOR(out, out, LogicalImm(1, GPRSize::B64)); - break; - - case PowerPC::CR_EQ_BIT: // check bits 31-0 == 0 - CMP(WC, ARM64Reg::WZR); - CSET(out, negate ? CC_NEQ : CC_EQ); - break; - - case PowerPC::CR_GT_BIT: // check val > 0 - CMP(XC, ARM64Reg::ZR); - CSET(out, negate ? CC_LE : CC_GT); - break; - - case PowerPC::CR_LT_BIT: // check bit 62 set - UBFX(out, XC, PowerPC::CR_EMU_LT_BIT, 1); - if (negate) - EOR(out, out, LogicalImm(1, GPRSize::B64)); - break; - - default: - ASSERT_MSG(DYNA_REC, false, "Invalid CR bit"); - } - } + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, negateA); + GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB, negateB); // Compute combined bit switch (inst.SUBOP10) @@ -609,38 +643,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) } // Store result bit in CRBD - int field = inst.CRBD >> 2; - int bit = 3 - (inst.CRBD & 3); - - gpr.BindCRToRegister(field, true); - ARM64Reg CR = gpr.CR(field); - - if (bit != PowerPC::CR_GT_BIT) - FixGTBeforeSettingCRFieldBit(CR); - - switch (bit) - { - case PowerPC::CR_SO_BIT: // set bit 59 to input - BFI(CR, XA, PowerPC::CR_EMU_SO_BIT, 1); - break; - - case PowerPC::CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input - AND(CR, CR, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64)); - EOR(XA, XA, LogicalImm(1, GPRSize::B64)); - ORR(CR, CR, XA); - break; - - case PowerPC::CR_GT_BIT: // set bit 63 to !input - EOR(XA, XA, LogicalImm(1, GPRSize::B64)); - BFI(CR, XA, 63, 1); - break; - - case PowerPC::CR_LT_BIT: // set bit 62 to input - BFI(CR, XA, PowerPC::CR_EMU_LT_BIT, 1); - break; - } - - ORR(CR, CR, LogicalImm(1ULL << 32, GPRSize::B64)); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA); } void JitArm64::mfcr(UGeckoInstruction inst) From 7fddd39d97d529bb8e2f551ea01392e3650b58ef Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 25 May 2024 16:23:37 +0200 Subject: [PATCH 2/5] JitArm64: Port some crXXX optimizations from Jit64 --- .../JitArm64/JitArm64_SystemRegisters.cpp | 47 ++++++++++++++----- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 666103467c..7f9cf8fbd4 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -591,18 +591,43 @@ void JitArm64::crXXX(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); - // Special case: crclr - if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 193) + if (inst.CRBA == inst.CRBB) { - ClearCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3)); - return; - } - - // Special case: crset - if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 289) - { - SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3)); - return; + switch (inst.SUBOP10) + { + // crclr + case 129: // crandc: A && ~B => 0 + case 193: // crxor: A ^ B => 0 + { + ClearCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3)); + return; + } + // crset + case 289: // creqv: ~(A ^ B) => 1 + case 417: // crorc: A || ~B => 1 + { + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3)); + return; + } + case 257: // crand: A && B => A + case 449: // cror: A || B => A + { + auto WA = gpr.GetScopedReg(); + ARM64Reg XA = EncodeRegTo64(WA); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA); + return; + } + case 33: // crnor: ~(A || B) => ~A + case 225: // crnand: ~(A && B) => ~A + { + auto WA = gpr.GetScopedReg(); + ARM64Reg XA = EncodeRegTo64(WA); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, true); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA); + return; + } + } } auto WA = gpr.GetScopedReg(); From 9246bcad556995a971a65151cdff7d5e9341493e Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 25 May 2024 17:17:08 +0200 Subject: [PATCH 3/5] JitArm64: Add negate parameter to SetCRFieldBit Unlike on x64, inverting EQ or GT in SetCRFieldBit saves us one instruction. Also unlike on x64, inverting SO or LT in GetCRFieldBit requires an extra instruction (just like in SetCRFieldBit). Due to this, replacing an invert in GetCRFieldBit with an invert in SetCRFieldBit when possible is either equally good or better - never worse. --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 2 +- .../JitArm64/JitArm64_SystemRegisters.cpp | 41 +++++++++++-------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 6cfbdb6a47..07b3f95187 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -356,7 +356,7 @@ protected: void WriteBLRExit(Arm64Gen::ARM64Reg dest); void GetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg out, bool negate = false); - void SetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg in); + void SetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg in, bool negate = false); void ClearCRFieldBit(int field, int bit); void SetCRFieldBit(int field, int bit); void FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 7f9cf8fbd4..7be7740d61 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -54,7 +54,7 @@ void JitArm64::GetCRFieldBit(int field, int bit, ARM64Reg out, bool negate) } } -void JitArm64::SetCRFieldBit(int field, int bit, ARM64Reg in) +void JitArm64::SetCRFieldBit(int field, int bit, ARM64Reg in, bool negate) { gpr.BindCRToRegister(field, true); ARM64Reg CR = gpr.CR(field); @@ -66,21 +66,27 @@ void JitArm64::SetCRFieldBit(int field, int bit, ARM64Reg in) { case PowerPC::CR_SO_BIT: // set bit 59 to input BFI(CR, in, PowerPC::CR_EMU_SO_BIT, 1); + if (negate) + EOR(CR, CR, LogicalImm(1ULL << PowerPC::CR_EMU_SO_BIT, GPRSize::B64)); break; case PowerPC::CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input AND(CR, CR, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64)); - EOR(in, in, LogicalImm(1, GPRSize::B64)); ORR(CR, CR, in); + if (!negate) + EOR(CR, CR, LogicalImm(1ULL << 0, GPRSize::B64)); break; case PowerPC::CR_GT_BIT: // set bit 63 to !input - EOR(in, in, LogicalImm(1, GPRSize::B64)); BFI(CR, in, 63, 1); + if (!negate) + EOR(CR, CR, LogicalImm(1ULL << 63, GPRSize::B64)); break; case PowerPC::CR_LT_BIT: // set bit 62 to input BFI(CR, in, PowerPC::CR_EMU_LT_BIT, 1); + if (negate) + EOR(CR, CR, LogicalImm(1ULL << PowerPC::CR_EMU_LT_BIT, GPRSize::B64)); break; } @@ -615,7 +621,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false); - SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, false); return; } case 33: // crnor: ~(A || B) => ~A @@ -623,43 +629,42 @@ void JitArm64::crXXX(UGeckoInstruction inst) { auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); - GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, true); - SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, true); return; } } } + // crandc or crorc + const bool negate_b = inst.SUBOP10 == 129 || inst.SUBOP10 == 417; + // crnor or crnand or creqv + const bool negate_result = inst.SUBOP10 == 33 || inst.SUBOP10 == 225 || inst.SUBOP10 == 289; + auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); { auto WB = gpr.GetScopedReg(); ARM64Reg XB = EncodeRegTo64(WB); - // creqv or crnand or crnor - bool negateA = inst.SUBOP10 == 289 || inst.SUBOP10 == 225 || inst.SUBOP10 == 33; - // crandc or crorc or crnand or crnor - bool negateB = - inst.SUBOP10 == 129 || inst.SUBOP10 == 417 || inst.SUBOP10 == 225 || inst.SUBOP10 == 33; - - GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, negateA); - GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB, negateB); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false); + GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB, negate_b); // Compute combined bit switch (inst.SUBOP10) { - case 33: // crnor: ~(A || B) == (~A && ~B) case 129: // crandc: A && ~B + case 225: // crnand: ~(A && B) case 257: // crand: A && B AND(XA, XA, XB); break; case 193: // crxor: A ^ B - case 289: // creqv: ~(A ^ B) = ~A ^ B + case 289: // creqv: ~(A ^ B) EOR(XA, XA, XB); break; - case 225: // crnand: ~(A && B) == (~A || ~B) + case 33: // crnor: ~(A || B) case 417: // crorc: A || ~B case 449: // cror: A || B ORR(XA, XA, XB); @@ -668,7 +673,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) } // Store result bit in CRBD - SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, negate_result); } void JitArm64::mfcr(UGeckoInstruction inst) From 71e97665192ef45487262fcdd4df2286208e61cd Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 25 May 2024 17:35:42 +0200 Subject: [PATCH 4/5] JitArm64: Use BIC/EON/ORN in crXXX This lets us save an instruction in certain scenarios. --- .../JitArm64/JitArm64_SystemRegisters.cpp | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 7be7740d61..91a048a353 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -636,10 +636,8 @@ void JitArm64::crXXX(UGeckoInstruction inst) } } - // crandc or crorc - const bool negate_b = inst.SUBOP10 == 129 || inst.SUBOP10 == 417; - // crnor or crnand or creqv - const bool negate_result = inst.SUBOP10 == 33 || inst.SUBOP10 == 225 || inst.SUBOP10 == 289; + // crnor or crnand + const bool negate_result = inst.SUBOP10 == 33 || inst.SUBOP10 == 225; auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); @@ -648,27 +646,36 @@ void JitArm64::crXXX(UGeckoInstruction inst) ARM64Reg XB = EncodeRegTo64(WB); GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false); - GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB, negate_b); + GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB, false); // Compute combined bit switch (inst.SUBOP10) { - case 129: // crandc: A && ~B case 225: // crnand: ~(A && B) case 257: // crand: A && B AND(XA, XA, XB); break; + case 129: // crandc: A && ~B + BIC(XA, XA, XB); + break; + case 193: // crxor: A ^ B - case 289: // creqv: ~(A ^ B) EOR(XA, XA, XB); break; + case 289: // creqv: ~(A ^ B) = A ^ ~B + EON(XA, XA, XB); + break; + case 33: // crnor: ~(A || B) - case 417: // crorc: A || ~B case 449: // cror: A || B ORR(XA, XA, XB); break; + + case 417: // crorc: A || ~B + ORN(XA, XA, XB); + break; } } From 980a7263131b18f851d83a6ac72b4abe3bed831b Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 2 Nov 2024 10:33:04 +0100 Subject: [PATCH 5/5] JitArm64: Drop GetCRFieldBit's negate parameter No caller is using it anymore. --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 2 +- .../JitArm64/JitArm64_SystemRegisters.cpp | 18 +++++++----------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 07b3f95187..3f9c471e11 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -355,7 +355,7 @@ protected: Arm64Gen::ARM64Reg exit_address_after_return_reg = Arm64Gen::ARM64Reg::INVALID_REG); void WriteBLRExit(Arm64Gen::ARM64Reg dest); - void GetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg out, bool negate = false); + void GetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg out); void SetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg in, bool negate = false); void ClearCRFieldBit(int field, int bit); void SetCRFieldBit(int field, int bit); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 91a048a353..a0084953b9 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -20,7 +20,7 @@ using namespace Arm64Gen; -void JitArm64::GetCRFieldBit(int field, int bit, ARM64Reg out, bool negate) +void JitArm64::GetCRFieldBit(int field, int bit, ARM64Reg out) { ARM64Reg CR = gpr.CR(field); ARM64Reg WCR = EncodeRegTo32(CR); @@ -29,24 +29,20 @@ void JitArm64::GetCRFieldBit(int field, int bit, ARM64Reg out, bool negate) { case PowerPC::CR_SO_BIT: // check bit 59 set UBFX(out, CR, PowerPC::CR_EMU_SO_BIT, 1); - if (negate) - EOR(out, out, LogicalImm(1, GPRSize::B64)); break; case PowerPC::CR_EQ_BIT: // check bits 31-0 == 0 CMP(WCR, ARM64Reg::WZR); - CSET(out, negate ? CC_NEQ : CC_EQ); + CSET(out, CC_EQ); break; case PowerPC::CR_GT_BIT: // check val > 0 CMP(CR, ARM64Reg::ZR); - CSET(out, negate ? CC_LE : CC_GT); + CSET(out, CC_GT); break; case PowerPC::CR_LT_BIT: // check bit 62 set UBFX(out, CR, PowerPC::CR_EMU_LT_BIT, 1); - if (negate) - EOR(out, out, LogicalImm(1, GPRSize::B64)); break; default: @@ -620,7 +616,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) { auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); - GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA); SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, false); return; } @@ -629,7 +625,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) { auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); - GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA); SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, true); return; } @@ -645,8 +641,8 @@ void JitArm64::crXXX(UGeckoInstruction inst) auto WB = gpr.GetScopedReg(); ARM64Reg XB = EncodeRegTo64(WB); - GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false); - GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB, false); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA); + GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB); // Compute combined bit switch (inst.SUBOP10)