From 4a29e0e4f45bd1d6c8bfa2fbed22a0ae82cb29a1 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sat, 1 Feb 2025 16:50:08 +0100 Subject: [PATCH 1/8] JitArm64_Integer: cmp - Subtract 12-bit constant You can encode a 12-bit immediate in a SUB instruction on ARM64. Constants in this range do not need to be sign extended, so we can exploit this to avoid materializing the immediate. This approach saves an instruction if it does not need to be materialized in a register afterwards. Otherwise, we just materialize it later and the total number of instructions stays the same. Before: 0x52800416 mov w22, #0x20 ; =32 0x93407f78 sxtw x24, w27 0xcb36c318 sub x24, x24, w22, sxtw After: 0x93407f78 sxtw x24, w27 0xd1008318 sub x24, x24, #0x20 --- Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 16cf25fdef..370ad619c5 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -652,9 +652,11 @@ void JitArm64::cmp(UGeckoInstruction inst) SXTW(CR, gpr.R(b)); MVN(CR, CR); } - else if (gpr.IsImm(b) && !gpr.GetImm(b)) + else if (gpr.IsImm(b) && (gpr.GetImm(b) & 0xFFF) == gpr.GetImm(b)) { SXTW(CR, gpr.R(a)); + if (const u32 imm = gpr.GetImm(b); imm != 0) + SUB(CR, CR, imm); } else { From 352cbc4772378d5048a2d8f80f5b5938f6946c5e Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sat, 1 Feb 2025 16:57:40 +0100 Subject: [PATCH 2/8] JitArm64_Integer: cmp - Subtract shifted 12-bit constant You can encode a shifted 12-bit immediate in a SUB instruction on ARM64. Constants in this range do not need to be sign extended, so we can exploit this to avoid materializing the immediate. 
This approach saves an instruction if it does not need to be materialized in a register afterwards. Otherwise, we just materialize it later and the total number of instructions stays the same. Before: 0x52a00099 mov w25, #0x40000 ; =262144 0x93407f7a sxtw x26, w27 0xcb39c35a sub x26, x26, w25, sxtw After: 0x93407f7a sxtw x26, w27 0xd141035a sub x26, x26, #0x40, lsl #12 ; =0x40000 --- Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 370ad619c5..3a56e58b84 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -658,6 +658,11 @@ void JitArm64::cmp(UGeckoInstruction inst) if (const u32 imm = gpr.GetImm(b); imm != 0) SUB(CR, CR, imm); } + else if (gpr.IsImm(b) && (gpr.GetImm(b) & 0xFFF000) == gpr.GetImm(b)) + { + SXTW(CR, gpr.R(a)); + SUB(CR, CR, gpr.GetImm(b) >> 12, true); + } else { ARM64Reg RA = gpr.R(a); From 01eed0a7585fbd3031e850ee8f14778e9e46a0e4 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sat, 1 Feb 2025 17:02:10 +0100 Subject: [PATCH 3/8] JitArm64_Integer: cmp - Add 12-bit constant You can encode a 12-bit immediate in an ADD instruction on ARM64. If the negated constant fits in this range, we can exploit this to avoid materializing the immediate. This approach saves an instruction if it does not need to be materialized in a register afterwards. Otherwise, we just materialize it later and the total number of instructions stays the same. 
Before: 0x12800019 mov w25, #-0x1 ; =-1 0x93407f5b sxtw x27, w26 0xcb39c37b sub x27, x27, w25, sxtw After: 0x93407f5b sxtw x27, w26 0x9100077b add x27, x27, #0x1 --- Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 3a56e58b84..2175f9a219 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -663,6 +663,11 @@ void JitArm64::cmp(UGeckoInstruction inst) SXTW(CR, gpr.R(a)); SUB(CR, CR, gpr.GetImm(b) >> 12, true); } + else if (gpr.IsImm(b) && (((~gpr.GetImm(b) + 1) & 0xFFF) == (~gpr.GetImm(b) + 1))) + { + SXTW(CR, gpr.R(a)); + ADD(CR, CR, ~gpr.GetImm(b) + 1); + } else { ARM64Reg RA = gpr.R(a); From 075c35602fd3d03b0f44363f01c44ff9cd81faa6 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sat, 1 Feb 2025 17:58:14 +0100 Subject: [PATCH 4/8] JitArm64_Integer: cmp - Add shifted 12-bit constant You can encode a shifted 12-bit immediate in an ADD instruction on ARM64. If the negated constant fits in this range, we can exploit this to avoid materializing the immediate. This approach saves an instruction if it does not need to be materialized in a register afterwards. Otherwise, we just materialize it later and the total number of instructions stays the same. 
Before: 0x52bff01a mov w26, #-0x800000 ; =-8388608 0x93407f1b sxtw x27, w24 0xcb3ac37b sub x27, x27, w26, sxtw After: 0x93407f1b sxtw x27, w24 0x9160037b add x27, x27, #0x800, lsl #12 ; =0x800000 --- Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 2175f9a219..242ed1eb42 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -668,6 +668,11 @@ void JitArm64::cmp(UGeckoInstruction inst) SXTW(CR, gpr.R(a)); ADD(CR, CR, ~gpr.GetImm(b) + 1); } + else if (gpr.IsImm(b) && (((~gpr.GetImm(b) + 1) & 0xFFF000) == (~gpr.GetImm(b) + 1))) + { + SXTW(CR, gpr.R(a)); + ADD(CR, CR, (~gpr.GetImm(b) + 1) >> 12, true); + } else { ARM64Reg RA = gpr.R(a); From c5870ed0c71d3821b792543631937110e2d27202 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sat, 1 Feb 2025 18:18:32 +0100 Subject: [PATCH 5/8] JitArm64_Integer: cmp - Skip sign extension if possible While we cannot always avoid materializing immediates, we can still inspect the most significant bit and potentially skip sign extension. This can sometimes save an instruction. 
Before: 0x5280003a mov w26, #0x1 ; =1 0x93407f5b sxtw x27, w26 0xcb38c37b sub x27, x27, w24, sxtw After: 0x5280003a mov w26, #0x1 ; =1 0xcb38c35b sub x27, x26, w24, sxtw Before: 0x52a20018 mov w24, #0x10000000 ; =268435456 0x93407f79 sxtw x25, w27 0xcb38c339 sub x25, x25, w24, sxtw After: 0x52a20018 mov w24, #0x10000000 ; =268435456 0x93407f79 sxtw x25, w27 0xcb180339 sub x25, x25, x24 --- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 28 +++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 242ed1eb42..518922343d 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -675,11 +675,35 @@ void JitArm64::cmp(UGeckoInstruction inst) } else { + // If we're dealing with immediates, check their most significant bit to + // see if we can skip sign extension. + const auto should_sign_extend = [&](u32 reg) -> bool { + return !gpr.IsImm(reg) || (gpr.GetImm(reg) & (1U << 31)); + }; + bool sign_extend_a = should_sign_extend(a); + bool sign_extend_b = should_sign_extend(b); + ARM64Reg RA = gpr.R(a); ARM64Reg RB = gpr.R(b); - SXTW(CR, RA); - SUB(CR, CR, RB, ArithOption(RB, ExtendSpecifier::SXTW)); + if (sign_extend_a) + { + SXTW(CR, RA); + RA = CR; + } + else + { + RA = EncodeRegTo64(RA); + } + + auto opt = ArithOption(RB, ExtendSpecifier::SXTW); + if (!sign_extend_b) + { + opt = ArithOption(CR, ShiftType::LSL, 0); + RB = EncodeRegTo64(RB); + } + + SUB(CR, RA, RB, opt); } } From b7c3f91643535b760a71de5b1faaae2b91ac7af2 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sat, 1 Feb 2025 21:11:23 +0100 Subject: [PATCH 6/8] JitArm64_Integer: cmpl - Subtract 12-bit constant You can encode a 12-bit immediate in a SUB instruction on ARM64. We can exploit this to avoid materializing the immediate. 
This approach saves an instruction if the constant does not need to be materialized in a register afterwards. Otherwise, we just materialize it later and the total number of instructions stays the same. Before: 0x5280003a mov w26, #0x1 ; =1 0xcb1a033b sub x27, x25, x26 After: 0xd100073b sub x27, x25, #0x1 --- Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 518922343d..ef44a72efd 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -728,9 +728,13 @@ void JitArm64::cmpl(UGeckoInstruction inst) { NEG(CR, EncodeRegTo64(gpr.R(b))); } - else if (gpr.IsImm(b) && !gpr.GetImm(b)) + else if (gpr.IsImm(b) && (gpr.GetImm(b) & 0xFFF) == gpr.GetImm(b)) { - MOV(EncodeRegTo32(CR), gpr.R(a)); + const u32 imm = gpr.GetImm(b); + if (imm == 0) + MOV(EncodeRegTo32(CR), gpr.R(a)); + else + SUB(CR, EncodeRegTo64(gpr.R(a)), imm); } else { From 7ce7da629e8059a7c819ea26436f4426648d22b5 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sat, 1 Feb 2025 22:02:43 +0100 Subject: [PATCH 7/8] JitArm64_Integer: cmpl - Subtract shifted 12-bit constant You can encode a shifted 12-bit immediate in a SUB instruction on ARM64. We exploit this to avoid materializing the immediate. This approach saves an instruction if the constant does not need to be materialized in a register afterwards. Otherwise, we just materialize it later and the total number of instructions stays the same. 
Before: 0x52a00218 mov w24, #0x100000 ; =1048576 0xcb180379 sub x25, x27, x24 After: 0xd1440379 sub x25, x27, #0x100, lsl #12 ; =0x100000 --- Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index ef44a72efd..58fab5cfea 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -736,6 +736,10 @@ void JitArm64::cmpl(UGeckoInstruction inst) else SUB(CR, EncodeRegTo64(gpr.R(a)), imm); } + else if (gpr.IsImm(b) && (gpr.GetImm(b) & 0xFFF000) == gpr.GetImm(b)) + { + SUB(CR, EncodeRegTo64(gpr.R(a)), gpr.GetImm(b) >> 12, true); + } else { SUB(CR, EncodeRegTo64(gpr.R(a)), EncodeRegTo64(gpr.R(b))); From 755c00326559904aee338d47a06409ec5b03b55e Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sat, 1 Feb 2025 22:05:24 +0100 Subject: [PATCH 8/8] JitArm64_RegCache: Const correctness Forgot this when I added it in #13120. --- Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index b98e170531..5164745cd8 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -345,7 +345,7 @@ public: // Gets the immediate that a register is set to. Only valid for guest GPRs. u32 GetImm(size_t preg) const { return GetGuestGPROpArg(preg).GetImm(); } - bool IsImm(size_t preg, u32 imm) { return IsImm(preg) && GetImm(preg) == imm; } + bool IsImm(size_t preg, u32 imm) const { return IsImm(preg) && GetImm(preg) == imm; } // Binds a guest GPR to a host register, optionally loading its value. //