diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index e9abdbbc4b..9372fffd2d 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -332,6 +332,7 @@ protected: void (ARM64XEmitter::*op)(Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg, u64, Arm64Gen::ARM64Reg), bool Rc = false); + bool MultiplyImmediate(u32 imm, int a, int d, bool rc); void SetFPRFIfNeeded(bool single, Arm64Gen::ARM64Reg reg); void Force25BitPrecision(Arm64Gen::ARM64Reg output, Arm64Gen::ARM64Reg input); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index f315efb976..c1fddee166 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -7,6 +7,7 @@ #include "Common/Assert.h" #include "Common/BitUtils.h" #include "Common/CommonTypes.h" +#include "Common/MathUtil.h" #include "Core/Core.h" #include "Core/CoreTiming.h" @@ -883,6 +884,75 @@ void JitArm64::addic(UGeckoInstruction inst) } } +bool JitArm64::MultiplyImmediate(u32 imm, int a, int d, bool rc) +{ + if (imm == 0) + { + // Multiplication by zero (0). + gpr.SetImmediate(d, 0); + if (rc) + ComputeRC0(gpr.GetImm(d)); + } + else if (imm == 1) + { + // Multiplication by one (1). + if (d != a) + { + gpr.BindToRegister(d, false); + MOV(gpr.R(d), gpr.R(a)); + } + if (rc) + ComputeRC0(gpr.R(d)); + } + else if (MathUtil::IsPow2(imm)) + { + // Multiplication by a power of two (2^n). + const int shift = IntLog2(imm); + + gpr.BindToRegister(d, d == a); + LSL(gpr.R(d), gpr.R(a), shift); + if (rc) + ComputeRC0(gpr.R(d)); + } + else if (MathUtil::IsPow2(imm - 1)) + { + // Multiplication by a power of two plus one (2^n + 1). + const int shift = IntLog2(imm - 1); + + gpr.BindToRegister(d, d == a); + ADD(gpr.R(d), gpr.R(a), gpr.R(a), ArithOption(gpr.R(a), ShiftType::LSL, shift)); + if (rc) + ComputeRC0(gpr.R(d)); + } + else if (MathUtil::IsPow2(~imm + 1)) + { + // Multiplication by a negative power of two (-(2^n)). + const int shift = IntLog2(~imm + 1); + + gpr.BindToRegister(d, d == a); + NEG(gpr.R(d), gpr.R(a), ArithOption(gpr.R(a), ShiftType::LSL, shift)); + if (rc) + ComputeRC0(gpr.R(d)); + } + else if (MathUtil::IsPow2(~imm + 2)) + { + // Multiplication by a negative power of two plus one (-(2^n) + 1). + const int shift = IntLog2(~imm + 2); + + gpr.BindToRegister(d, d == a); + SUB(gpr.R(d), gpr.R(a), gpr.R(a), ArithOption(gpr.R(a), ShiftType::LSL, shift)); + if (rc) + ComputeRC0(gpr.R(d)); + } + else + { + // Immediate did not match any known special cases. + return false; + } + + return true; +} + void JitArm64::mulli(UGeckoInstruction inst) { INSTRUCTION_START @@ -895,13 +965,22 @@ void JitArm64::mulli(UGeckoInstruction inst) s32 i = (s32)gpr.GetImm(a); gpr.SetImmediate(d, i * inst.SIMM_16); } + else if (MultiplyImmediate((u32)(s32)inst.SIMM_16, a, d, false)) + { + // Code is generated inside MultiplyImmediate, nothing to be done here. + } else { - gpr.BindToRegister(d, d == a); - ARM64Reg WA = gpr.GetReg(); + const bool allocate_reg = d == a; + gpr.BindToRegister(d, allocate_reg); + + // Reuse d to hold the immediate if possible, allocate a register otherwise. + ARM64Reg WA = allocate_reg ? gpr.GetReg() : gpr.R(d); + MOVI2R(WA, (u32)(s32)inst.SIMM_16); MUL(gpr.R(d), gpr.R(a), WA); - gpr.Unlock(WA); + if (allocate_reg) + gpr.Unlock(WA); } } @@ -920,6 +999,11 @@ void JitArm64::mullwx(UGeckoInstruction inst) if (inst.Rc) ComputeRC0(gpr.GetImm(d)); } + else if ((gpr.IsImm(a) && MultiplyImmediate(gpr.GetImm(a), b, d, inst.Rc)) || + (gpr.IsImm(b) && MultiplyImmediate(gpr.GetImm(b), a, d, inst.Rc))) + { + // Code is generated inside MultiplyImmediate, nothing to be done here. + } else { gpr.BindToRegister(d, d == a || d == b);