From 79f856a8d1e0f85a006313fca96c96298730214f Mon Sep 17 00:00:00 2001 From: JosJuice Date: Tue, 18 Oct 2022 22:05:22 +0200 Subject: [PATCH] JitArm64: Keep float constants in fixed registers PR 9768 was an attempt to make the register cache keep track of float constants so that we would only have to emit them once per block instead of once per use. However, the code of that PR was a bit complicated. This PR offers a simpler solution: Since there are only three constants we want to keep in registers, simply reserve three registers for them. This has the advantage of letting us keep the constants in registers across blocks, decreasing code size even further. The downside is that fewer registers are available for the register cache, but with how many registers AArch64 has, I think it's a good tradeoff. --- .../Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp | 12 +++--------- .../PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp | 4 ++-- .../Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp | 3 --- .../Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h | 5 +++++ Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 6 ++++++ 5 files changed, 16 insertions(+), 14 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 4f66b94702..8712fdd2a2 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -565,8 +565,6 @@ void JitArm64::fctiwx(UGeckoInstruction inst) if (single) { - const auto V0 = fpr.GetScopedReg(); - if (is_fctiwzx) { m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VB), RoundingMode::Z); @@ -577,11 +575,8 @@ void JitArm64::fctiwx(UGeckoInstruction inst) m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VD), RoundingMode::Z); } - // Generate 0xFFF8'0000'0000'0000ULL - m_float_emit.MOVI(64, EncodeRegToDouble(V0), 0xFFFF'0000'0000'0000ULL); - m_float_emit.BIC(16, EncodeRegToDouble(V0), 0x7); - - m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0)); + m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), + EncodeRegToDouble(FPR_CONSTANT_FFF8_0000_0000_0000)); } else { @@ -796,9 +791,8 @@ void JitArm64::ConvertSingleToDoublePair(size_t guest_reg, ARM64Reg dest_reg, AR { // Set each 32-bit element of scratch_reg to 0x0000'0000 or 0xFFFF'FFFF depending on whether // the absolute value of the corresponding element in src_reg compares greater than 0 - m_float_emit.MOVI(64, EncodeRegToDouble(scratch_reg), 0); m_float_emit.FACGT(32, EncodeRegToDouble(scratch_reg), EncodeRegToDouble(src_reg), - EncodeRegToDouble(scratch_reg)); + EncodeRegToDouble(FPR_CONSTANT_0000_0000_0000_0000)); // 0x0000'0000'0000'0000 (zero) -> 0x0000'0000'0000'0000 (zero) // 0x0000'0000'FFFF'FFFF (denormal) -> 0xFF00'0000'FFFF'FFFF (normal) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp index eb8b4d015c..4fee7f3454 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp @@ -120,8 +120,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) if (w) { - m_float_emit.FMOV(ARM64Reg::S0, 0x70); // 1.0 as a Single - m_float_emit.INS(32, VS, 1, ARM64Reg::Q0, 0); + // Set ps1 to 1.0 + m_float_emit.INS(32, VS, 1, FPR_CONSTANT_0000_0000_3F80_0000, 0); } const ARM64Reg VS_again = fpr.RW(inst.RS, RegType::Single, true); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index b93b30153f..236f95dbed 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -726,9 +726,6 @@ void Arm64FPRCache::GetAllocationOrder() ARM64Reg::Q10, ARM64Reg::Q11, ARM64Reg::Q12, - ARM64Reg::Q13, - ARM64Reg::Q14, - ARM64Reg::Q15, // Caller saved ARM64Reg::Q16, diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index 159cfb6836..3534d335a7 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -25,6 +25,11 @@ constexpr Arm64Gen::ARM64Reg PPC_REG = Arm64Gen::ARM64Reg::X29; // PC register when calling the dispatcher constexpr Arm64Gen::ARM64Reg DISPATCHER_PC = Arm64Gen::ARM64Reg::W26; +// FPR constants +constexpr Arm64Gen::ARM64Reg FPR_CONSTANT_0000_0000_0000_0000 = Arm64Gen::ARM64Reg::Q13; +constexpr Arm64Gen::ARM64Reg FPR_CONSTANT_0000_0000_3F80_0000 = Arm64Gen::ARM64Reg::Q14; +constexpr Arm64Gen::ARM64Reg FPR_CONSTANT_FFF8_0000_0000_0000 = Arm64Gen::ARM64Reg::Q15; + #ifdef __GNUC__ #define PPCSTATE_OFF(elem) \ ([]() consteval { \ diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 56c26739a3..063c10b380 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -44,6 +44,12 @@ void JitArm64::GenerateAsm() ABI_PushRegisters(regs_to_save); m_float_emit.ABI_PushRegisters(regs_to_save_fpr, ARM64Reg::X8); + // Generate FPR constants + m_float_emit.MOVI(8, EncodeRegToDouble(FPR_CONSTANT_0000_0000_0000_0000), 0); + m_float_emit.FMOV(EncodeRegToSingle(FPR_CONSTANT_0000_0000_3F80_0000), 0x70); + MOVI2R(ARM64Reg::X30, 0xFFF8'0000'0000'0000ULL); + m_float_emit.FMOV(EncodeRegToDouble(FPR_CONSTANT_FFF8_0000_0000_0000), ARM64Reg::X30); + MOVP2R(PPC_REG, &m_ppc_state); // Store the stack pointer, so we can reset it if the BLR optimization fails.