diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 4f66b94702..8712fdd2a2 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -565,8 +565,6 @@ void JitArm64::fctiwx(UGeckoInstruction inst) if (single) { - const auto V0 = fpr.GetScopedReg(); - if (is_fctiwzx) { m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VB), RoundingMode::Z); @@ -577,11 +575,8 @@ void JitArm64::fctiwx(UGeckoInstruction inst) m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VD), RoundingMode::Z); } - // Generate 0xFFF8'0000'0000'0000ULL - m_float_emit.MOVI(64, EncodeRegToDouble(V0), 0xFFFF'0000'0000'0000ULL); - m_float_emit.BIC(16, EncodeRegToDouble(V0), 0x7); - - m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0)); + m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), + EncodeRegToDouble(FPR_CONSTANT_FFF8_0000_0000_0000)); } else { @@ -796,9 +791,8 @@ void JitArm64::ConvertSingleToDoublePair(size_t guest_reg, ARM64Reg dest_reg, AR { // Set each 32-bit element of scratch_reg to 0x0000'0000 or 0xFFFF'FFFF depending on whether // the absolute value of the corresponding element in src_reg compares greater than 0 - m_float_emit.MOVI(64, EncodeRegToDouble(scratch_reg), 0); m_float_emit.FACGT(32, EncodeRegToDouble(scratch_reg), EncodeRegToDouble(src_reg), - EncodeRegToDouble(scratch_reg)); + EncodeRegToDouble(FPR_CONSTANT_0000_0000_0000_0000)); // 0x0000'0000'0000'0000 (zero) -> 0x0000'0000'0000'0000 (zero) // 0x0000'0000'FFFF'FFFF (denormal) -> 0xFF00'0000'FFFF'FFFF (normal) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp index eb8b4d015c..4fee7f3454 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp +++ 
b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp @@ -120,8 +120,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) if (w) { - m_float_emit.FMOV(ARM64Reg::S0, 0x70); // 1.0 as a Single - m_float_emit.INS(32, VS, 1, ARM64Reg::Q0, 0); + // Set ps1 to 1.0 + m_float_emit.INS(32, VS, 1, FPR_CONSTANT_0000_0000_3F80_0000, 0); } const ARM64Reg VS_again = fpr.RW(inst.RS, RegType::Single, true); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index b93b30153f..236f95dbed 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -726,9 +726,6 @@ void Arm64FPRCache::GetAllocationOrder() ARM64Reg::Q10, ARM64Reg::Q11, ARM64Reg::Q12, - ARM64Reg::Q13, - ARM64Reg::Q14, - ARM64Reg::Q15, // Caller saved ARM64Reg::Q16, diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index 159cfb6836..3534d335a7 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -25,6 +25,11 @@ constexpr Arm64Gen::ARM64Reg PPC_REG = Arm64Gen::ARM64Reg::X29; // PC register when calling the dispatcher constexpr Arm64Gen::ARM64Reg DISPATCHER_PC = Arm64Gen::ARM64Reg::W26; +// FPR constants (value as named), loaded in GenerateAsm; not part of the FPR allocation order +constexpr Arm64Gen::ARM64Reg FPR_CONSTANT_0000_0000_0000_0000 = Arm64Gen::ARM64Reg::Q13; +constexpr Arm64Gen::ARM64Reg FPR_CONSTANT_0000_0000_3F80_0000 = Arm64Gen::ARM64Reg::Q14; +constexpr Arm64Gen::ARM64Reg FPR_CONSTANT_FFF8_0000_0000_0000 = Arm64Gen::ARM64Reg::Q15; + #ifdef __GNUC__ #define PPCSTATE_OFF(elem) \ ([]() consteval { \ diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 56c26739a3..063c10b380 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -44,6 +44,12 @@ void 
JitArm64::GenerateAsm() ABI_PushRegisters(regs_to_save); m_float_emit.ABI_PushRegisters(regs_to_save_fpr, ARM64Reg::X8); + // Generate FPR constants + m_float_emit.MOVI(8, EncodeRegToDouble(FPR_CONSTANT_0000_0000_0000_0000), 0); + m_float_emit.FMOV(EncodeRegToSingle(FPR_CONSTANT_0000_0000_3F80_0000), 0x70); + MOVI2R(ARM64Reg::X30, 0xFFF8'0000'0000'0000ULL); + m_float_emit.FMOV(EncodeRegToDouble(FPR_CONSTANT_FFF8_0000_0000_0000), ARM64Reg::X30); + MOVP2R(PPC_REG, &m_ppc_state); // Store the stack pointer, so we can reset it if the BLR optimization fails.