JitArm64: Keep float constants in fixed registers

PR 9768 was an attempt to make the register cache keep track of float
constants so that we would only have to emit them once per block instead
of once per use. However, the code of that PR was a bit complicated.

This PR offers a simpler solution: Since there are only three constants
we want to keep in registers, simply reserve three registers for them.
This has the advantage of letting us keep the constants in registers
across blocks, decreasing code size even further. The downside is that
three fewer registers (Q13-Q15) are available for the register cache,
but given how many registers AArch64 has, I think it's a good tradeoff.
This commit is contained in:
JosJuice 2022-10-18 22:05:22 +02:00
parent 54b37f6bc4
commit 79f856a8d1
5 changed files with 16 additions and 14 deletions

View File

@ -565,8 +565,6 @@ void JitArm64::fctiwx(UGeckoInstruction inst)
if (single)
{
const auto V0 = fpr.GetScopedReg();
if (is_fctiwzx)
{
m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VB), RoundingMode::Z);
@ -577,11 +575,8 @@ void JitArm64::fctiwx(UGeckoInstruction inst)
m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VD), RoundingMode::Z);
}
// Generate 0xFFF8'0000'0000'0000ULL
m_float_emit.MOVI(64, EncodeRegToDouble(V0), 0xFFFF'0000'0000'0000ULL);
m_float_emit.BIC(16, EncodeRegToDouble(V0), 0x7);
m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0));
m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD),
EncodeRegToDouble(FPR_CONSTANT_FFF8_0000_0000_0000));
}
else
{
@ -796,9 +791,8 @@ void JitArm64::ConvertSingleToDoublePair(size_t guest_reg, ARM64Reg dest_reg, AR
{
// Set each 32-bit element of scratch_reg to 0x0000'0000 or 0xFFFF'FFFF depending on whether
// the absolute value of the corresponding element in src_reg compares greater than 0
m_float_emit.MOVI(64, EncodeRegToDouble(scratch_reg), 0);
m_float_emit.FACGT(32, EncodeRegToDouble(scratch_reg), EncodeRegToDouble(src_reg),
EncodeRegToDouble(scratch_reg));
EncodeRegToDouble(FPR_CONSTANT_0000_0000_0000_0000));
// 0x0000'0000'0000'0000 (zero) -> 0x0000'0000'0000'0000 (zero)
// 0x0000'0000'FFFF'FFFF (denormal) -> 0xFF00'0000'FFFF'FFFF (normal)

View File

@ -120,8 +120,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
if (w)
{
m_float_emit.FMOV(ARM64Reg::S0, 0x70); // 1.0 as a Single
m_float_emit.INS(32, VS, 1, ARM64Reg::Q0, 0);
// Set ps1 to 1.0
m_float_emit.INS(32, VS, 1, FPR_CONSTANT_0000_0000_3F80_0000, 0);
}
const ARM64Reg VS_again = fpr.RW(inst.RS, RegType::Single, true);

View File

@ -726,9 +726,6 @@ void Arm64FPRCache::GetAllocationOrder()
ARM64Reg::Q10,
ARM64Reg::Q11,
ARM64Reg::Q12,
ARM64Reg::Q13,
ARM64Reg::Q14,
ARM64Reg::Q15,
// Caller saved
ARM64Reg::Q16,

View File

@ -25,6 +25,11 @@ constexpr Arm64Gen::ARM64Reg PPC_REG = Arm64Gen::ARM64Reg::X29;
// PC register when calling the dispatcher
constexpr Arm64Gen::ARM64Reg DISPATCHER_PC = Arm64Gen::ARM64Reg::W26;
// FPR constants
//
// Host vector registers reserved to hold fixed bit patterns for JIT-emitted code. Each name
// spells out the 64-bit pattern the register is expected to contain. The values are loaded
// in JitArm64::GenerateAsm, and Q13-Q15 are not part of the FPR cache's allocation order,
// so emitted code can read these registers directly instead of materializing the constant
// at every use.

// All bits zero (0.0).
constexpr Arm64Gen::ARM64Reg FPR_CONSTANT_0000_0000_0000_0000 = Arm64Gen::ARM64Reg::Q13;
// Low 32 bits are 0x3F80'0000, i.e. 1.0 as a single; used by psq_lXX to set ps1 to 1.0.
constexpr Arm64Gen::ARM64Reg FPR_CONSTANT_0000_0000_3F80_0000 = Arm64Gen::ARM64Reg::Q14;
// 0xFFF8'0000'0000'0000; ORed into the result of the single-precision path of fctiwx.
constexpr Arm64Gen::ARM64Reg FPR_CONSTANT_FFF8_0000_0000_0000 = Arm64Gen::ARM64Reg::Q15;
#ifdef __GNUC__
#define PPCSTATE_OFF(elem) \
([]() consteval { \

View File

@ -44,6 +44,12 @@ void JitArm64::GenerateAsm()
ABI_PushRegisters(regs_to_save);
m_float_emit.ABI_PushRegisters(regs_to_save_fpr, ARM64Reg::X8);
// Generate FPR constants
m_float_emit.MOVI(8, EncodeRegToDouble(FPR_CONSTANT_0000_0000_0000_0000), 0);
m_float_emit.FMOV(EncodeRegToSingle(FPR_CONSTANT_0000_0000_3F80_0000), 0x70);
MOVI2R(ARM64Reg::X30, 0xFFF8'0000'0000'0000ULL);
m_float_emit.FMOV(EncodeRegToDouble(FPR_CONSTANT_FFF8_0000_0000_0000), ARM64Reg::X30);
MOVP2R(PPC_REG, &m_ppc_state);
// Store the stack pointer, so we can reset it if the BLR optimization fails.