JitArm64: Check GPRs/FPRs to push inside EmitBackpatchRoutine

Preparation for the next commit, which will make EmitBackpatchRoutine
allocate registers on its own. Because the register allocation will
change during the call to EmitBackpatchRoutine, the set of GPRs/FPRs to
push can't be computed prior to the call, so let's compute them during
the call instead.
This commit is contained in:
JosJuice 2024-12-28 20:39:27 +01:00
parent c528a70e64
commit 9ab2751229
6 changed files with 137 additions and 130 deletions

View File

@ -273,11 +273,11 @@ protected:
// !emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X30 // !emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X30
// !emitting_routine && mode == Auto && jo.fastmem: X30 // !emitting_routine && mode == Auto && jo.fastmem: X30
// //
// Furthermore, any callee-saved register which isn't marked in gprs_to_push/fprs_to_push // If there are any other registers that the caller doesn't mind being overwritten,
// may be clobbered if mode != AlwaysFastAccess. // these can be indicated in scratch_gprs and scratch_fprs.
void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS, void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS,
Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0), Arm64Gen::ARM64Reg addr, BitSet32 scratch_gprs = BitSet32(0),
BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false); BitSet32 scratch_fprs = BitSet32(0), bool emitting_routine = false);
// Loadstore routines // Loadstore routines
void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update); void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update);

View File

@ -54,7 +54,7 @@ void JitArm64::DoBacktrace(uintptr_t access_address, SContext* ctx)
} }
void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, ARM64Reg addr, void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, ARM64Reg addr,
BitSet32 gprs_to_push, BitSet32 fprs_to_push, BitSet32 scratch_gprs, BitSet32 scratch_fprs,
bool emitting_routine) bool emitting_routine)
{ {
const u32 access_size = BackPatchInfo::GetFlagSize(flags); const u32 access_size = BackPatchInfo::GetFlagSize(flags);
@ -65,6 +65,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
const bool emit_fast_access = mode != MemAccessMode::AlwaysSlowAccess; const bool emit_fast_access = mode != MemAccessMode::AlwaysSlowAccess;
const bool emit_slow_access = mode != MemAccessMode::AlwaysFastAccess; const bool emit_slow_access = mode != MemAccessMode::AlwaysFastAccess;
const BitSet32 gprs_to_push =
(emitting_routine ? CALLER_SAVED_GPRS : gpr.GetCallerSavedUsed()) & ~scratch_gprs;
const BitSet32 fprs_to_push =
(emitting_routine ? BitSet32(0xFFFFFFFF) : fpr.GetCallerSavedUsed()) & ~scratch_fprs;
bool in_far_code = false; bool in_far_code = false;
const u8* fast_access_start = GetCodePtr(); const u8* fast_access_start = GetCodePtr();
std::optional<FixupBranch> slow_access_fixup; std::optional<FixupBranch> slow_access_fixup;

View File

@ -123,14 +123,14 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
MOV(gpr.R(addr), addr_reg); MOV(gpr.R(addr), addr_reg);
} }
BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 scratch_gprs;
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 scratch_fprs;
if (!update || early_update) if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0; scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
if (jo.memcheck || !jo.fastmem) if (jo.memcheck || !jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
if (!jo.memcheck) if (!jo.memcheck)
regs_in_use[DecodeReg(dest_reg)] = 0; scratch_gprs[DecodeReg(dest_reg)] = true;
u32 access_size = BackPatchInfo::GetFlagSize(flags); u32 access_size = BackPatchInfo::GetFlagSize(flags);
u32 mmio_address = 0; u32 mmio_address = 0;
@ -140,22 +140,23 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size)) if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size))
{ {
set_addr_reg_if_needed(); set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, dest_reg, XA, regs_in_use, EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, dest_reg, XA, scratch_gprs,
fprs_in_use); scratch_fprs);
} }
else if (mmio_address) else if (mmio_address)
{ {
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0; scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
regs_in_use[DecodeReg(ARM64Reg::W30)] = 0; scratch_gprs[DecodeReg(ARM64Reg::W30)] = true;
regs_in_use[DecodeReg(dest_reg)] = 0; scratch_gprs[DecodeReg(dest_reg)] = true;
MMIOLoadToReg(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit, regs_in_use, MMIOLoadToReg(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit,
fprs_in_use, dest_reg, mmio_address, flags); gpr.GetCallerSavedUsed() & ~scratch_gprs,
fpr.GetCallerSavedUsed() & ~scratch_fprs, dest_reg, mmio_address, flags);
addr_reg_set = false; addr_reg_set = false;
} }
else else
{ {
set_addr_reg_if_needed(); set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, XA, regs_in_use, fprs_in_use); EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, XA, scratch_gprs, scratch_fprs);
} }
gpr.BindToRegister(dest, false, true); gpr.BindToRegister(dest, false, true);
@ -271,13 +272,13 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
MOV(gpr.R(dest), addr_reg); MOV(gpr.R(dest), addr_reg);
} }
BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 scratch_gprs;
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 scratch_fprs;
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0; scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
if (!update || early_update) if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0; scratch_gprs[DecodeReg(ARM64Reg::W2)] = true;
if (!jo.fastmem) if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
u32 access_size = BackPatchInfo::GetFlagSize(flags); u32 access_size = BackPatchInfo::GetFlagSize(flags);
u32 mmio_address = 0; u32 mmio_address = 0;
@ -313,22 +314,24 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
else if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size)) else if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size))
{ {
set_addr_reg_if_needed(); set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, RS, XA, regs_in_use, fprs_in_use); EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, RS, XA, scratch_gprs,
scratch_fprs);
} }
else if (mmio_address) else if (mmio_address)
{ {
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0; scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0; scratch_gprs[DecodeReg(ARM64Reg::W2)] = true;
regs_in_use[DecodeReg(ARM64Reg::W30)] = 0; scratch_gprs[DecodeReg(ARM64Reg::W30)] = true;
regs_in_use[DecodeReg(RS)] = 0; scratch_gprs[DecodeReg(RS)] = true;
MMIOWriteRegToAddr(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit, MMIOWriteRegToAddr(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit,
regs_in_use, fprs_in_use, RS, mmio_address, flags); gpr.GetCallerSavedUsed() & ~scratch_gprs,
fpr.GetCallerSavedUsed() & ~scratch_fprs, RS, mmio_address, flags);
addr_reg_set = false; addr_reg_set = false;
} }
else else
{ {
set_addr_reg_if_needed(); set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, MemAccessMode::Auto, RS, XA, regs_in_use, fprs_in_use); EmitBackpatchRoutine(flags, MemAccessMode::Auto, RS, XA, scratch_gprs, scratch_fprs);
} }
if (update && !early_update) if (update && !early_update)
@ -592,16 +595,16 @@ void JitArm64::lmw(UGeckoInstruction inst)
else if (i != d) else if (i != d)
ADDI2R(addr_reg, addr_base_reg, (i - d) * 4); ADDI2R(addr_reg, addr_base_reg, (i - d) * 4);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 scratch_gprs;
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 scratch_fprs;
regs_in_use[DecodeReg(addr_reg)] = 0; scratch_gprs[DecodeReg(addr_reg)] = true;
if (jo.memcheck || !jo.fastmem) if (jo.memcheck || !jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
if (!jo.memcheck) if (!jo.memcheck)
regs_in_use[DecodeReg(dest_reg)] = 0; scratch_gprs[DecodeReg(dest_reg)] = true;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, EncodeRegTo64(addr_reg), regs_in_use, EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, EncodeRegTo64(addr_reg),
fprs_in_use); scratch_gprs, scratch_fprs);
gpr.BindToRegister(i, false, true); gpr.BindToRegister(i, false, true);
ASSERT(dest_reg == gpr.R(i)); ASSERT(dest_reg == gpr.R(i));
@ -710,15 +713,15 @@ void JitArm64::stmw(UGeckoInstruction inst)
else if (i != s) else if (i != s)
ADDI2R(addr_reg, addr_base_reg, (i - s) * 4); ADDI2R(addr_reg, addr_base_reg, (i - s) * 4);
BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 scratch_gprs;
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 scratch_fprs;
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0; scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
regs_in_use[DecodeReg(addr_reg)] = 0; scratch_gprs[DecodeReg(addr_reg)] = true;
if (!jo.fastmem) if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), regs_in_use, EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), scratch_gprs,
fprs_in_use); scratch_fprs);
// To reduce register pressure and to avoid getting a pipeline-unfriendly long run of stores // To reduce register pressure and to avoid getting a pipeline-unfriendly long run of stores
// after this instruction, flush registers that would be flushed after this instruction anyway. // after this instruction, flush registers that would be flushed after this instruction anyway.
@ -1044,14 +1047,14 @@ void JitArm64::dcbz(UGeckoInstruction inst)
} }
} }
BitSet32 gprs_to_push = gpr.GetCallerSavedUsed(); BitSet32 scratch_gprs;
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed(); BitSet32 scratch_fprs;
gprs_to_push[DecodeReg(ARM64Reg::W1)] = 0; scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
if (!jo.fastmem) if (!jo.fastmem)
gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0; scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W1, EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W1,
EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push); EncodeRegTo64(addr_reg), scratch_gprs, scratch_fprs);
if (using_dcbz_hack) if (using_dcbz_hack)
SetJumpTarget(end_dcbz_hack); SetJumpTarget(end_dcbz_hack);

View File

@ -164,23 +164,24 @@ void JitArm64::lfXX(UGeckoInstruction inst)
MOV(gpr.R(a), addr_reg); MOV(gpr.R(a), addr_reg);
} }
BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 scratch_gprs;
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 scratch_fprs;
if (!update || early_update) if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0; scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
if (jo.memcheck || !jo.fastmem) if (jo.memcheck || !jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0; scratch_fprs[DecodeReg(ARM64Reg::Q0)] = true;
if (!jo.memcheck) if (!jo.memcheck)
fprs_in_use[DecodeReg(VD)] = 0; scratch_fprs[DecodeReg(VD)] = true;
if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags))) if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags)))
{ {
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, VD, XA, regs_in_use, fprs_in_use); EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, VD, XA, scratch_gprs,
scratch_fprs);
} }
else else
{ {
EmitBackpatchRoutine(flags, MemAccessMode::Auto, VD, XA, regs_in_use, fprs_in_use); EmitBackpatchRoutine(flags, MemAccessMode::Auto, VD, XA, scratch_gprs, scratch_fprs);
} }
const ARM64Reg VD_again = fpr.RW(inst.FD, type, true); const ARM64Reg VD_again = fpr.RW(inst.FD, type, true);
@ -367,14 +368,14 @@ void JitArm64::stfXX(UGeckoInstruction inst)
MOV(gpr.R(a), addr_reg); MOV(gpr.R(a), addr_reg);
} }
BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); BitSet32 scratch_gprs;
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 scratch_fprs;
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0; scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
if (!update || early_update) if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0; scratch_gprs[DecodeReg(ARM64Reg::W2)] = true;
if (!jo.fastmem) if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0; scratch_fprs[DecodeReg(ARM64Reg::Q0)] = true;
if (is_immediate) if (is_immediate)
{ {
@ -402,20 +403,20 @@ void JitArm64::stfXX(UGeckoInstruction inst)
else if (m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags))) else if (m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags)))
{ {
set_addr_reg_if_needed(); set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, V0, XA, regs_in_use, EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, V0, XA, scratch_gprs,
fprs_in_use); scratch_fprs);
} }
else else
{ {
set_addr_reg_if_needed(); set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSlowAccess, V0, XA, regs_in_use, EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSlowAccess, V0, XA, scratch_gprs,
fprs_in_use); scratch_fprs);
} }
} }
else else
{ {
set_addr_reg_if_needed(); set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, MemAccessMode::Auto, V0, XA, regs_in_use, fprs_in_use); EmitBackpatchRoutine(flags, MemAccessMode::Auto, V0, XA, scratch_gprs, scratch_fprs);
} }
if (update && !early_update) if (update && !early_update)

View File

@ -79,24 +79,23 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
if (js.assumeNoPairedQuantize) if (js.assumeNoPairedQuantize)
{ {
BitSet32 gprs_in_use = gpr.GetCallerSavedUsed(); BitSet32 scratch_gprs;
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 scratch_fprs;
// Wipe the registers we are using as temporaries
if (!update || early_update) if (!update || early_update)
gprs_in_use[DecodeReg(ARM64Reg::W1)] = false; scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
if (jo.memcheck || !jo.fastmem) if (jo.memcheck || !jo.fastmem)
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false; scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false; scratch_fprs[DecodeReg(ARM64Reg::Q0)] = true;
if (!jo.memcheck) if (!jo.memcheck)
fprs_in_use[DecodeReg(VS)] = 0; scratch_fprs[DecodeReg(VS)] = true;
u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
if (!w) if (!w)
flags |= BackPatchInfo::FLAG_PAIR; flags |= BackPatchInfo::FLAG_PAIR;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use, EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), scratch_gprs,
fprs_in_use); scratch_fprs);
} }
else else
{ {
@ -239,22 +238,21 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
if (js.assumeNoPairedQuantize) if (js.assumeNoPairedQuantize)
{ {
BitSet32 gprs_in_use = gpr.GetCallerSavedUsed(); BitSet32 scratch_gprs;
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); BitSet32 scratch_fprs;
// Wipe the registers we are using as temporaries scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
if (!update || early_update) if (!update || early_update)
gprs_in_use[DecodeReg(ARM64Reg::W2)] = false; scratch_gprs[DecodeReg(ARM64Reg::W2)] = true;
if (!jo.fastmem) if (!jo.fastmem)
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false; scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
if (!w) if (!w)
flags |= BackPatchInfo::FLAG_PAIR; flags |= BackPatchInfo::FLAG_PAIR;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use, EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), scratch_gprs,
fprs_in_use); scratch_fprs);
} }
else else
{ {

View File

@ -526,10 +526,10 @@ void JitArm64::GenerateQuantizedLoads()
ARM64Reg temp_reg = ARM64Reg::X0; ARM64Reg temp_reg = ARM64Reg::X0;
ARM64Reg addr_reg = ARM64Reg::X1; ARM64Reg addr_reg = ARM64Reg::X1;
ARM64Reg scale_reg = ARM64Reg::X2; ARM64Reg scale_reg = ARM64Reg::X2;
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 3}; BitSet32 scratch_gprs{0, 3};
if (!jo.memcheck) if (!jo.memcheck)
gprs_to_push &= ~BitSet32{1}; scratch_gprs[1] = true;
BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1}; BitSet32 scratch_fprs{0, 1};
ARM64FloatEmitter float_emit(this); ARM64FloatEmitter float_emit(this);
const u8* start = GetCodePtr(); const u8* start = GetCodePtr();
@ -541,7 +541,7 @@ void JitArm64::GenerateQuantizedLoads()
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32; BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
gprs_to_push & ~BitSet32{DecodeReg(scale_reg)}, fprs_to_push, true); scratch_gprs | BitSet32{DecodeReg(scale_reg)}, scratch_fprs, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -550,8 +550,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
fprs_to_push, true); scratch_fprs, true);
float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@ -568,8 +568,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
fprs_to_push, true); scratch_fprs, true);
float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@ -586,8 +586,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
fprs_to_push, true); scratch_fprs, true);
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@ -603,8 +603,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
fprs_to_push, true); scratch_fprs, true);
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@ -622,7 +622,7 @@ void JitArm64::GenerateQuantizedLoads()
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
gprs_to_push & ~BitSet32{DecodeReg(scale_reg)}, fprs_to_push, true); scratch_gprs | BitSet32{DecodeReg(scale_reg)}, scratch_fprs, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -631,8 +631,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = constexpr u32 flags =
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
fprs_to_push, true); scratch_fprs, true);
float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@ -649,8 +649,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = constexpr u32 flags =
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
fprs_to_push, true); scratch_fprs, true);
float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@ -667,8 +667,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = constexpr u32 flags =
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
fprs_to_push, true); scratch_fprs, true);
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@ -684,8 +684,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = constexpr u32 flags =
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
fprs_to_push, true); scratch_fprs, true);
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@ -736,12 +736,12 @@ void JitArm64::GenerateQuantizedStores()
ARM64Reg temp_reg = ARM64Reg::X0; ARM64Reg temp_reg = ARM64Reg::X0;
ARM64Reg scale_reg = ARM64Reg::X1; ARM64Reg scale_reg = ARM64Reg::X1;
ARM64Reg addr_reg = ARM64Reg::X2; ARM64Reg addr_reg = ARM64Reg::X2;
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 1}; BitSet32 scratch_gprs{0, 1};
if (!jo.memcheck) if (!jo.memcheck)
gprs_to_push &= ~BitSet32{2}; scratch_gprs[2] = true;
if (!jo.fastmem) if (!jo.fastmem)
gprs_to_push &= ~BitSet32{3}; scratch_gprs[3] = true;
BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1}; BitSet32 scratch_fprs{0, 1};
ARM64FloatEmitter float_emit(this); ARM64FloatEmitter float_emit(this);
const u8* start = GetCodePtr(); const u8* start = GetCodePtr();
@ -752,8 +752,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32; BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
fprs_to_push, true); scratch_fprs, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -771,8 +771,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
fprs_to_push, true); scratch_fprs, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -790,8 +790,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
fprs_to_push, true); scratch_fprs, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -808,8 +808,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
fprs_to_push, true); scratch_fprs, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -826,8 +826,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
fprs_to_push, true); scratch_fprs, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -837,8 +837,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = constexpr u32 flags =
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
fprs_to_push, true); scratch_fprs, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -856,8 +856,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = constexpr u32 flags =
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
fprs_to_push, true); scratch_fprs, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -875,8 +875,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = constexpr u32 flags =
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
fprs_to_push, true); scratch_fprs, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -893,8 +893,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = constexpr u32 flags =
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
fprs_to_push, true); scratch_fprs, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }
@ -911,8 +911,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = constexpr u32 flags =
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
fprs_to_push, true); scratch_fprs, true);
RET(ARM64Reg::X30); RET(ARM64Reg::X30);
} }