mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-25 15:31:17 +01:00
[AArch64] Optimize lfd instructions if possible.
If we are going to be using lfd, then chances are it is going to be used in double-heavy areas of code. If we only need to load the lower register, then we should not have to worry about inserting into the low 64 bits of the guest register. So add a new flag to the backpatching that lets lfd load directly into the destination register. This gives a ~3% performance improvement in Povray.
This commit is contained in:
parent
6cb87a9227
commit
df53b37253
@ -71,9 +71,22 @@ bool JitArm64::DisasmLoadStore(const u8* ptr, u32* flags, ARM64Reg* reg)
|
||||
}
|
||||
else // 64-bit float
|
||||
{
|
||||
// Real register is in the INS instruction
|
||||
u32 ins_inst = *(u32*)(ptr + 8);
|
||||
*reg = (ARM64Reg)(ins_inst & 0x1F);
|
||||
u32 ldr_reg = inst & 0x1F;
|
||||
|
||||
if (ldr_reg)
|
||||
{
|
||||
// Loads directly in to the target register
|
||||
// No need to dump the flag in to flags here
|
||||
// The slowmem path always first returns in Q0
|
||||
// then moves to the destination register
|
||||
*reg = (ARM64Reg)(ldr_reg);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Real register is in the INS instruction
|
||||
u32 ins_inst = *(u32*)(ptr + 8);
|
||||
*reg = (ARM64Reg)(ins_inst & 0x1F);
|
||||
}
|
||||
}
|
||||
*flags |= BackPatchInfo::FLAG_LOAD;
|
||||
return true;
|
||||
@ -165,9 +178,17 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
|
||||
}
|
||||
else
|
||||
{
|
||||
m_float_emit.LDR(64, INDEX_UNSIGNED, Q0, addr, 0);
|
||||
m_float_emit.REV64(8, D0, D0);
|
||||
m_float_emit.INS(64, RS, 0, Q0, 0);
|
||||
if (flags & BackPatchInfo::FLAG_ONLY_LOWER)
|
||||
{
|
||||
m_float_emit.LDR(64, INDEX_UNSIGNED, EncodeRegToDouble(RS), addr, 0);
|
||||
m_float_emit.REV64(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
|
||||
}
|
||||
else
|
||||
{
|
||||
m_float_emit.LDR(64, INDEX_UNSIGNED, Q0, addr, 0);
|
||||
m_float_emit.REV64(8, D0, D0);
|
||||
m_float_emit.INS(64, RS, 0, Q0, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (flags & BackPatchInfo::FLAG_STORE)
|
||||
@ -217,7 +238,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
|
||||
handler.addr_reg = addr;
|
||||
handler.gprs = gprs_to_push;
|
||||
handler.fprs = fprs_to_push;
|
||||
handler.flags = flags;
|
||||
handler.flags = flags & ~BackPatchInfo::FLAG_ONLY_LOWER;
|
||||
|
||||
FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_start];
|
||||
auto handler_loc_iter = m_handler_to_loc.find(handler);
|
||||
|
@ -71,12 +71,19 @@ void JitArm64::lfXX(UGeckoInstruction inst)
|
||||
u32 imm_addr = 0;
|
||||
bool is_immediate = false;
|
||||
|
||||
// 64 bit loads only load PSR0
|
||||
fpr.BindToRegister(inst.FD, flags & BackPatchInfo::FLAG_SIZE_F64, flags & BackPatchInfo::FLAG_SIZE_F64);
|
||||
bool only_lower = !!(flags & BackPatchInfo::FLAG_SIZE_F64);
|
||||
|
||||
ARM64Reg VD = fpr.R(inst.FD, flags & BackPatchInfo::FLAG_SIZE_F64);
|
||||
fpr.BindToRegister(inst.FD, false, only_lower);
|
||||
|
||||
ARM64Reg VD = fpr.R(inst.FD, only_lower);
|
||||
ARM64Reg addr_reg = W0;
|
||||
|
||||
if (!fpr.IsLower(inst.FD))
|
||||
only_lower = false;
|
||||
|
||||
if (only_lower)
|
||||
flags |= BackPatchInfo::FLAG_ONLY_LOWER;
|
||||
|
||||
gpr.Lock(W0, W30);
|
||||
fpr.Lock(Q0);
|
||||
|
||||
|
@ -18,6 +18,7 @@ struct BackPatchInfo
|
||||
FLAG_SIZE_F64 = (1 << 6),
|
||||
FLAG_REVERSE = (1 << 7),
|
||||
FLAG_EXTEND = (1 << 8),
|
||||
FLAG_ONLY_LOWER = (1 << 9),
|
||||
};
|
||||
|
||||
static u32 GetFlagSize(u32 flags)
|
||||
|
Loading…
x
Reference in New Issue
Block a user