JitArm64: Remove now unnecessary locking of temp registers

This commit is contained in:
JosJuice 2024-12-29 17:07:00 +01:00
parent 527ad0b99b
commit 53770f4abe
3 changed files with 36 additions and 78 deletions

View File

@ -31,7 +31,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
{
// We want to make sure to not get LR as a temp register
gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
if (jo.memcheck || !jo.fastmem)
if (jo.memcheck)
gpr.Lock(ARM64Reg::W0);
gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false);
@ -127,7 +127,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
BitSet32 scratch_fprs;
if (!update || early_update)
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
if (jo.memcheck || !jo.fastmem)
if (jo.memcheck)
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
if (!jo.memcheck)
scratch_gprs[DecodeReg(dest_reg)] = true;
@ -170,7 +170,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
}
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
if (jo.memcheck || !jo.fastmem)
if (jo.memcheck)
gpr.Unlock(ARM64Reg::W0);
}
@ -178,9 +178,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
bool update)
{
// We want to make sure to not get LR as a temp register
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W0);
gpr.Lock(ARM64Reg::W2, ARM64Reg::W30);
// Don't materialize zero.
ARM64Reg RS = gpr.IsImm(value, 0) ? ARM64Reg::WZR : gpr.R(value);
@ -274,11 +272,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
if (!update || early_update)
scratch_gprs[DecodeReg(ARM64Reg::W2)] = true;
if (!jo.fastmem)
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
u32 access_size = BackPatchInfo::GetFlagSize(flags);
u32 mmio_address = 0;
@ -319,7 +314,6 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
}
else if (mmio_address)
{
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
scratch_gprs[DecodeReg(ARM64Reg::W2)] = true;
scratch_gprs[DecodeReg(ARM64Reg::W30)] = true;
scratch_gprs[DecodeReg(RS)] = 0;
@ -341,9 +335,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
MOV(gpr.R(dest), addr_reg);
}
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W0);
gpr.Unlock(ARM64Reg::W2, ARM64Reg::W30);
}
FixupBranch JitArm64::BATAddressLookup(ARM64Reg addr_out, ARM64Reg addr_in, ARM64Reg tmp,
@ -526,7 +518,7 @@ void JitArm64::lmw(UGeckoInstruction inst)
s32 offset = inst.SIMM_16;
gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
if (jo.memcheck || !jo.fastmem)
if (jo.memcheck)
gpr.Lock(ARM64Reg::W0);
// MMU games make use of a >= d despite this being invalid according to the PEM.
@ -598,7 +590,7 @@ void JitArm64::lmw(UGeckoInstruction inst)
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
scratch_gprs[DecodeReg(addr_reg)] = true;
if (jo.memcheck || !jo.fastmem)
if (jo.memcheck)
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
if (!jo.memcheck)
scratch_gprs[DecodeReg(dest_reg)] = true;
@ -633,7 +625,7 @@ void JitArm64::lmw(UGeckoInstruction inst)
}
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
if (jo.memcheck || !jo.fastmem)
if (jo.memcheck)
gpr.Unlock(ARM64Reg::W0);
}
@ -645,9 +637,7 @@ void JitArm64::stmw(UGeckoInstruction inst)
u32 a = inst.RA, s = inst.RS;
s32 offset = inst.SIMM_16;
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W0);
gpr.Lock(ARM64Reg::W2, ARM64Reg::W30);
ARM64Reg addr_reg = ARM64Reg::W2;
bool a_is_addr_base_reg = false;
@ -715,10 +705,7 @@ void JitArm64::stmw(UGeckoInstruction inst)
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
scratch_gprs[DecodeReg(addr_reg)] = true;
if (!jo.fastmem)
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), scratch_gprs,
scratch_fprs);
@ -753,9 +740,7 @@ void JitArm64::stmw(UGeckoInstruction inst)
}
}
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W0);
gpr.Unlock(ARM64Reg::W2, ARM64Reg::W30);
}
void JitArm64::dcbx(UGeckoInstruction inst)
@ -976,14 +961,8 @@ void JitArm64::dcbz(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB;
gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W0);
Common::ScopeGuard register_guard([&] {
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W0);
});
Common::ScopeGuard register_guard([&] { gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30); });
constexpr ARM64Reg addr_reg = ARM64Reg::W1;
constexpr ARM64Reg temp_reg = ARM64Reg::W30;
@ -1050,8 +1029,6 @@ void JitArm64::dcbz(UGeckoInstruction inst)
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
if (!jo.fastmem)
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W1,
EncodeRegTo64(addr_reg), scratch_gprs, scratch_fprs);

View File

@ -78,8 +78,7 @@ void JitArm64::lfXX(UGeckoInstruction inst)
(flags & BackPatchInfo::FLAG_SIZE_64) != 0 ? RegType::LowerPair : RegType::DuplicatedSingle;
gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0);
if (jo.memcheck || !jo.fastmem)
if (jo.memcheck)
gpr.Lock(ARM64Reg::W0);
const ARM64Reg VD = fpr.RW(inst.FD, type, false);
@ -168,9 +167,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)
BitSet32 scratch_fprs;
if (!update || early_update)
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
if (jo.memcheck || !jo.fastmem)
if (jo.memcheck)
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
scratch_gprs[DecodeReg(ARM64Reg::Q0)] = true;
if (!jo.memcheck)
scratch_fprs[DecodeReg(VD)] = true;
@ -194,8 +192,7 @@ void JitArm64::lfXX(UGeckoInstruction inst)
}
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (jo.memcheck || !jo.fastmem)
if (jo.memcheck)
gpr.Unlock(ARM64Reg::W0);
}
@ -265,8 +262,6 @@ void JitArm64::stfXX(UGeckoInstruction inst)
u32 imm_addr = 0;
bool is_immediate = false;
fpr.Lock(ARM64Reg::Q0);
const bool have_single = fpr.IsSingle(inst.FS, true);
Arm64FPRCache::ScopedARM64Reg V0 =
@ -279,9 +274,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
V0 = std::move(single_reg);
}
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W0);
gpr.Lock(ARM64Reg::W2, ARM64Reg::W30);
ARM64Reg addr_reg = ARM64Reg::W2;
@ -370,12 +363,8 @@ void JitArm64::stfXX(UGeckoInstruction inst)
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
if (!update || early_update)
scratch_gprs[DecodeReg(ARM64Reg::W2)] = true;
if (!jo.fastmem)
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
scratch_fprs[DecodeReg(ARM64Reg::Q0)] = true;
if (is_immediate)
{
@ -426,8 +415,5 @@ void JitArm64::stfXX(UGeckoInstruction inst)
MOV(gpr.R(a), addr_reg);
}
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W0);
gpr.Unlock(ARM64Reg::W2, ARM64Reg::W30);
}

View File

@ -39,13 +39,12 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
const int w = indexed ? inst.Wx : inst.W;
gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize)
{
gpr.Lock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3);
fpr.Lock(ARM64Reg::Q1);
fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1);
}
else if (jo.memcheck || !jo.fastmem)
else if (jo.memcheck)
{
gpr.Lock(ARM64Reg::W0);
}
@ -84,9 +83,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
if (!update || early_update)
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
if (jo.memcheck || !jo.fastmem)
if (jo.memcheck)
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
scratch_fprs[DecodeReg(ARM64Reg::Q0)] = true;
if (!jo.memcheck)
scratch_fprs[DecodeReg(VS)] = true;
@ -133,13 +131,12 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
}
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize)
{
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3);
fpr.Unlock(ARM64Reg::Q1);
fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1);
}
else if (jo.memcheck || !jo.fastmem)
else if (jo.memcheck)
{
gpr.Unlock(ARM64Reg::W0);
}
@ -166,9 +163,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
const int i = indexed ? inst.Ix : inst.I;
const int w = indexed ? inst.Wx : inst.W;
fpr.Lock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize)
fpr.Lock(ARM64Reg::Q1);
fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1);
const bool have_single = fpr.IsSingle(inst.RS);
@ -204,11 +200,13 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
}
}
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!js.assumeNoPairedQuantize || !jo.fastmem)
gpr.Lock(ARM64Reg::W0);
if (!js.assumeNoPairedQuantize && !jo.fastmem)
gpr.Lock(ARM64Reg::W3);
gpr.Lock(ARM64Reg::W2, ARM64Reg::W30);
if (!js.assumeNoPairedQuantize)
{
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1);
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W3);
}
constexpr ARM64Reg type_reg = ARM64Reg::W0;
constexpr ARM64Reg scale_reg = ARM64Reg::W1;
@ -241,11 +239,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
if (!update || early_update)
scratch_gprs[DecodeReg(ARM64Reg::W2)] = true;
if (!jo.fastmem)
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
if (!w)
@ -278,12 +273,12 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
MOV(gpr.R(inst.RA), addr_reg);
}
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize || !jo.fastmem)
gpr.Unlock(ARM64Reg::W0);
if (!js.assumeNoPairedQuantize && !jo.fastmem)
gpr.Unlock(ARM64Reg::W3);
gpr.Unlock(ARM64Reg::W2, ARM64Reg::W30);
if (!js.assumeNoPairedQuantize)
fpr.Unlock(ARM64Reg::Q1);
{
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1);
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W3);
fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1);
}
}