JitArm64: Skip locking Q0 for most FPR loads

Normal float loads never use Q0, and paired float loads can skip using
Q0 in the simplest case (js.assumeNoPairedQuantize is set and w is 0).
This commit is contained in:
JosJuice 2022-04-16 13:37:38 +02:00
parent 5f7e9d3bf1
commit 5a09a432fd
2 changed files with 4 additions and 6 deletions

View File

@ -78,7 +78,6 @@ void JitArm64::lfXX(UGeckoInstruction inst)
(flags & BackPatchInfo::FLAG_SIZE_64) != 0 ? RegType::LowerPair : RegType::DuplicatedSingle; (flags & BackPatchInfo::FLAG_SIZE_64) != 0 ? RegType::LowerPair : RegType::DuplicatedSingle;
gpr.Lock(ARM64Reg::W1, ARM64Reg::W30); gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0);
if (jo.memcheck || !jo.fastmem) if (jo.memcheck || !jo.fastmem)
gpr.Lock(ARM64Reg::W0); gpr.Lock(ARM64Reg::W0);
@ -170,7 +169,6 @@ void JitArm64::lfXX(UGeckoInstruction inst)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0; regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
if (jo.memcheck || !jo.fastmem) if (jo.memcheck || !jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
if (!jo.memcheck) if (!jo.memcheck)
fprs_in_use[DecodeReg(VD)] = 0; fprs_in_use[DecodeReg(VD)] = 0;
@ -193,7 +191,6 @@ void JitArm64::lfXX(UGeckoInstruction inst)
} }
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30); gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (jo.memcheck || !jo.fastmem) if (jo.memcheck || !jo.fastmem)
gpr.Unlock(ARM64Reg::W0); gpr.Unlock(ARM64Reg::W0);
} }

View File

@ -38,7 +38,6 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
const int w = indexed ? inst.Wx : inst.W; const int w = indexed ? inst.Wx : inst.W;
gpr.Lock(ARM64Reg::W1, ARM64Reg::W30); gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize) if (!js.assumeNoPairedQuantize)
{ {
gpr.Lock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3); gpr.Lock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3);
@ -48,6 +47,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
{ {
gpr.Lock(ARM64Reg::W0); gpr.Lock(ARM64Reg::W0);
} }
if (!js.assumeNoPairedQuantize || w)
fpr.Lock(ARM64Reg::Q0);
constexpr ARM64Reg type_reg = ARM64Reg::W0; constexpr ARM64Reg type_reg = ARM64Reg::W0;
constexpr ARM64Reg addr_reg = ARM64Reg::W1; constexpr ARM64Reg addr_reg = ARM64Reg::W1;
@ -88,7 +89,6 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
gprs_in_use[DecodeReg(ARM64Reg::W1)] = false; gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
if (jo.memcheck || !jo.fastmem) if (jo.memcheck || !jo.fastmem)
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false; gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
if (!jo.memcheck) if (!jo.memcheck)
fprs_in_use[DecodeReg(VS)] = 0; fprs_in_use[DecodeReg(VS)] = 0;
@ -135,7 +135,6 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
} }
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30); gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize) if (!js.assumeNoPairedQuantize)
{ {
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3); gpr.Unlock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3);
@ -145,6 +144,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
{ {
gpr.Unlock(ARM64Reg::W0); gpr.Unlock(ARM64Reg::W0);
} }
if (!js.assumeNoPairedQuantize || w)
fpr.Unlock(ARM64Reg::Q0);
} }
void JitArm64::psq_stXX(UGeckoInstruction inst) void JitArm64::psq_stXX(UGeckoInstruction inst)