JitArm64: Lock fewer registers when assumeNoPairedQuantize

This commit is contained in:
JosJuice 2021-07-10 13:21:27 +02:00
parent 96760093e9
commit 93e968208e

View File

@ -36,8 +36,13 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
const int i = indexed ? inst.Ix : inst.I;
const int w = indexed ? inst.Wx : inst.W;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1);
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize)
{
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2);
fpr.Lock(ARM64Reg::Q1);
}
constexpr ARM64Reg addr_reg = ARM64Reg::W0;
constexpr ARM64Reg scale_reg = ARM64Reg::W1;
@ -73,8 +78,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
// Wipe the registers we are using as temporaries
gprs_in_use &= BitSet32(~7);
fprs_in_use &= BitSet32(~3);
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
fprs_in_use[DecodeReg(VS)] = 0;
u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
@ -103,8 +108,13 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
m_float_emit.INS(32, VS, 1, ARM64Reg::Q0, 0);
}
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize)
{
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2);
fpr.Unlock(ARM64Reg::Q1);
}
}
void JitArm64::psq_stXX(UGeckoInstruction inst)
@ -127,6 +137,7 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
const int i = indexed ? inst.Ix : inst.I;
const int w = indexed ? inst.Wx : inst.W;
if (!js.assumeNoPairedQuantize)
fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1);
const bool have_single = fpr.IsSingle(inst.RS);
@ -162,7 +173,9 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
}
}
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!js.assumeNoPairedQuantize)
gpr.Lock(ARM64Reg::W2);
constexpr ARM64Reg scale_reg = ARM64Reg::W0;
constexpr ARM64Reg addr_reg = ARM64Reg::W1;
@ -191,15 +204,15 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
MOV(gpr.R(inst.RA), addr_reg);
}
if (js.assumeNoPairedQuantize)
{
BitSet32 gprs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
// Wipe the registers we are using as temporaries
gprs_in_use &= BitSet32(~7);
fprs_in_use &= BitSet32(~3);
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
if (js.assumeNoPairedQuantize)
{
u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
if (!w)
flags |= BackPatchInfo::FLAG_PAIR;
@ -221,6 +234,10 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
if (js.assumeNoPairedQuantize && !have_single)
fpr.Unlock(VS);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W30);
if (!js.assumeNoPairedQuantize)
{
gpr.Unlock(ARM64Reg::W2);
fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1);
}
}