From 5a09a432fd6830672cbddd127933d81f87246c72 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 16 Apr 2022 13:37:38 +0200 Subject: [PATCH] JitArm64: Skip locking Q0 for most FPR loads Normal float loads never use Q0, and paired float loads can skip using Q0 in the simplest case. --- .../Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp | 3 --- .../Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp | 7 ++++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index 4fe8ca4cb3..98761d52bf 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -78,7 +78,6 @@ void JitArm64::lfXX(UGeckoInstruction inst) (flags & BackPatchInfo::FLAG_SIZE_64) != 0 ? RegType::LowerPair : RegType::DuplicatedSingle; gpr.Lock(ARM64Reg::W1, ARM64Reg::W30); - fpr.Lock(ARM64Reg::Q0); if (jo.memcheck || !jo.fastmem) gpr.Lock(ARM64Reg::W0); @@ -170,7 +169,6 @@ void JitArm64::lfXX(UGeckoInstruction inst) regs_in_use[DecodeReg(ARM64Reg::W1)] = 0; if (jo.memcheck || !jo.fastmem) regs_in_use[DecodeReg(ARM64Reg::W0)] = 0; - fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0; if (!jo.memcheck) fprs_in_use[DecodeReg(VD)] = 0; @@ -193,7 +191,6 @@ void JitArm64::lfXX(UGeckoInstruction inst) } gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30); - fpr.Unlock(ARM64Reg::Q0); if (jo.memcheck || !jo.fastmem) gpr.Unlock(ARM64Reg::W0); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp index 725826dd23..57e1ffb398 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp @@ -38,7 +38,6 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) const int w = indexed ? inst.Wx : inst.W; gpr.Lock(ARM64Reg::W1, ARM64Reg::W30); - fpr.Lock(ARM64Reg::Q0); if (!js.assumeNoPairedQuantize) { gpr.Lock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3); @@ -48,6 +47,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) { gpr.Lock(ARM64Reg::W0); } + if (!js.assumeNoPairedQuantize || w) + fpr.Lock(ARM64Reg::Q0); constexpr ARM64Reg type_reg = ARM64Reg::W0; constexpr ARM64Reg addr_reg = ARM64Reg::W1; @@ -88,7 +89,6 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) gprs_in_use[DecodeReg(ARM64Reg::W1)] = false; if (jo.memcheck || !jo.fastmem) gprs_in_use[DecodeReg(ARM64Reg::W0)] = false; - fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false; if (!jo.memcheck) fprs_in_use[DecodeReg(VS)] = 0; @@ -135,7 +135,6 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) } gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30); - fpr.Unlock(ARM64Reg::Q0); if (!js.assumeNoPairedQuantize) { gpr.Unlock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3); @@ -145,6 +144,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) { gpr.Unlock(ARM64Reg::W0); } + if (!js.assumeNoPairedQuantize || w) + fpr.Unlock(ARM64Reg::Q0); } void JitArm64::psq_stXX(UGeckoInstruction inst)