diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h
index a344242e10..fe0376e921 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@@ -135,7 +135,8 @@ public:
   Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set = true);
 
   void SetFPRFIfNeeded(Gen::X64Reg xmm);
-  void HandleNaNs(UGeckoInstruction inst, Gen::X64Reg xmm_out, Gen::X64Reg xmm_in);
+  void HandleNaNs(UGeckoInstruction inst, Gen::X64Reg xmm_out, Gen::X64Reg xmm_in,
+                  Gen::X64Reg clobber = Gen::XMM0);
 
   void MultiplyImmediate(u32 imm, int a, int d, bool overflow);
 
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
index 59abb25cd2..f94aea8b93 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
@@ -55,7 +55,7 @@ void Jit64::SetFPRFIfNeeded(X64Reg xmm)
     SetFPRF(xmm);
 }
 
-void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm)
+void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Reg clobber)
 {
   //                      | PowerPC  | x86
   // ---------------------+----------+---------
@@ -72,7 +72,7 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm)
     return;
   }
 
-  _assert_(xmm != XMM0);
+  _assert_(xmm != clobber);
 
   std::vector<u32> inputs;
   u32 a = inst.FA, b = inst.FB, c = inst.FC;
@@ -110,15 +110,16 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm)
   std::reverse(inputs.begin(), inputs.end());
   if (cpu_info.bSSE4_1)
   {
-    avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, XMM0, R(xmm), R(xmm), CMP_UNORD);
-    PTEST(XMM0, R(XMM0));
+    avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, R(xmm), R(xmm), CMP_UNORD);
+    PTEST(clobber, R(clobber));
     FixupBranch handle_nan = J_CC(CC_NZ, true);
     SwitchToFarCode();
     SetJumpTarget(handle_nan);
+    _assert_msg_(DYNA_REC, clobber == XMM0, "BLENDVPD implicitly uses XMM0");
     BLENDVPD(xmm, M(psGeneratedQNaN));
     for (u32 x : inputs)
     {
-      avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, XMM0, fpr.R(x), fpr.R(x), CMP_UNORD);
+      avx_op(&XEmitter::VCMPPD, &XEmitter::CMPPD, clobber, fpr.R(x), fpr.R(x), CMP_UNORD);
       BLENDVPD(xmm, fpr.R(x));
     }
     FixupBranch done = J(true);
@@ -130,26 +131,26 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm)
     // SSE2 fallback
     X64Reg tmp = fpr.GetFreeXReg();
     fpr.FlushLockX(tmp);
-    MOVAPD(XMM0, R(xmm));
-    CMPPD(XMM0, R(XMM0), CMP_UNORD);
-    MOVMSKPD(RSCRATCH, R(XMM0));
+    MOVAPD(clobber, R(xmm));
+    CMPPD(clobber, R(clobber), CMP_UNORD);
+    MOVMSKPD(RSCRATCH, R(clobber));
     TEST(32, R(RSCRATCH), R(RSCRATCH));
     FixupBranch handle_nan = J_CC(CC_NZ, true);
     SwitchToFarCode();
     SetJumpTarget(handle_nan);
-    MOVAPD(tmp, R(XMM0));
-    PANDN(XMM0, R(xmm));
+    MOVAPD(tmp, R(clobber));
+    PANDN(clobber, R(xmm));
     PAND(tmp, M(psGeneratedQNaN));
-    POR(tmp, R(XMM0));
+    POR(tmp, R(clobber));
     MOVAPD(xmm, R(tmp));
     for (u32 x : inputs)
     {
-      MOVAPD(XMM0, fpr.R(x));
-      CMPPD(XMM0, R(XMM0), CMP_ORD);
-      MOVAPD(tmp, R(XMM0));
-      PANDN(XMM0, fpr.R(x));
+      MOVAPD(clobber, fpr.R(x));
+      CMPPD(clobber, R(clobber), CMP_ORD);
+      MOVAPD(tmp, R(clobber));
+      PANDN(clobber, fpr.R(x));
       PAND(xmm, R(tmp));
-      POR(xmm, R(XMM0));
+      POR(xmm, R(clobber));
     }
     FixupBranch done = J(true);
     SwitchToNearCode();
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
index a985d265ac..d438e172a1 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
@@ -68,7 +68,7 @@ void Jit64::ps_sum(UGeckoInstruction inst)
   default:
     PanicAlert("ps_sum WTF!!!");
   }
-  HandleNaNs(inst, fpr.RX(d), tmp);
+  HandleNaNs(inst, fpr.RX(d), tmp, tmp == XMM1 ? XMM0 : XMM1);
   ForceSinglePrecision(fpr.RX(d), fpr.R(d));
   SetFPRFIfNeeded(fpr.RX(d));
   fpr.UnlockAll();
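
Illustrative sketch (not part of the patch above): the new clobber parameter lets a caller point HandleNaNs at a scratch XMM register it can afford to lose, and the added _assert_msg_ records the one remaining constraint: the non-VEX BLENDVPD encoding used on the SSE4.1 path reads its blend mask implicitly from XMM0, so that path still requires clobber == XMM0. The ps_sum call site simply picks whichever of XMM0/XMM1 does not alias tmp. A minimal standalone C++ sketch of that selection rule, using a hypothetical PickClobber helper and a stand-in enum rather than Dolphin's Gen::X64Reg:

#include <cassert>

// Stand-in for Dolphin's Gen::X64Reg, for illustration only.
enum X64Reg
{
  XMM0,
  XMM1
};

// Hypothetical helper: pick a scratch ("clobber") register that is guaranteed
// not to alias the register currently holding the value, mirroring the
// "tmp == XMM1 ? XMM0 : XMM1" expression at the ps_sum call site.
static X64Reg PickClobber(X64Reg value_reg)
{
  return value_reg == XMM1 ? XMM0 : XMM1;
}

int main()
{
  assert(PickClobber(XMM1) == XMM0);  // value lives in XMM1 -> scratch is XMM0
  assert(PickClobber(XMM0) == XMM1);  // value lives in XMM0 -> scratch is XMM1
  return 0;
}

The SSE2 fallback needs no such restriction, since CMPPD/PANDN/POR take explicit operands, which is why only the BLENDVPD path gained the assert.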