From 6b8ab5993affcc03e4b1914d9d520e1f02653a6d Mon Sep 17 00:00:00 2001 From: Tillmann Karras Date: Thu, 21 May 2015 12:33:36 +0200 Subject: [PATCH] Jit64: make ForceSinglePrecision more versatile --- .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp | 34 +++-------------- Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp | 12 +++--- .../Core/Core/PowerPC/JitCommon/Jit_Util.cpp | 37 +++++++++---------- Source/Core/Core/PowerPC/JitCommon/Jit_Util.h | 3 +- 4 files changed, 30 insertions(+), 56 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp index 349a7a269c..bf40cecbec 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -38,17 +38,7 @@ void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool single, void (X avx_op(avxOp, sseOp, fpr.RX(d), fpr.R(a), fpr.R(b), packed, reversible); } if (single) - { - if (packed) - { - ForceSinglePrecisionP(fpr.RX(d), fpr.RX(d)); - } - else - { - ForceSinglePrecisionS(fpr.RX(d), fpr.RX(d)); - MOVDDUP(fpr.RX(d), fpr.R(d)); - } - } + ForceSinglePrecision(fpr.RX(d), fpr.R(d), packed, true); SetFPRFIfNeeded(fpr.RX(d)); fpr.UnlockAll(); } @@ -215,21 +205,9 @@ void Jit64::fmaddXX(UGeckoInstruction inst) fpr.BindToRegister(d, !single); if (single) - { - if (packed) - { - ForceSinglePrecisionP(fpr.RX(d), XMM0); - } - else - { - ForceSinglePrecisionS(fpr.RX(d), XMM0); - MOVDDUP(fpr.RX(d), fpr.R(d)); - } - } + ForceSinglePrecision(fpr.RX(d), R(XMM0), packed, true); else - { MOVSD(fpr.RX(d), R(XMM0)); - } SetFPRFIfNeeded(fpr.RX(d)); fpr.UnlockAll(); } @@ -492,11 +470,9 @@ void Jit64::frspx(UGeckoInstruction inst) int d = inst.FD; fpr.Lock(b, d); - fpr.BindToRegister(d, d == b); - if (b != d) - MOVAPD(fpr.RX(d), fpr.R(b)); - ForceSinglePrecisionS(fpr.RX(d), fpr.RX(d)); - MOVDDUP(fpr.RX(d), fpr.R(d)); + OpArg src = fpr.R(b); + fpr.BindToRegister(d, false); + ForceSinglePrecision(fpr.RX(d), src, false, true); SetFPRFIfNeeded(fpr.RX(d)); fpr.UnlockAll(); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp index 2c772a3606..ecaa1daa76 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp @@ -113,7 +113,7 @@ void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*avxOp) { avx_op(avxOp, sseOp, fpr.RX(d), fpr.R(a), fpr.R(b), true, reversible); } - ForceSinglePrecisionP(fpr.RX(d), fpr.RX(d)); + ForceSinglePrecision(fpr.RX(d), fpr.R(d)); SetFPRFIfNeeded(fpr.RX(d)); fpr.UnlockAll(); } @@ -173,7 +173,7 @@ void Jit64::ps_sum(UGeckoInstruction inst) PanicAlert("ps_sum WTF!!!"); } fpr.BindToRegister(d, false); - ForceSinglePrecisionP(fpr.RX(d), XMM0); + ForceSinglePrecision(fpr.RX(d), R(XMM0)); SetFPRFIfNeeded(fpr.RX(d)); fpr.UnlockAll(); } @@ -205,7 +205,7 @@ void Jit64::ps_muls(UGeckoInstruction inst) Force25BitPrecision(XMM0, R(XMM0), XMM1); MULPD(XMM0, fpr.R(a)); fpr.BindToRegister(d, false); - ForceSinglePrecisionP(fpr.RX(d), XMM0); + ForceSinglePrecision(fpr.RX(d), R(XMM0)); SetFPRFIfNeeded(fpr.RX(d)); fpr.UnlockAll(); } @@ -264,7 +264,7 @@ void Jit64::ps_rsqrte(UGeckoInstruction inst) CALL((void *)asm_routines.frsqrte); MOVLHPS(fpr.RX(d), XMM0); - ForceSinglePrecisionP(fpr.RX(d), fpr.RX(d)); + ForceSinglePrecision(fpr.RX(d), fpr.R(d)); SetFPRFIfNeeded(fpr.RX(d)); fpr.UnlockAll(); gpr.UnlockAllX(); @@ -291,7 +291,7 @@ void Jit64::ps_res(UGeckoInstruction inst) CALL((void *)asm_routines.fres); MOVLHPS(fpr.RX(d), XMM0); - ForceSinglePrecisionP(fpr.RX(d), fpr.RX(d)); + ForceSinglePrecision(fpr.RX(d), fpr.R(d)); SetFPRFIfNeeded(fpr.RX(d)); fpr.UnlockAll(); gpr.UnlockAllX(); @@ -386,7 +386,7 @@ void Jit64::ps_maddXX(UGeckoInstruction inst) } fpr.BindToRegister(d, false); - ForceSinglePrecisionP(fpr.RX(d), XMM0); + ForceSinglePrecision(fpr.RX(d), R(XMM0)); SetFPRFIfNeeded(fpr.RX(d)); fpr.UnlockAll(); } diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index 20e42f40d3..b01a9e4262 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -640,31 +640,30 @@ void EmuCodeBlock::WriteToConstRamAddress(int accessSize, OpArg arg, u32 address MOV(accessSize, MRegSum(RMEM, RSCRATCH2), R(reg)); } -void EmuCodeBlock::ForceSinglePrecisionS(X64Reg output, X64Reg input) +void EmuCodeBlock::ForceSinglePrecision(X64Reg output, OpArg input, bool packed, bool duplicate) { // Most games don't need these. Zelda requires it though - some platforms get stuck without them. if (jit->jo.accurateSinglePrecision) { - CVTSD2SS(output, R(input)); - CVTSS2SD(output, R(output)); + if (packed) + { + CVTPD2PS(output, input); + CVTPS2PD(output, R(output)); + } + else + { + CVTSD2SS(output, input); + CVTSS2SD(output, R(output)); + if (duplicate) + MOVDDUP(output, R(output)); + } } - else if (output != input) + else if (!input.IsSimpleReg() || input.GetSimpleReg() != output) { - MOVAPD(output, R(input)); - } -} - -void EmuCodeBlock::ForceSinglePrecisionP(X64Reg output, X64Reg input) -{ - // Most games don't need these. Zelda requires it though - some platforms get stuck without them. - if (jit->jo.accurateSinglePrecision) - { - CVTPD2PS(output, R(input)); - CVTPS2PD(output, R(output)); - } - else if (output != input) - { - MOVAPD(output, R(input)); + if (duplicate) + MOVDDUP(output, input); + else + MOVAPD(output, input); } } diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h index c3175633ba..7e25131eb4 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h @@ -121,8 +121,7 @@ public: void avx_op(void (Gen::XEmitter::*avxOp)(Gen::X64Reg, Gen::X64Reg, Gen::OpArg, u8), void (Gen::XEmitter::*sseOp)(Gen::X64Reg, Gen::OpArg, u8), Gen::X64Reg regOp, Gen::OpArg arg1, Gen::OpArg arg2, u8 imm); - void ForceSinglePrecisionS(Gen::X64Reg output, Gen::X64Reg input); - void ForceSinglePrecisionP(Gen::X64Reg output, Gen::X64Reg input); + void ForceSinglePrecision(Gen::X64Reg output, Gen::OpArg input, bool packed = true, bool duplicate = false); void Force25BitPrecision(Gen::X64Reg output, Gen::OpArg input, Gen::X64Reg tmp); // RSCRATCH might get trashed