From cfe65e0e91d6cdb793f13d5feb3975294ac6420d Mon Sep 17 00:00:00 2001 From: degasus Date: Wed, 2 Sep 2015 23:11:05 +0200 Subject: [PATCH] JitArm64: Fix single precision --- .../JitArm64/JitArm64_FloatingPoint.cpp | 8 ++++++++ .../Core/PowerPC/JitArm64/JitArm64_Paired.cpp | 15 +++++++++++++++ .../PowerPC/JitArm64/JitArm64_RegCache.cpp | 18 ++++++++++++++++++ .../Core/PowerPC/JitArm64/JitArm64_RegCache.h | 2 ++ 4 files changed, 43 insertions(+) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index df88501693..6b4304ad1c 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -42,6 +42,7 @@ void JitArm64::faddsx(UGeckoInstruction inst) ARM64Reg VD = fpr.RW(d, REG_DUP); m_float_emit.FADD(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB)); + fpr.FixSinglePrecision(d); } void JitArm64::faddx(UGeckoInstruction inst) @@ -75,6 +76,7 @@ void JitArm64::fmaddsx(UGeckoInstruction inst) m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC)); m_float_emit.FADD(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB)); + fpr.FixSinglePrecision(d); fpr.Unlock(V0); } @@ -125,6 +127,7 @@ void JitArm64::fmsubsx(UGeckoInstruction inst) m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC)); m_float_emit.FSUB(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB)); + fpr.FixSinglePrecision(d); fpr.Unlock(V0); } @@ -158,6 +161,7 @@ void JitArm64::fmulsx(UGeckoInstruction inst) ARM64Reg VD = fpr.RW(d, REG_DUP); m_float_emit.FMUL(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC)); + fpr.FixSinglePrecision(d); } void JitArm64::fmulx(UGeckoInstruction inst) @@ -221,6 +225,7 @@ void JitArm64::fnmaddsx(UGeckoInstruction inst) m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC)); m_float_emit.FADD(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB)); m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VD)); + fpr.FixSinglePrecision(d); fpr.Unlock(V0); } @@ -258,6 +263,7 @@ void JitArm64::fnmsubsx(UGeckoInstruction inst) m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC)); m_float_emit.FSUB(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB)); m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VD)); + fpr.FixSinglePrecision(d); fpr.Unlock(V0); } @@ -308,6 +314,7 @@ void JitArm64::fsubsx(UGeckoInstruction inst) ARM64Reg VD = fpr.RW(d, REG_DUP); m_float_emit.FSUB(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB)); + fpr.FixSinglePrecision(d); } void JitArm64::fsubx(UGeckoInstruction inst) @@ -476,4 +483,5 @@ void JitArm64::fdivsx(UGeckoInstruction inst) ARM64Reg VD = fpr.RW(d, REG_DUP); m_float_emit.FDIV(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB)); + fpr.FixSinglePrecision(d); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index b4272fedae..60a65d6f8b 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -43,6 +43,7 @@ void JitArm64::ps_add(UGeckoInstruction inst) ARM64Reg VD = fpr.RW(d, REG_REG); m_float_emit.FADD(64, VD, VA, VB); + fpr.FixSinglePrecision(d); } void JitArm64::ps_div(UGeckoInstruction inst) @@ -58,6 +59,7 @@ void JitArm64::ps_div(UGeckoInstruction inst) ARM64Reg VD = fpr.RW(d, REG_REG); m_float_emit.FDIV(64, VD, VA, VB); + fpr.FixSinglePrecision(d); } void JitArm64::ps_madd(UGeckoInstruction inst) @@ -76,6 +78,7 @@ void JitArm64::ps_madd(UGeckoInstruction inst) m_float_emit.FMUL(64, V0, VA, VC); m_float_emit.FADD(64, VD, V0, VB); + fpr.FixSinglePrecision(d); fpr.Unlock(V0); } @@ -97,6 +100,7 @@ void JitArm64::ps_madds0(UGeckoInstruction inst) m_float_emit.DUP(64, V0, VC, 0); m_float_emit.FMUL(64, V0, V0, VA); m_float_emit.FADD(64, VD, V0, VB); + fpr.FixSinglePrecision(d); fpr.Unlock(V0); } @@ -118,6 +122,7 @@ void JitArm64::ps_madds1(UGeckoInstruction inst) m_float_emit.DUP(64, V0, VC, 1); m_float_emit.FMUL(64, V0, V0, VA); m_float_emit.FADD(64, VD, V0, VB); + fpr.FixSinglePrecision(d); fpr.Unlock(V0); } @@ -225,6 +230,7 @@ void JitArm64::ps_mul(UGeckoInstruction inst) ARM64Reg VD = fpr.RW(d, REG_REG); m_float_emit.FMUL(64, VD, VA, VC); + fpr.FixSinglePrecision(d); } void JitArm64::ps_muls0(UGeckoInstruction inst) @@ -242,6 +248,7 @@ void JitArm64::ps_muls0(UGeckoInstruction inst) m_float_emit.DUP(64, V0, VC, 0); m_float_emit.FMUL(64, VD, VA, V0); + fpr.FixSinglePrecision(d); fpr.Unlock(V0); } @@ -260,6 +267,7 @@ void JitArm64::ps_muls1(UGeckoInstruction inst) m_float_emit.DUP(64, V0, VC, 1); m_float_emit.FMUL(64, VD, VA, V0); + fpr.FixSinglePrecision(d); fpr.Unlock(V0); } @@ -279,6 +287,7 @@ void JitArm64::ps_msub(UGeckoInstruction inst) m_float_emit.FMUL(64, V0, VA, VC); m_float_emit.FSUB(64, VD, V0, VB); + fpr.FixSinglePrecision(d); fpr.Unlock(V0); } @@ -329,6 +338,7 @@ void JitArm64::ps_nmadd(UGeckoInstruction inst) m_float_emit.FMUL(64, V0, VA, VC); m_float_emit.FADD(64, VD, V0, VB); m_float_emit.FNEG(64, VD, VD); + fpr.FixSinglePrecision(d); fpr.Unlock(V0); } @@ -350,6 +360,7 @@ void JitArm64::ps_nmsub(UGeckoInstruction inst) m_float_emit.FMUL(64, V0, VA, VC); m_float_emit.FSUB(64, VD, V0, VB); m_float_emit.FNEG(64, VD, VD); + fpr.FixSinglePrecision(d); fpr.Unlock(V0); } @@ -366,6 +377,7 @@ void JitArm64::ps_res(UGeckoInstruction inst) ARM64Reg VD = fpr.RW(d, REG_REG); m_float_emit.FRSQRTE(64, VD, VB); + fpr.FixSinglePrecision(d); } void JitArm64::ps_sel(UGeckoInstruction inst) @@ -409,6 +421,7 @@ void JitArm64::ps_sub(UGeckoInstruction inst) ARM64Reg VD = fpr.RW(d, REG_REG); m_float_emit.FSUB(64, VD, VA, VB); + fpr.FixSinglePrecision(d); } void JitArm64::ps_sum0(UGeckoInstruction inst) @@ -436,6 +449,7 @@ void JitArm64::ps_sum0(UGeckoInstruction inst) m_float_emit.FADD(64, V0, V0, VA); m_float_emit.INS(64, VD, 0, V0, 0); } + fpr.FixSinglePrecision(d); fpr.Unlock(V0); } @@ -465,6 +479,7 @@ void JitArm64::ps_sum1(UGeckoInstruction inst) m_float_emit.FADD(64, V0, V0, VB); m_float_emit.INS(64, VD, 1, V0, 1); } + fpr.FixSinglePrecision(d); fpr.Unlock(V0); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index b103f95ed6..f0bba4bb48 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -559,3 +559,21 @@ BitSet32 Arm64FPRCache::GetCallerSavedUsed() registers[it.GetReg() - Q0] = 1; return registers; } + +void Arm64FPRCache::FixSinglePrecision(u32 preg) +{ + ARM64Reg host_reg = m_guest_registers[preg].GetReg(); + switch (m_guest_registers[preg].GetType()) + { + case REG_DUP: // only PS0 needs to be converted + m_float_emit->FCVT(32, 64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); + m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); + break; + case REG_REG: // PS0 and PS1 needs to be converted + m_float_emit->FCVTN(32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); + m_float_emit->FCVTL(64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); + break; + default: + break; + } +} diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index 09f86523ff..89042777d7 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -277,6 +277,8 @@ public: BitSet32 GetCallerSavedUsed() override; + void FixSinglePrecision(u32 preg); + protected: // Get the order of the host registers void GetAllocationOrder();