From 84b90ad1c6c3e82913e0a4fb98828192d5f0ef92 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sun, 25 Jan 2015 03:16:29 -0600 Subject: [PATCH 1/5] [AArch64] Fix cmp --- .../Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 472162332c..be74f76f90 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -377,13 +377,18 @@ void JitArm64::cmp(UGeckoInstruction inst) } ARM64Reg WA = gpr.GetReg(); + ARM64Reg WB = gpr.GetReg(); + ARM64Reg XA = EncodeRegTo64(WA); + ARM64Reg XB = EncodeRegTo64(WB); ARM64Reg RA = gpr.R(a); ARM64Reg RB = gpr.R(b); + SXTW(XA, RA); + SXTW(XB, RB); - SUB(WA, RA, RB); - ComputeRC(WA, crf); + SUB(XA, XA, XB); + STR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf)); - gpr.Unlock(WA); + gpr.Unlock(WA, WB); } void JitArm64::cmpl(UGeckoInstruction inst) From 6791a808b6777a14e4b0ae5ab43248138f04aec1 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sun, 25 Jan 2015 03:16:57 -0600 Subject: [PATCH 2/5] [AArch64] Fix addzex. 
--- Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index be74f76f90..44da8822a8 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -659,11 +659,13 @@ void JitArm64::addzex(UGeckoInstruction inst) gpr.BindToRegister(d, d == a); ARM64Reg WA = gpr.GetReg(); LDRB(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(xer_ca)); - CMP(WA, 1); - CSINC(gpr.R(d), gpr.R(a), gpr.R(a), CC_NEQ); + CMP(WA, 0); + CSINC(gpr.R(d), gpr.R(a), gpr.R(a), CC_EQ); CMP(gpr.R(d), 0); gpr.Unlock(WA); ComputeCarry(); + if (inst.Rc) + ComputeRC(gpr.R(d), 0); } void JitArm64::subfx(UGeckoInstruction inst) From 2483f7b35901f772ecd8da8301ecc18ce3aec9b4 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sun, 25 Jan 2015 03:17:14 -0600 Subject: [PATCH 3/5] [AArch64] Fix addcx. --- Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 44da8822a8..09ecdcec12 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -702,7 +702,7 @@ void JitArm64::addcx(UGeckoInstruction inst) if (gpr.IsImm(a) && gpr.IsImm(b)) { u32 i = gpr.GetImm(a), j = gpr.GetImm(b); - gpr.SetImmediate(d, i * j); + gpr.SetImmediate(d, i + j); bool has_carry = Interpreter::Helper_Carry(i, j); ComputeCarry(has_carry); From e4e08b059605f1fa5f92cc2eb8b73c29c10fa093 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sun, 25 Jan 2015 03:17:46 -0600 Subject: [PATCH 4/5] [AArch64] Make sure to load the full destination register on float instructions. 
With the float instructions that only affect the lower 64 bits of the destination register, we need to make sure to load the full 128-bit register. This ensures that we aren't saving garbage into the top 64 bits. --- .../PowerPC/JitArm64/JitArm64_FloatingPoint.cpp | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 305f30d217..ef2637503f 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -22,7 +22,6 @@ void JitArm64::fabsx(UGeckoInstruction inst) JITDISABLE(bJITFloatingPointOff); FALLBACK_IF(inst.Rc); - fpr.BindToRegister(inst.FD, inst.FD == inst.FB); ARM64Reg VB = fpr.R(inst.FB); ARM64Reg VD = fpr.R(inst.FD); ARM64Reg V0 = fpr.GetReg(); @@ -54,7 +53,6 @@ void JitArm64::faddx(UGeckoInstruction inst) JITDISABLE(bJITFloatingPointOff); FALLBACK_IF(inst.Rc); - fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FB); ARM64Reg VA = fpr.R(inst.FA); ARM64Reg VB = fpr.R(inst.FB); ARM64Reg VD = fpr.R(inst.FD); @@ -94,7 +92,6 @@ void JitArm64::fmaddx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b || d == c); ARM64Reg VA = fpr.R(a); ARM64Reg VB = fpr.R(b); @@ -114,7 +111,6 @@ void JitArm64::fmrx(UGeckoInstruction inst) JITDISABLE(bJITFloatingPointOff); FALLBACK_IF(inst.Rc); - fpr.BindToRegister(inst.FD, inst.FD == inst.FB); ARM64Reg VB = fpr.R(inst.FB); ARM64Reg VD = fpr.R(inst.FD); @@ -149,7 +145,6 @@ void JitArm64::fmsubx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b || d == c); ARM64Reg VA = fpr.R(a); ARM64Reg VB = fpr.R(b); @@ -184,7 +179,6 @@ void JitArm64::fmulx(UGeckoInstruction inst) JITDISABLE(bJITFloatingPointOff); 
FALLBACK_IF(inst.Rc); - fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FC); ARM64Reg VA = fpr.R(inst.FA); ARM64Reg VC = fpr.R(inst.FC); ARM64Reg VD = fpr.R(inst.FD); @@ -202,7 +196,6 @@ void JitArm64::fnabsx(UGeckoInstruction inst) JITDISABLE(bJITFloatingPointOff); FALLBACK_IF(inst.Rc); - fpr.BindToRegister(inst.FD, inst.FD == inst.FB); ARM64Reg VB = fpr.R(inst.FB); ARM64Reg VD = fpr.R(inst.FD); ARM64Reg V0 = fpr.GetReg(); @@ -220,7 +213,6 @@ void JitArm64::fnegx(UGeckoInstruction inst) JITDISABLE(bJITFloatingPointOff); FALLBACK_IF(inst.Rc); - fpr.BindToRegister(inst.FD, inst.FD == inst.FB); ARM64Reg VB = fpr.R(inst.FB); ARM64Reg VD = fpr.R(inst.FD); ARM64Reg V0 = fpr.GetReg(); @@ -260,7 +252,6 @@ void JitArm64::fnmaddx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b || d == c); ARM64Reg VA = fpr.R(a); ARM64Reg VB = fpr.R(b); @@ -304,7 +295,6 @@ void JitArm64::fnmsubx(UGeckoInstruction inst) FALLBACK_IF(inst.Rc); u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - fpr.BindToRegister(d, d == a || d == b || d == c); ARM64Reg VA = fpr.R(a); ARM64Reg VB = fpr.R(b); @@ -324,16 +314,12 @@ void JitArm64::fselx(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITFloatingPointOff); FALLBACK_IF(inst.Rc); - fpr.BindToRegister(inst.FD, - inst.FD == inst.FA || - inst.FD == inst.FB || - inst.FD == inst.FC); ARM64Reg V0 = fpr.GetReg(); ARM64Reg VD = fpr.R(inst.FD); ARM64Reg VA = fpr.R(inst.FA); ARM64Reg VB = fpr.R(inst.FB); - ARM64Reg VC = gpr.R(inst.FC); + ARM64Reg VC = fpr.R(inst.FC); m_float_emit.FCMPE(VA); m_float_emit.FCSEL(V0, VC, VB, CC_GE); @@ -363,7 +349,6 @@ void JitArm64::fsubx(UGeckoInstruction inst) JITDISABLE(bJITFloatingPointOff); FALLBACK_IF(inst.Rc); - fpr.BindToRegister(inst.FD, inst.FD == inst.FA || inst.FD == inst.FB); ARM64Reg VA = fpr.R(inst.FA); ARM64Reg VB = fpr.R(inst.FB); ARM64Reg VD = fpr.R(inst.FD); From 
f24c466e7ec78f8eec9f06f1749e52b2bf5ca24f Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sun, 25 Jan 2015 03:19:36 -0600 Subject: [PATCH 5/5] [AArch64] Fix AArch64 instruction encoding. --- Source/Core/Common/Arm64Emitter.cpp | 6 ++++-- .../Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp index 5801e5ea57..8812bceb7e 100644 --- a/Source/Core/Common/Arm64Emitter.cpp +++ b/Source/Core/Common/Arm64Emitter.cpp @@ -1754,7 +1754,8 @@ void ARM64FloatEmitter::EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 void ARM64FloatEmitter::EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm) { - bool is_double = !IsSingle(Rn); + _assert_msg_(DYNA_REC, IsQuad(Rn), "%s doesn't support vector!", __FUNCTION__); + bool is_double = IsDouble(Rn); Rn = DecodeReg(Rn); Rm = DecodeReg(Rm); @@ -1765,7 +1766,8 @@ void ARM64FloatEmitter::EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Re void ARM64FloatEmitter::EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { - bool is_double = !IsSingle(Rd); + _assert_msg_(DYNA_REC, IsQuad(Rd), "%s doesn't support vector!", __FUNCTION__); + bool is_double = IsDouble(Rd); Rd = DecodeReg(Rd); Rn = DecodeReg(Rn); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index ef2637503f..e423791cde 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -321,8 +321,8 @@ void JitArm64::fselx(UGeckoInstruction inst) ARM64Reg VB = fpr.R(inst.FB); ARM64Reg VC = fpr.R(inst.FC); - m_float_emit.FCMPE(VA); - m_float_emit.FCSEL(V0, VC, VB, CC_GE); + m_float_emit.FCMPE(EncodeRegToDouble(VA)); + m_float_emit.FCSEL(EncodeRegToDouble(V0), EncodeRegToDouble(VC), EncodeRegToDouble(VB), CC_GE); 
m_float_emit.INS(64, VD, 0, V0, 0); fpr.Unlock(V0);