diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h
index b2034c7c13..e6160004ab 100644
--- a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h
+++ b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h
@@ -220,6 +220,7 @@ public:
 	void ps_sum0(UGeckoInstruction _inst);
 	void ps_sum1(UGeckoInstruction _inst);
 	void ps_madd(UGeckoInstruction _inst);
+	void ps_msub(UGeckoInstruction _inst);
 	void ps_madds0(UGeckoInstruction _inst);
 	void ps_madds1(UGeckoInstruction _inst);
 	void ps_sub(UGeckoInstruction _inst);
diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp
index 9f8c90b7df..6e3c898c7f 100644
--- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp
+++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp
@@ -69,15 +69,42 @@ void JitArm::ps_madd(UGeckoInstruction inst)
 	ARMReg V0 = fpr.GetReg();
 	ARMReg V1 = fpr.GetReg();
 
-	VMOV(V0, vB0);
-	VMOV(V1, vB1);
+	VMUL(V0, vA0, vC0);
+	VMUL(V1, vA1, vC1);
+	VADD(vD0, V0, vB0);
+	VADD(vD1, V1, vB1);
+
+	fpr.Unlock(V0);
+	fpr.Unlock(V1);
+}
+
+void JitArm::ps_msub(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITPairedOff)
 
-	VMLA(V0, vA0, vC0);
-	VMLA(V1, vA1, vC1);
+	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
 
-	VMOV(vD0, V0);
-	VMOV(vD1, V1);
+	if (inst.Rc) {
+		Default(inst); return;
+	}
 
+	ARMReg vA0 = fpr.R0(a);
+	ARMReg vA1 = fpr.R1(a);
+	ARMReg vB0 = fpr.R0(b);
+	ARMReg vB1 = fpr.R1(b);
+	ARMReg vC0 = fpr.R0(c);
+	ARMReg vC1 = fpr.R1(c);
+	ARMReg vD0 = fpr.R0(d, false);
+	ARMReg vD1 = fpr.R1(d, false);
+	ARMReg V0 = fpr.GetReg();
+	ARMReg V1 = fpr.GetReg();
+
+	VMUL(V0, vA0, vC0);
+	VMUL(V1, vA1, vC1);
+	VSUB(vD0, V0, vB0);
+	VSUB(vD1, V1, vB1);
+
 	fpr.Unlock(V0);
 	fpr.Unlock(V1);
 }
@@ -102,15 +129,12 @@ void JitArm::ps_madds0(UGeckoInstruction inst)
 
 	ARMReg V0 = fpr.GetReg();
 	ARMReg V1 = fpr.GetReg();
-	
-	VMOV(V0, vB0);
-	VMOV(V1, vB1);
 
-	VMLA(V0, vA0, vC0);
-	VMLA(V1, vA1, vC0);
+	VMUL(V0, vA0, vC0);
+	VMUL(V1, vA1, vC0);
 
-	VMOV(vD0, V0);
-	VMOV(vD1, V1);
+	VADD(vD0, V0, vB0);
+	VADD(vD1, V1, vB1);
 
 	fpr.Unlock(V0);
 	fpr.Unlock(V1);
@@ -137,14 +161,10 @@ void JitArm::ps_madds1(UGeckoInstruction inst)
 	ARMReg V0 = fpr.GetReg();
 	ARMReg V1 = fpr.GetReg();
 
-	VMOV(V0, vB0);
-	VMOV(V1, vB1);
-
-	VMLA(V0, vA0, vC1);
-	VMLA(V1, vA1, vC1);
-
-	VMOV(vD0, V0);
-	VMOV(vD1, V1);
+	VMUL(V0, vA0, vC1);
+	VMUL(V1, vA1, vC1);
+	VADD(vD0, V0, vB0);
+	VADD(vD1, V1, vB1);
 
 	fpr.Unlock(V0);
 	fpr.Unlock(V1);
diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp
index d37a0206e0..a8e34ebdf6 100644
--- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp
+++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp
@@ -156,7 +156,7 @@ static GekkoOPTemplate table4_2[] =
 	{24, &JitArm::Default}, //"ps_res", OPTYPE_PS, 0}},
 	{25, &JitArm::ps_mul}, //"ps_mul", OPTYPE_PS, 0}},
 	{26, &JitArm::Default}, //"ps_rsqrte", OPTYPE_PS, 0, 1}},
-	{28, &JitArm::Default}, //"ps_msub", OPTYPE_PS, 0}},
+	{28, &JitArm::ps_msub}, //"ps_msub", OPTYPE_PS, 0}},
 	{29, &JitArm::ps_madd}, //"ps_madd", OPTYPE_PS, 0}},
 	{30, &JitArm::Default}, //"ps_nmsub", OPTYPE_PS, 0}},
 	{31, &JitArm::Default}, //"ps_nmadd", OPTYPE_PS, 0}},