From 624c92f97e717ba5da05d31d83c8613664b9b0f7 Mon Sep 17 00:00:00 2001
From: Ryan Houdek <Sonicadvance1@Gmail.com>
Date: Wed, 25 Sep 2013 03:00:14 +0000
Subject: [PATCH] [ARM] fresx/fnmaddsx/fselx/frsqrtex/fnmaddx implementations.

---
 Source/Core/Core/Src/PowerPC/JitArm32/Jit.h   |   5 +
 .../PowerPC/JitArm32/JitArm_FloatingPoint.cpp | 143 ++++++++++++++++++
 .../Src/PowerPC/JitArm32/JitArm_Tables.cpp    |  10 +-
 3 files changed, 153 insertions(+), 5 deletions(-)

diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h
index 160a17bc2c..2722317850 100644
--- a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h
+++ b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h
@@ -214,6 +214,11 @@ public:
 	void fctiwzx(UGeckoInstruction _inst);
 	void fcmpo(UGeckoInstruction _inst);
 	void fcmpu(UGeckoInstruction _inst);
+	void fnmaddx(UGeckoInstruction _inst);  // negated multiply-add, writes R0 of the destination
+	void fnmaddsx(UGeckoInstruction _inst); // same operation, writes both R0 and R1 of the destination
+	void fresx(UGeckoInstruction _inst);    // currently always defers to Default()
+	void fselx(UGeckoInstruction _inst);    // select between two FPRs based on a compare of a third
+	void frsqrtex(UGeckoInstruction _inst); // reciprocal square-root estimate via NEON VRSQRTE
 
 	// Floating point loadStore
 	void lfXX(UGeckoInstruction _inst);
diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp
index eb0501002c..8c600fd588 100644
--- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp
+++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp
@@ -541,3 +541,146 @@ void JitArm::fmaddx(UGeckoInstruction inst)
 
 	if (inst.Rc) Helper_UpdateCR1(vD0);
 }
+
+void JitArm::fnmaddx(UGeckoInstruction inst) // Emits ARM code for fnmaddx: fD = -(fA * fC + fB)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff)
+
+	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
+	// Only the R0 register of each guest FPR is accessed by this emitter.
+	ARMReg vA0 = fpr.R0(a);
+	ARMReg vB0 = fpr.R0(b);
+	ARMReg vC0 = fpr.R0(c);
+	ARMReg vD0 = fpr.R0(d, false); // presumably false = write-only, old value not loaded -- TODO confirm in the FPR cache
+	// Work in a scratch register so the operands stay untouched until the final write (d may equal a, b or c).
+	ARMReg V0 = fpr.GetReg();
+
+	VMOV(V0, vB0); // seed the accumulator with fB
+	
+	VMLA(V0, vA0, vC0); // multiply-accumulate: V0 += fA * fC
+
+	VNEG(vD0, V0); // negated sum goes to the destination
+
+	fpr.Unlock(V0);
+
+	if (inst.Rc) Helper_UpdateCR1(vD0); // record form: hand the result to Helper_UpdateCR1
+}
+void JitArm::fnmaddsx(UGeckoInstruction inst) // Emits ARM code for fnmaddsx: -(fA * fC + fB) written to both R0 and R1 of fD
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff)
+
+	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
+	// Sources are read from R0 only; the destination is written through both R0 and R1.
+	ARMReg vA0 = fpr.R0(a);
+	ARMReg vB0 = fpr.R0(b);
+	ARMReg vC0 = fpr.R0(c);
+	ARMReg vD0 = fpr.R0(d, false); // presumably false = write-only, old value not loaded -- TODO confirm in the FPR cache
+	ARMReg vD1 = fpr.R1(d, false);
+	// Work in a scratch register so the operands stay untouched until the final writes (d may equal a, b or c).
+	ARMReg V0 = fpr.GetReg();
+
+	VMOV(V0, vB0); // seed the accumulator with fB
+	
+	VMLA(V0, vA0, vC0); // multiply-accumulate: V0 += fA * fC
+	// NOTE(review): no explicit rounding to single precision is emitted here -- confirm that is acceptable for the "s" form.
+	VNEG(vD0, V0); // the same negated sum is stored to both destination registers
+	VNEG(vD1, V0);
+
+	fpr.Unlock(V0);
+
+	if (inst.Rc) Helper_UpdateCR1(vD0); // record form: hand the R0 result to Helper_UpdateCR1
+}
+
+// XXX: Messes up Super Mario Sunshine title screen. The emitter currently always defers to Default() (presumably for that reason), leaving the native code below unreachable.
+void JitArm::fresx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff)
+
+	u32 b = inst.FB, d = inst.FD;
+
+	Default(inst); return; // unconditional fallback; nothing after this line is ever emitted
+	// Disabled draft: computes 1.0 / fB and stores it to both R0 and R1 of fD.
+	ARMReg vB0 = fpr.R0(b);
+	ARMReg vD0 = fpr.R0(d, false);
+	ARMReg vD1 = fpr.R1(d, false);
+
+	ARMReg V0 = fpr.GetReg();
+	MOVI2R(V0, 1.0, INVALID_REG); // temp reg isn't needed for 1.0 -- NOTE(review): MOVI2R is given an FP temp and a float literal here, while frsqrtex uses it with a GPR and a u32; verify before re-enabling
+	
+	VDIV(vD1, V0, vB0); // V0 / fB, i.e. the reciprocal of fB
+	VDIV(vD0, V0, vB0);
+	fpr.Unlock(V0);
+
+	if (inst.Rc) Helper_UpdateCR1(vD0);
+}
+
+void JitArm::fselx(UGeckoInstruction inst) // Emits ARM code for fselx: fD receives fC or fB depending on a compare of fA
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITPairedOff) // NOTE(review): fnmaddx/fnmaddsx/fresx gate on bJITFloatingPointOff -- confirm the paired flag is intended here
+	
+	u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
+	// The record form (Rc set) is not handled natively; defer to Default().
+	if (inst.Rc) {
+		Default(inst); return;
+	}
+	ARMReg vA0 = fpr.R0(a);
+	ARMReg vB0 = fpr.R0(b);
+	ARMReg vC0 = fpr.R0(c);
+	ARMReg vD0 = fpr.R0(d, false); // presumably false = write-only, old value not loaded -- TODO confirm in the FPR cache
+	
+	VCMP(vA0); // single-operand form; presumably compares fA against zero -- TODO confirm in the emitter
+	VMRS(_PC); // presumably copies the FP compare flags into the CPU flags used by B_CC below -- confirm
+
+	FixupBranch GT0 = B_CC(CC_GE); // taken on GE (the label name says GT, the condition is GE)
+	VMOV(vD0, vB0); // condition false: select fB
+	FixupBranch EQ0 = B(); // jump over the other arm; EQ0 is only the join point
+	SetJumpTarget(GT0);
+	VMOV(vD0, vC0); // condition true: select fC
+	SetJumpTarget(EQ0);
+}
+
+void JitArm::frsqrtex(UGeckoInstruction inst) // Emits ARM code for frsqrtex using the NEON VRSQRTE estimate, updating the guest FPSCR
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITPairedOff) // NOTE(review): fnmaddx/fnmaddsx/fresx gate on bJITFloatingPointOff -- confirm the paired flag is intended here
+
+	u32 b = inst.FB, d = inst.FD;
+	if (inst.Rc){
+		Default(inst); return; // record form is not handled natively
+	}
+	ARMReg vB0 = fpr.R0(b);
+	ARMReg vD0 = fpr.R0(d, false);
+	ARMReg fpscrReg = gpr.GetReg();
+	ARMReg V0 = D1; // NOTE(review): hard-coded D1 instead of a register from fpr.GetReg() -- confirm nothing live is cached there
+	ARMReg rA = gpr.GetReg(); // NOTE(review): only allocated and unlocked; never otherwise used in this function
+	// fpscrReg first serves as a scratch pointer to load PPC_NAN into V0, then is reloaded with the guest fpscr field.
+	MOVI2R(fpscrReg, (u32)&PPC_NAN);
+	VLDR(V0, fpscrReg, 0);
+	LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); // R9 presumably holds the PPC state base pointer -- confirm
+	// NOTE(review): as written, the fall-through (LT false) arm stores NaN and raises VXSQRT, while the LT arm raises ZX unless EQ; this looks swapped -- confirm.
+	VCMP(vB0);
+	VMRS(_PC);
+	FixupBranch Less0 = B_CC(CC_LT);
+		VMOV(vD0, V0);	
+		SetFPException(fpscrReg, FPSCR_VXSQRT);
+		FixupBranch SkipOrr0 = B();
+	SetJumpTarget(Less0);
+		FixupBranch noException = B_CC(CC_EQ);
+		SetFPException(fpscrReg, FPSCR_ZX);
+		SetJumpTarget(noException);
+	SetJumpTarget(SkipOrr0);
+	// Estimate path runs on every route: convert fB into S0, estimate on D0 (F_32), convert S0 back. S0/D0 are hard-coded scratch -- NOTE(review): confirm they are free.
+	VCVT(S0, vB0, 0);
+
+	NEONXEmitter nemit(this);
+	nemit.VRSQRTE(F_32, D0, D0);
+	VCVT(vD0, S0, 0); // unconditionally overwrites vD0, including the NaN stored in the branch above
+
+	STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); // write the (possibly updated) FPSCR value back to the PPC state
+	gpr.Unlock(fpscrReg, rA);
+}
+
diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp
index 848f5338b5..9eea997317 100644
--- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp
+++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp
@@ -336,12 +336,12 @@ static GekkoOPTemplate table59[] =
 	{20, &JitArm::fsubsx}, //"fsubsx",   OPTYPE_FPU, FL_RC_BIT_F}}, 
 	{21, &JitArm::faddsx}, //"faddsx",   OPTYPE_FPU, FL_RC_BIT_F}}, 
 //	{22, &JitArm::Default}, //"fsqrtsx",  OPTYPE_FPU, FL_RC_BIT_F}}, // Not implemented on gekko
-	{24, &JitArm::Default}, //"fresx",    OPTYPE_FPU, FL_RC_BIT_F}}, 
+	{24, &JitArm::fresx}, //"fresx",    OPTYPE_FPU, FL_RC_BIT_F}}, 
 	{25, &JitArm::fmulsx}, //"fmulsx",   OPTYPE_FPU, FL_RC_BIT_F}}, 
 	{28, &JitArm::Default}, //"fmsubsx",  OPTYPE_FPU, FL_RC_BIT_F}}, 
 	{29, &JitArm::fmaddsx}, //"fmaddsx",  OPTYPE_FPU, FL_RC_BIT_F}}, 
 	{30, &JitArm::Default}, //"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, 
-	{31, &JitArm::Default}, //"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F}}, 
+	{31, &JitArm::fnmaddsx}, //"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F}}, 
 };							    
 
 static GekkoOPTemplate table63[] = 
@@ -370,13 +370,13 @@ static GekkoOPTemplate table63_2[] =
 	{20, &JitArm::fsubx}, //"fsubx",    OPTYPE_FPU, FL_RC_BIT_F}},
 	{21, &JitArm::faddx}, //"faddx",    OPTYPE_FPU, FL_RC_BIT_F}},
 	{22, &JitArm::Default}, //"fsqrtx",   OPTYPE_FPU, FL_RC_BIT_F}},
-	{23, &JitArm::Default}, //"fselx",    OPTYPE_FPU, FL_RC_BIT_F}},
+	{23, &JitArm::fselx}, //"fselx",    OPTYPE_FPU, FL_RC_BIT_F}},
 	{25, &JitArm::fmulx}, //"fmulx",    OPTYPE_FPU, FL_RC_BIT_F}},
-	{26, &JitArm::Default}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
+	{26, &JitArm::frsqrtex}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
 	{28, &JitArm::Default}, //"fmsubx",   OPTYPE_FPU, FL_RC_BIT_F}},
 	{29, &JitArm::fmaddx}, //"fmaddx",   OPTYPE_FPU, FL_RC_BIT_F}},
 	{30, &JitArm::Default}, //"fnmsubx",  OPTYPE_FPU, FL_RC_BIT_F}},
-	{31, &JitArm::Default}, //"fnmaddx",  OPTYPE_FPU, FL_RC_BIT_F}},
+	{31, &JitArm::fnmaddx}, //"fnmaddx",  OPTYPE_FPU, FL_RC_BIT_F}},
 };