From 1b425dedd1264965a6f7145c158a5727279493a8 Mon Sep 17 00:00:00 2001
From: Fiora <fioraaeterna@gmail.com>
Date: Thu, 18 Sep 2014 03:57:24 -0700
Subject: [PATCH] JIT: add ps_cmp0/ps_cmp1 implementations using current fcmp
 code

---
 Source/Core/Core/PowerPC/Jit64/Jit.h          |  2 +
 .../Core/Core/PowerPC/Jit64/Jit64_Tables.cpp  |  8 ++--
 .../Core/PowerPC/Jit64/Jit_FloatingPoint.cpp  | 44 +++++++++++++------
 Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp |  9 ++++
 4 files changed, 45 insertions(+), 18 deletions(-)

diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h
index 0391d258cc..6faf302422 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@@ -142,6 +142,7 @@ public:
 	typedef u32 (*Operation)(u32 a, u32 b);
 	void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (Gen::XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);
 	void fp_tri_op(int d, int a, int b, bool reversible, bool single, void (Gen::XEmitter::*op)(Gen::X64Reg, Gen::OpArg), UGeckoInstruction inst, bool roundRHS = false);
+	void FloatCompare(UGeckoInstruction inst, bool upper = false);
 
 	// OPCODES
 	void unknown_instruction(UGeckoInstruction _inst);
@@ -199,6 +200,7 @@ public:
 	void ps_maddXX(UGeckoInstruction inst);
 	void ps_sum(UGeckoInstruction inst);
 	void ps_muls(UGeckoInstruction inst);
+	void ps_cmpXX(UGeckoInstruction inst);
 
 	void fp_arith(UGeckoInstruction inst);
 
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp
index 0007915089..4437c67e47 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp
@@ -110,14 +110,14 @@ static GekkoOPTemplate primarytable[] =
 
 static GekkoOPTemplate table4[] =
 {    //SUBOP10
-	{0,    &Jit64::FallBackToInterpreter}, //"ps_cmpu0",   OPTYPE_PS, FL_SET_CRn}},
-	{32,   &Jit64::FallBackToInterpreter}, //"ps_cmpo0",   OPTYPE_PS, FL_SET_CRn}},
+	{0,    &Jit64::ps_cmpXX},              //"ps_cmpu0",   OPTYPE_PS, FL_SET_CRn}},
+	{32,   &Jit64::ps_cmpXX},              //"ps_cmpo0",   OPTYPE_PS, FL_SET_CRn}},
 	{40,   &Jit64::ps_sign},               //"ps_neg",     OPTYPE_PS, FL_RC_BIT}},
 	{136,  &Jit64::ps_sign},               //"ps_nabs",    OPTYPE_PS, FL_RC_BIT}},
 	{264,  &Jit64::ps_sign},               //"ps_abs",     OPTYPE_PS, FL_RC_BIT}},
-	{64,   &Jit64::FallBackToInterpreter}, //"ps_cmpu1",   OPTYPE_PS, FL_RC_BIT}},
+	{64,   &Jit64::ps_cmpXX},              //"ps_cmpu1",   OPTYPE_PS, FL_RC_BIT}},
 	{72,   &Jit64::ps_mr},                 //"ps_mr",      OPTYPE_PS, FL_RC_BIT}},
-	{96,   &Jit64::FallBackToInterpreter}, //"ps_cmpo1",   OPTYPE_PS, FL_RC_BIT}},
+	{96,   &Jit64::ps_cmpXX},              //"ps_cmpo1",   OPTYPE_PS, FL_RC_BIT}},
 	{528,  &Jit64::ps_mergeXX},            //"ps_merge00", OPTYPE_PS, FL_RC_BIT}},
 	{560,  &Jit64::ps_mergeXX},            //"ps_merge01", OPTYPE_PS, FL_RC_BIT}},
 	{592,  &Jit64::ps_mergeXX},            //"ps_merge10", OPTYPE_PS, FL_RC_BIT}},
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
index a20dd17e89..25bc67a578 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
@@ -267,25 +267,32 @@ void Jit64::fmrx(UGeckoInstruction inst)
 	fpr.UnlockAll();
 }
 
-void Jit64::fcmpx(UGeckoInstruction inst)
+void Jit64::FloatCompare(UGeckoInstruction inst, bool upper)
 {
-	INSTRUCTION_START
-	JITDISABLE(bJITFloatingPointOff);
-	FALLBACK_IF(jo.fpAccurateFcmp);
-
-	//bool ordered = inst.SUBOP10 == 32;
-	int a   = inst.FA;
-	int b   = inst.FB;
-	int crf = inst.CRFD;
 	bool fprf = SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableFPRF && js.op->wantsFPRF;
+	//bool ordered = !!(inst.SUBOP10 & 32);
+	int a = inst.FA;
+	int b = inst.FB;
+	int crf = inst.CRFD;
 
-	fpr.Lock(a,b);
-	fpr.BindToRegister(b, true);
+	fpr.Lock(a, b);
+	fpr.BindToRegister(b, true, false);
 
 	if (fprf)
 		AND(32, PPCSTATE(fpscr), Imm32(~FPRF_MASK));
-	// Are we masking sNaN invalid floating point exceptions? If not this could crash if we don't handle the exception?
-	UCOMISD(fpr.R(b).GetSimpleReg(), fpr.R(a));
+
+	if (upper)
+	{
+		fpr.BindToRegister(a, true, false);
+		MOVHLPS(XMM0, fpr.RX(a));
+		MOVHLPS(XMM1, fpr.RX(b));
+		UCOMISD(XMM1, R(XMM0));
+	}
+	else
+	{
+		// Are we masking sNaN invalid floating point exceptions? If not this could crash if we don't handle the exception?
+		UCOMISD(fpr.RX(b), fpr.R(a));
+	}
 
 	FixupBranch pNaN, pLesser, pGreater;
 	FixupBranch continue1, continue2, continue3;
@@ -293,7 +300,7 @@ void Jit64::fcmpx(UGeckoInstruction inst)
 	if (a != b)
 	{
 		// if B > A, goto Lesser's jump target
-		pLesser  = J_CC(CC_A);
+		pLesser = J_CC(CC_A);
 	}
 
 	// if (B != B) or (A != A), goto NaN's jump target
@@ -344,6 +351,15 @@ void Jit64::fcmpx(UGeckoInstruction inst)
 	fpr.UnlockAll();
 }
 
+void Jit64::fcmpx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);
+	FALLBACK_IF(jo.fpAccurateFcmp);
+
+	FloatCompare(inst);
+}
+
 void Jit64::fctiwx(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
index f08fb6b863..f8e4adfec4 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Paired.cpp
@@ -351,3 +351,12 @@ void Jit64::ps_maddXX(UGeckoInstruction inst)
 	MOVAPD(fpr.RX(d), R(XMM0));
 	fpr.UnlockAll();
 }
+
+void Jit64::ps_cmpXX(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITFloatingPointOff);
+	FALLBACK_IF(jo.fpAccurateFcmp);
+
+	FloatCompare(inst, !!(inst.SUBOP10 & 64));
+}