From cd069fdce1bd7a30f99f3bed9dfe0af60cd562d8 Mon Sep 17 00:00:00 2001
From: Tillmann Karras <tilkax@gmail.com>
Date: Thu, 24 Oct 2013 22:05:53 +0200
Subject: [PATCH] Interpreter: software-based flush-to-zero

bDAZ is now called bFlushToZero to better reflect what it's actually
used for.

I decided not to support any hardware-based flush-to-zero on systems
that don't support it for both inputs _and_ outputs. This keeps the code
cleaner, and the set of CPUs that support SSE2 but not DAZ should be
very small.
---
 Source/Core/Common/Src/CPUDetect.h            |  5 +++-
 Source/Core/Common/Src/MathUtil.h             |  4 +--
 Source/Core/Common/Src/x64CPUDetect.cpp       |  8 ++++-
 Source/Core/Common/Src/x64FPURoundMode.cpp    |  2 +-
 .../PowerPC/Interpreter/Interpreter_FPUtils.h | 29 ++++++++-----------
 5 files changed, 26 insertions(+), 22 deletions(-)
diff --git a/Source/Core/Common/Src/CPUDetect.h b/Source/Core/Common/Src/CPUDetect.h
index eab62d3d19..967be0949b 100644
--- a/Source/Core/Common/Src/CPUDetect.h
+++ b/Source/Core/Common/Src/CPUDetect.h
@@ -45,7 +45,10 @@ struct CPUInfo
 	bool bAES;
 	// FXSAVE/FXRSTOR
 	bool bFXSR;
-	bool bDAZ;
+	// This flag indicates that the hardware supports some mode
+	// in which denormal inputs _and_ outputs are automatically set to (signed) zero.
+	// TODO: ARM
+	bool bFlushToZero;
 	bool bLAHFSAHF64;
 	bool bLongMode;
 
diff --git a/Source/Core/Common/Src/MathUtil.h b/Source/Core/Common/Src/MathUtil.h
index 31772c3c60..f085c6ed2b 100644
--- a/Source/Core/Common/Src/MathUtil.h
+++ b/Source/Core/Common/Src/MathUtil.h
@@ -64,10 +64,10 @@ inline float FlushToZero(float f)
 	return x.f;
 }
 
-inline double FlushToZeroAsFloat(double d)
+inline double FlushToZero(double d)
 {
 	IntDouble x; x.d = d;
-	if ((x.i & DOUBLE_EXP) < 0x3800000000000000ULL)
+	if ((x.i & DOUBLE_EXP) == 0)
 		x.i &= DOUBLE_SIGN;  // turn into signed zero
 	return x.d;
 }
diff --git a/Source/Core/Common/Src/x64CPUDetect.cpp b/Source/Core/Common/Src/x64CPUDetect.cpp
index 182cca5224..d6f36eb142 100644
--- a/Source/Core/Common/Src/x64CPUDetect.cpp
+++ b/Source/Core/Common/Src/x64CPUDetect.cpp
@@ -162,6 +162,7 @@ void CPUInfo::Detect()
 		if ((cpu_id[2] >> 20) & 1) bSSE4_2 = true;
 		if ((cpu_id[2] >> 25) & 1) bAES = true;
 
+		// To check DAZ support, we first need to check FXSAVE support.
 		if ((cpu_id[3] >> 24) & 1)
 		{
 			// We can use FXSAVE.
@@ -181,7 +182,12 @@ void CPUInfo::Detect()
 
 			// lowest byte of MXCSR_MASK
 			if ((fx_state[0x1C] >> 6) & 1)
-				bDAZ = true;
+			{
+				// On x86, the FTZ field (supported since SSE1) only flushes denormal _outputs_ to zero.
+				// Now that we have also checked DAZ support (flushing denormal _inputs_ to zero),
+				// we can set our generic flag.
+				bFlushToZero = true;
+			}
 		}
 
 		// AVX support requires 3 separate checks:
diff --git a/Source/Core/Common/Src/x64FPURoundMode.cpp b/Source/Core/Common/Src/x64FPURoundMode.cpp
index a8b0d16809..f46c6000eb 100644
--- a/Source/Core/Common/Src/x64FPURoundMode.cpp
+++ b/Source/Core/Common/Src/x64FPURoundMode.cpp
@@ -103,7 +103,7 @@ namespace FPURoundMode
 		};
 		if (nonIEEEMode)
 		{
-			csr |= denormalLUT[cpu_info.bDAZ];
+			csr |= denormalLUT[cpu_info.bFlushToZero];
 		}
 		_mm_setcsr(csr);
 	}
diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FPUtils.h b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FPUtils.h
index d379bf7049..9190a18ed7 100644
--- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FPUtils.h
+++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FPUtils.h
@@ -5,6 +5,7 @@
 #ifndef _INTERPRETER_FPUTILS_H
 #define _INTERPRETER_FPUTILS_H
 
+#include "CPUDetect.h"
 #include "Interpreter.h"
 #include "MathUtil.h"
 
@@ -69,28 +70,22 @@ inline void UpdateFPSCR()
 
 inline double ForceSingle(double _x)
 {
-	//if (FPSCR.RN != 0)
-	//	PanicAlert("RN = %d at %x", (int)FPSCR.RN, PC);
-	if (FPSCR.NI)
-		_x = FlushToZeroAsFloat(_x);
-
-	double x = static_cast<float>(_x);
-
+	// convert to float...
+	float x = _x;
+	if (!cpu_info.bFlushToZero && FPSCR.NI)
+	{
+		x = FlushToZero(x);
+	}
+	// ...and back to double:
 	return x;
 }
 
 inline double ForceDouble(double d)
 {
-	//if (FPSCR.RN != 0)
-	//	PanicAlert("RN = %d at %x", (int)FPSCR.RN, PC);
-
-	//if (FPSCR.NI)
-	//{
-	//	IntDouble x; x.d = d;
-		//if ((x.i & DOUBLE_EXP) == 0)
-		//	x.i &= DOUBLE_SIGN;  // turn into signed zero
-	//	return x.d;
-	//}
+	if (!cpu_info.bFlushToZero && FPSCR.NI)
+	{
+		d = FlushToZero(d);
+	}
 	return d;
 }