From cd069fdce1bd7a30f99f3bed9dfe0af60cd562d8 Mon Sep 17 00:00:00 2001 From: Tillmann Karras Date: Thu, 24 Oct 2013 22:05:53 +0200 Subject: [PATCH] Interpreter: software-based flush-to-zero bDAZ is now called bFlushToZero to better reflect what it's actually used for. I decided not to support any hardware-based flush-to-zero on systems that don't support this for both inputs _and_ outputs. It makes the code cleaner and the intersection of CPUs that support SSE2 but not DAZ should be very small. --- Source/Core/Common/Src/CPUDetect.h | 5 +++- Source/Core/Common/Src/MathUtil.h | 4 +-- Source/Core/Common/Src/x64CPUDetect.cpp | 8 ++++- Source/Core/Common/Src/x64FPURoundMode.cpp | 2 +- .../PowerPC/Interpreter/Interpreter_FPUtils.h | 29 ++++++++----------- 5 files changed, 26 insertions(+), 22 deletions(-) diff --git a/Source/Core/Common/Src/CPUDetect.h b/Source/Core/Common/Src/CPUDetect.h index eab62d3d19..967be0949b 100644 --- a/Source/Core/Common/Src/CPUDetect.h +++ b/Source/Core/Common/Src/CPUDetect.h @@ -45,7 +45,10 @@ struct CPUInfo bool bAES; // FXSAVE/FXRSTOR bool bFXSR; - bool bDAZ; + // This flag indicates that the hardware supports some mode + // in which denormal inputs _and_ outputs are automatically set to (signed) zero. 
+ // TODO: ARM + bool bFlushToZero; bool bLAHFSAHF64; bool bLongMode; diff --git a/Source/Core/Common/Src/MathUtil.h b/Source/Core/Common/Src/MathUtil.h index 31772c3c60..f085c6ed2b 100644 --- a/Source/Core/Common/Src/MathUtil.h +++ b/Source/Core/Common/Src/MathUtil.h @@ -64,10 +64,10 @@ inline float FlushToZero(float f) return x.f; } -inline double FlushToZeroAsFloat(double d) +inline double FlushToZero(double d) { IntDouble x; x.d = d; - if ((x.i & DOUBLE_EXP) < 0x3800000000000000ULL) + if ((x.i & DOUBLE_EXP) == 0) x.i &= DOUBLE_SIGN; // turn into signed zero return x.d; } diff --git a/Source/Core/Common/Src/x64CPUDetect.cpp b/Source/Core/Common/Src/x64CPUDetect.cpp index 182cca5224..d6f36eb142 100644 --- a/Source/Core/Common/Src/x64CPUDetect.cpp +++ b/Source/Core/Common/Src/x64CPUDetect.cpp @@ -162,6 +162,7 @@ void CPUInfo::Detect() if ((cpu_id[2] >> 20) & 1) bSSE4_2 = true; if ((cpu_id[2] >> 25) & 1) bAES = true; + // To check DAZ support, we first need to check FXSAVE support. if ((cpu_id[3] >> 24) & 1) { // We can use FXSAVE. @@ -181,7 +182,12 @@ void CPUInfo::Detect() // lowest byte of MXCSR_MASK if ((fx_state[0x1C] >> 6) & 1) - bDAZ = true; + { + // On x86, the FTZ field (supported since SSE1) only flushes denormal _outputs_ to zero, + // now that we checked DAZ support (flushing denormal _inputs_ to zero), + // we can set our generic flag. 
+ bFlushToZero = true; + } } // AVX support requires 3 separate checks: diff --git a/Source/Core/Common/Src/x64FPURoundMode.cpp b/Source/Core/Common/Src/x64FPURoundMode.cpp index a8b0d16809..f46c6000eb 100644 --- a/Source/Core/Common/Src/x64FPURoundMode.cpp +++ b/Source/Core/Common/Src/x64FPURoundMode.cpp @@ -103,7 +103,7 @@ namespace FPURoundMode }; if (nonIEEEMode) { - csr |= denormalLUT[cpu_info.bDAZ]; + csr |= denormalLUT[cpu_info.bFlushToZero]; } _mm_setcsr(csr); } diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FPUtils.h b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FPUtils.h index d379bf7049..9190a18ed7 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FPUtils.h +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_FPUtils.h @@ -5,6 +5,7 @@ #ifndef _INTERPRETER_FPUTILS_H #define _INTERPRETER_FPUTILS_H +#include "CPUDetect.h" #include "Interpreter.h" #include "MathUtil.h" @@ -69,28 +70,22 @@ inline void UpdateFPSCR() inline double ForceSingle(double _x) { - //if (FPSCR.RN != 0) - // PanicAlert("RN = %d at %x", (int)FPSCR.RN, PC); - if (FPSCR.NI) - _x = FlushToZeroAsFloat(_x); - - double x = static_cast<float>(_x); - + // convert to float... + float x = _x; + if (!cpu_info.bFlushToZero && FPSCR.NI) + { + x = FlushToZero(x); + } + // ...and back to double: return x; } inline double ForceDouble(double d) { - //if (FPSCR.RN != 0) - // PanicAlert("RN = %d at %x", (int)FPSCR.RN, PC); - - //if (FPSCR.NI) - //{ - // IntDouble x; x.d = d; - //if ((x.i & DOUBLE_EXP) == 0) - // x.i &= DOUBLE_SIGN; // turn into signed zero - // return x.d; - //} + if (!cpu_info.bFlushToZero && FPSCR.NI) + { + d = FlushToZero(d); + } return d; }