From 466a7afde336d381e250d51e4c268c769e072a02 Mon Sep 17 00:00:00 2001 From: Tillmann Karras Date: Thu, 24 Oct 2013 13:52:22 +0200 Subject: [PATCH] Interpreter: support non-IEEE mode emulation v2: fix fxsave on visual studio, thx @ rodolfo for this patch --- Source/Core/Common/Src/CPUDetect.h | 3 ++ Source/Core/Common/Src/FPURoundMode.h | 2 +- .../Core/Common/Src/GenericFPURoundMode.cpp | 2 +- Source/Core/Common/Src/x64CPUDetect.cpp | 22 +++++++++++++ Source/Core/Common/Src/x64FPURoundMode.cpp | 32 ++++++++++++++----- .../Interpreter_SystemRegisters.cpp | 11 ++----- 6 files changed, 53 insertions(+), 19 deletions(-) diff --git a/Source/Core/Common/Src/CPUDetect.h b/Source/Core/Common/Src/CPUDetect.h index e93a902d63..eab62d3d19 100644 --- a/Source/Core/Common/Src/CPUDetect.h +++ b/Source/Core/Common/Src/CPUDetect.h @@ -43,6 +43,9 @@ struct CPUInfo bool bAVX; bool bFMA; bool bAES; + // FXSAVE/FXRSTOR + bool bFXSR; + bool bDAZ; bool bLAHFSAHF64; bool bLongMode; diff --git a/Source/Core/Common/Src/FPURoundMode.h b/Source/Core/Common/Src/FPURoundMode.h index fad4d5d6aa..c552ad7ff0 100644 --- a/Source/Core/Common/Src/FPURoundMode.h +++ b/Source/Core/Common/Src/FPURoundMode.h @@ -36,7 +36,7 @@ namespace FPURoundMode void SetPrecisionMode(u32 mode); - void SetSIMDMode(u32 mode); + void SetSIMDMode(u32 roundingMode, u32 nonIEEEMode); /* * There are two different flavors of float to int conversion: diff --git a/Source/Core/Common/Src/GenericFPURoundMode.cpp b/Source/Core/Common/Src/GenericFPURoundMode.cpp index cc878291a1..c8e70a4990 100644 --- a/Source/Core/Common/Src/GenericFPURoundMode.cpp +++ b/Source/Core/Common/Src/GenericFPURoundMode.cpp @@ -26,7 +26,7 @@ namespace FPURoundMode void SetPrecisionMode(u32 mode) { } - void SetSIMDMode(u32 mode) + void SetSIMDMode(u32 mode, u32 nonIEEEMode) { } void SaveSIMDState() diff --git a/Source/Core/Common/Src/x64CPUDetect.cpp b/Source/Core/Common/Src/x64CPUDetect.cpp index 2b434ad2b6..182cca5224 100644 --- a/Source/Core/Common/Src/x64CPUDetect.cpp +++ b/Source/Core/Common/Src/x64CPUDetect.cpp @@ -162,6 +162,28 @@ void CPUInfo::Detect() if ((cpu_id[2] >> 20) & 1) bSSE4_2 = true; if ((cpu_id[2] >> 25) & 1) bAES = true; + if ((cpu_id[3] >> 24) & 1) + { + // We can use FXSAVE. + bFXSR = true; + + GC_ALIGNED16(u8 fx_state[512]); + memset(fx_state, 0, sizeof(fx_state)); +#ifdef _WIN32 +#ifdef _M_IX86 + _fxsave(fx_state); +#elif defined (_M_X64) + _fxsave64(fx_state); +#endif +#else + __asm__("fxsave %0" : "=m" (fx_state)); +#endif + + // lowest byte of MXCSR_MASK + if ((fx_state[0x1C] >> 6) & 1) + bDAZ = true; + } + // AVX support requires 3 separate checks: // - Is the AVX bit set in CPUID? // - Is the XSAVE bit set in CPUID? diff --git a/Source/Core/Common/Src/x64FPURoundMode.cpp b/Source/Core/Common/Src/x64FPURoundMode.cpp index 2c950ade96..a8b0d16809 100644 --- a/Source/Core/Common/Src/x64FPURoundMode.cpp +++ b/Source/Core/Common/Src/x64FPURoundMode.cpp @@ -4,6 +4,7 @@ #include "Common.h" #include "FPURoundMode.h" +#include "CPUDetect.h" #ifndef _WIN32 static const unsigned short FPU_ROUND_NEAR = 0 << 10; @@ -14,8 +15,11 @@ static const unsigned short FPU_ROUND_MASK = 3 << 10; #include #endif -const u32 MASKS = 0x1F80; // mask away the interrupts. +// OR-mask for disabling FPU exceptions (bits 7-12 in the MXCSR register) +const u32 EXCEPTION_MASK = 0x1F80; +// Denormals-Are-Zero (non-IEEE mode: denormal inputs are set to +/- 0) const u32 DAZ = 0x40; +// Flush-To-Zero (non-IEEE mode: denormal outputs are set to +/- 0) const u32 FTZ = 0x8000; namespace FPURoundMode @@ -79,16 +83,28 @@ namespace FPURoundMode //but still - set any useful sse options here #endif } - void SetSIMDMode(u32 mode) + + void SetSIMDMode(u32 roundingMode, u32 nonIEEEMode) { - static const u32 ssetable[4] = + // lookup table for FPSCR.RN-to-MXCSR.RC translation + static const u32 roundingModeLUT[4] = { - (0 << 13) | MASKS, - (3 << 13) | MASKS, - (2 << 13) | MASKS, - (1 << 13) | MASKS, + (0 << 13) | EXCEPTION_MASK, // nearest + (3 << 13) | EXCEPTION_MASK, // -inf + (2 << 13) | EXCEPTION_MASK, // +inf + (1 << 13) | EXCEPTION_MASK, // zero }; - u32 csr = ssetable[mode]; + u32 csr = roundingModeLUT[roundingMode]; + + static const u32 denormalLUT[2] = + { + FTZ, // flush-to-zero only + FTZ | DAZ, // flush-to-zero and denormals-are-zero (may not be supported) + }; + if (nonIEEEMode) + { + csr |= denormalLUT[cpu_info.bDAZ]; + } _mm_setcsr(csr); } diff --git a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp index 688d166608..475f7591ce 100644 --- a/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp @@ -48,15 +48,8 @@ static void FPSCRtoFPUSettings(UReg_FPSCR fp) // Pokemon Colosseum does this. Gah. } - // Also corresponding SSE rounding mode setting - if (FPSCR.NI) - { - // Either one of these two breaks Beyond Good & Evil. - // if (cpu_info.bSSSE3) - // csr |= DAZ; - // csr |= FTZ; - } - FPURoundMode::SetSIMDMode(FPSCR.RN); + // Set SSE rounding mode and denormal handling + FPURoundMode::SetSIMDMode(FPSCR.RN, FPSCR.NI); } void Interpreter::mtfsb0x(UGeckoInstruction _inst)