2013-04-17 23:09:55 -04:00
|
|
|
// Copyright 2013 Dolphin Emulator Project
|
|
|
|
// Licensed under GPLv2
|
|
|
|
// Refer to the license.txt file included.
|
2013-02-26 13:49:00 -06:00
|
|
|
|
2014-03-28 19:06:12 +01:00
|
|
|
#include <cfenv>
|
|
|
|
|
2014-02-17 05:18:15 -05:00
|
|
|
#include "Common/Common.h"
|
|
|
|
#include "Common/CPUDetect.h"
|
2014-02-23 14:57:31 +01:00
|
|
|
#include "Common/FPURoundMode.h"
|
2013-02-26 13:49:00 -06:00
|
|
|
|
2014-02-23 14:57:31 +01:00
|
|
|
#ifdef _WIN32
|
|
|
|
# include <mmintrin.h>
|
|
|
|
#else
|
|
|
|
# include <xmmintrin.h>
|
2013-02-26 13:49:00 -06:00
|
|
|
#endif
|
|
|
|
|
|
|
|
namespace FPURoundMode
|
|
|
|
{
|
|
|
|
// Get the default SSE states here.
|
|
|
|
// Most recent MXCSR snapshot. Initialised from the live MXCSR when this static
// is constructed, overwritten by SaveSIMDState() and written back to the
// register by LoadSIMDState().
static u32 saved_sse_state = _mm_getcsr();
|
|
|
|
// MXCSR value read once when this static is initialised. It is const, so it is
// never updated afterwards; LoadDefaultSIMDState() writes it back to the register.
static const u32 default_sse_state = _mm_getcsr();
|
|
|
|
|
2014-03-09 15:21:50 +01:00
|
|
|
// Select the host floating-point rounding direction via fesetround().
//
// mode: PowerPC rounding-mode selector. Only the low two bits are used:
//       0 = to nearest, 1 = toward zero, 2 = toward +inf, 3 = toward -inf.
//       Values outside 0..3 are reduced to their low two bits instead of
//       indexing past the end of the lookup table.
void SetRoundMode(int mode)
{
	// Convert PowerPC to native rounding mode.
	static const int rounding_mode_lut[] = {
		FE_TONEAREST,
		FE_TOWARDZERO,
		FE_UPWARD,
		FE_DOWNWARD
	};

	// The table has exactly four entries; masking keeps the index in range
	// for any caller-supplied value.
	const int rn = mode & 3;
	fesetround(rounding_mode_lut[rn]);
}
|
|
|
|
|
2014-03-09 15:21:50 +01:00
|
|
|
// Set the precision-control field of the x87 FPU control word.
//
// mode: used as an index into the precision encoding table below
//       (entry 0 = 24 bits, entry 1 = 53 bits, entry 2 = 64 bits).
void SetPrecisionMode(PrecisionMode mode)
{
#ifdef _WIN32
	// NOTE(review): this branch always requests 53-bit precision and does not
	// consult `mode` - confirm that this is intentional.
	_control87(_PC_53, MCW_PC);
#else
	// The precision-control field occupies bits 8-9 of the control word.
	const unsigned short pc_field_mask = 3 << 8;
	const unsigned short pc_encoding[] = {
		0 << 8, // 24 bits
		2 << 8, // 53 bits
		3 << 8, // 64 bits
	};

	// Read the current control word, replace only the precision field and
	// write the result back.
	unsigned short control_word;
	asm ("fnstcw %0" : "=m" (control_word));
	control_word &= ~pc_field_mask;
	control_word |= pc_encoding[mode];
	asm ("fldcw %0" : : "m" (control_word));
#endif
}
|
2013-10-24 13:52:22 +02:00
|
|
|
|
2014-03-09 15:21:50 +01:00
|
|
|
void SetSIMDMode(int rounding_mode, bool non_ieee_mode)
|
2013-02-26 13:49:00 -06:00
|
|
|
{
|
2014-02-23 14:57:31 +01:00
|
|
|
// OR-mask for disabling FPU exceptions (bits 7-12 in the MXCSR register)
|
|
|
|
const u32 EXCEPTION_MASK = 0x1F80;
|
|
|
|
// Denormals-Are-Zero (non-IEEE mode: denormal inputs are set to +/- 0)
|
|
|
|
const u32 DAZ = 0x40;
|
|
|
|
// Flush-To-Zero (non-IEEE mode: denormal outputs are set to +/- 0)
|
|
|
|
const u32 FTZ = 0x8000;
|
2013-10-24 13:52:22 +02:00
|
|
|
// lookup table for FPSCR.RN-to-MXCSR.RC translation
|
2014-02-23 14:57:31 +01:00
|
|
|
static const u32 simd_rounding_table[] =
|
2013-02-26 13:49:00 -06:00
|
|
|
{
|
2013-10-24 13:52:22 +02:00
|
|
|
(0 << 13) | EXCEPTION_MASK, // nearest
|
|
|
|
(3 << 13) | EXCEPTION_MASK, // -inf
|
|
|
|
(2 << 13) | EXCEPTION_MASK, // +inf
|
|
|
|
(1 << 13) | EXCEPTION_MASK, // zero
|
2013-02-26 13:49:00 -06:00
|
|
|
};
|
2014-02-23 14:57:31 +01:00
|
|
|
u32 csr = simd_rounding_table[rounding_mode];
|
2013-10-24 13:52:22 +02:00
|
|
|
|
2014-02-23 14:57:31 +01:00
|
|
|
// Some initial steppings of Pentium 4 CPUs support FTZ but not DAZ.
|
|
|
|
// They will not flush input operands but flushing outputs only is better than nothing.
|
2013-10-24 13:52:22 +02:00
|
|
|
static const u32 denormalLUT[2] =
|
|
|
|
{
|
|
|
|
FTZ, // flush-to-zero only
|
|
|
|
FTZ | DAZ, // flush-to-zero and denormals-are-zero (may not be supported)
|
|
|
|
};
|
2014-02-23 14:57:31 +01:00
|
|
|
if (non_ieee_mode)
|
2013-10-24 13:52:22 +02:00
|
|
|
{
|
2013-10-24 22:05:53 +02:00
|
|
|
csr |= denormalLUT[cpu_info.bFlushToZero];
|
2013-10-24 13:52:22 +02:00
|
|
|
}
|
2013-02-26 13:49:00 -06:00
|
|
|
_mm_setcsr(csr);
|
|
|
|
}
|
2013-03-19 21:51:12 -04:00
|
|
|
|
2013-02-26 13:49:00 -06:00
|
|
|
void SaveSIMDState()
|
|
|
|
{
|
|
|
|
saved_sse_state = _mm_getcsr();
|
|
|
|
}
|
|
|
|
void LoadSIMDState()
|
|
|
|
{
|
|
|
|
_mm_setcsr(saved_sse_state);
|
|
|
|
}
|
|
|
|
void LoadDefaultSIMDState()
|
|
|
|
{
|
|
|
|
_mm_setcsr(default_sse_state);
|
|
|
|
}
|
|
|
|
}
|