JitArm64_FloatingPoint: Implement fctiwx in ARM64 JIT

We implement this by first rounding to nearest integer using the current
rounding mode, then converting this value from floating point to an integral
value.
This commit is contained in:
Merry 2021-11-06 19:16:02 +00:00
parent 7c2b09e156
commit 9c75957319
3 changed files with 35 additions and 13 deletions

View File

@ -144,7 +144,7 @@ public:
void fselx(UGeckoInstruction inst);
void fcmpX(UGeckoInstruction inst);
void frspx(UGeckoInstruction inst);
void fctiwzx(UGeckoInstruction inst);
void fctiwx(UGeckoInstruction inst);
void fresx(UGeckoInstruction inst);
void frsqrtex(UGeckoInstruction inst);

View File

@ -507,7 +507,7 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
FloatCompare(inst);
}
void JitArm64::fctiwzx(UGeckoInstruction inst)
void JitArm64::fctiwx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff);
@ -518,19 +518,32 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
const u32 d = inst.FD;
const bool single = fpr.IsSingle(b, true);
const bool is_fctiwzx = inst.SUBOP10 == 15;
const ARM64Reg VB = fpr.R(b, single ? RegType::LowerPairSingle : RegType::LowerPair);
const ARM64Reg VD = fpr.RW(d, RegType::LowerPair);
// TODO: The upper 32 bits of the result are set to 0xfff80000, except for -0.0 where should be
// set to 0xfff80001 (TODO).
if (single)
{
const ARM64Reg V0 = fpr.GetReg();
if (is_fctiwzx)
{
m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VB), RoundingMode::Z);
}
else
{
m_float_emit.FRINTI(EncodeRegToSingle(VD), EncodeRegToSingle(VB));
m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VD), RoundingMode::Z);
}
// Generate 0xFFF8'0000'0000'0000ULL
m_float_emit.MOVI(64, EncodeRegToDouble(V0), 0xFFFF'0000'0000'0000ULL);
m_float_emit.BIC(16, EncodeRegToDouble(V0), 0x7);
m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VB), RoundingMode::Z);
m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0));
fpr.Unlock(V0);
@ -539,7 +552,16 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
{
const ARM64Reg WA = gpr.GetReg();
m_float_emit.FCVTS(WA, EncodeRegToDouble(VB), RoundingMode::Z);
if (is_fctiwzx)
{
m_float_emit.FCVTS(WA, EncodeRegToDouble(VB), RoundingMode::Z);
}
else
{
m_float_emit.FRINTI(EncodeRegToDouble(VD), EncodeRegToDouble(VB));
m_float_emit.FCVTS(WA, EncodeRegToDouble(VD), RoundingMode::Z);
}
ORR(EncodeRegTo64(WA), EncodeRegTo64(WA), LogicalImm(0xFFF8'0000'0000'0000ULL, 64));
m_float_emit.FMOV(EncodeRegToDouble(VD), EncodeRegTo64(WA));

View File

@ -304,15 +304,15 @@ constexpr std::array<GekkoOPTemplate, 9> table59{{
}};
constexpr std::array<GekkoOPTemplate, 15> table63{{
{264, &JitArm64::fp_logic}, // fabsx
{32, &JitArm64::fcmpX}, // fcmpo
{0, &JitArm64::fcmpX}, // fcmpu
{14, &JitArm64::FallBackToInterpreter}, // fctiwx
{15, &JitArm64::fctiwzx}, // fctiwzx
{72, &JitArm64::fp_logic}, // fmrx
{136, &JitArm64::fp_logic}, // fnabsx
{40, &JitArm64::fp_logic}, // fnegx
{12, &JitArm64::frspx}, // frspx
{264, &JitArm64::fp_logic}, // fabsx
{32, &JitArm64::fcmpX}, // fcmpo
{0, &JitArm64::fcmpX}, // fcmpu
{14, &JitArm64::fctiwx}, // fctiwx
{15, &JitArm64::fctiwx}, // fctiwzx
{72, &JitArm64::fp_logic}, // fmrx
{136, &JitArm64::fp_logic}, // fnabsx
{40, &JitArm64::fp_logic}, // fnegx
{12, &JitArm64::frspx}, // frspx
{64, &JitArm64::mcrfs}, // mcrfs
{583, &JitArm64::mffsx}, // mffsx