Merge pull request #10202 from merryhime/fctiwx

JitArm64: Implement fctiwx
This commit is contained in:
Mai M 2021-11-07 00:05:51 -04:00 committed by GitHub
commit 58f8c6e529
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 40 additions and 13 deletions

View File

@ -2913,6 +2913,10 @@ void ARM64FloatEmitter::FSQRT(ARM64Reg Rd, ARM64Reg Rn)
{ {
EmitScalar1Source(0, 0, IsDouble(Rd), 3, Rd, Rn); EmitScalar1Source(0, 0, IsDouble(Rd), 3, Rd, Rn);
} }
// Emits the scalar FRINTI instruction for destination Rd and source Rn.
// Goes through the same EmitScalar1Source helper as the sibling FSQRT emitter; the only
// difference in the call is the fourth argument (15 here, 3 for FSQRT).
// IsDouble(Rd) is forwarded as the third argument, so the destination register decides that flag.
// NOTE(review): per the Arm architecture reference, FRINTI rounds to an integral floating-point
// value using the current FPCR rounding mode and has opcode 0b001111 (15) — confirm against the
// "floating-point data-processing (1 source)" encoding table.
void ARM64FloatEmitter::FRINTI(ARM64Reg Rd, ARM64Reg Rn)
{
EmitScalar1Source(0, 0, IsDouble(Rd), 15, Rd, Rn);
}
void ARM64FloatEmitter::FRECPE(ARM64Reg Rd, ARM64Reg Rn) void ARM64FloatEmitter::FRECPE(ARM64Reg Rd, ARM64Reg Rn)
{ {

View File

@ -1230,6 +1230,7 @@ public:
void FABS(ARM64Reg Rd, ARM64Reg Rn); void FABS(ARM64Reg Rd, ARM64Reg Rn);
void FNEG(ARM64Reg Rd, ARM64Reg Rn); void FNEG(ARM64Reg Rd, ARM64Reg Rn);
void FSQRT(ARM64Reg Rd, ARM64Reg Rn); void FSQRT(ARM64Reg Rd, ARM64Reg Rn);
void FRINTI(ARM64Reg Rd, ARM64Reg Rn);
void FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top = false); // Also generalized move between GPR/FP void FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top = false); // Also generalized move between GPR/FP
void FRECPE(ARM64Reg Rd, ARM64Reg Rn); void FRECPE(ARM64Reg Rd, ARM64Reg Rn);
void FRSQRTE(ARM64Reg Rd, ARM64Reg Rn); void FRSQRTE(ARM64Reg Rd, ARM64Reg Rn);

View File

@ -144,7 +144,7 @@ public:
void fselx(UGeckoInstruction inst); void fselx(UGeckoInstruction inst);
void fcmpX(UGeckoInstruction inst); void fcmpX(UGeckoInstruction inst);
void frspx(UGeckoInstruction inst); void frspx(UGeckoInstruction inst);
void fctiwzx(UGeckoInstruction inst); void fctiwx(UGeckoInstruction inst);
void fresx(UGeckoInstruction inst); void fresx(UGeckoInstruction inst);
void frsqrtex(UGeckoInstruction inst); void frsqrtex(UGeckoInstruction inst);

View File

@ -507,7 +507,7 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
FloatCompare(inst); FloatCompare(inst);
} }
void JitArm64::fctiwzx(UGeckoInstruction inst) void JitArm64::fctiwx(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITFloatingPointOff); JITDISABLE(bJITFloatingPointOff);
@ -518,19 +518,32 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
const u32 d = inst.FD; const u32 d = inst.FD;
const bool single = fpr.IsSingle(b, true); const bool single = fpr.IsSingle(b, true);
const bool is_fctiwzx = inst.SUBOP10 == 15;
const ARM64Reg VB = fpr.R(b, single ? RegType::LowerPairSingle : RegType::LowerPair); const ARM64Reg VB = fpr.R(b, single ? RegType::LowerPairSingle : RegType::LowerPair);
const ARM64Reg VD = fpr.RW(d, RegType::LowerPair); const ARM64Reg VD = fpr.RW(d, RegType::LowerPair);
// TODO: The upper 32 bits of the result are set to 0xfff80000, except for -0.0, where they
// should be set to 0xfff80001 (TODO).
if (single) if (single)
{ {
const ARM64Reg V0 = fpr.GetReg(); const ARM64Reg V0 = fpr.GetReg();
if (is_fctiwzx)
{
m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VB), RoundingMode::Z);
}
else
{
m_float_emit.FRINTI(EncodeRegToSingle(VD), EncodeRegToSingle(VB));
m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VD), RoundingMode::Z);
}
// Generate 0xFFF8'0000'0000'0000ULL // Generate 0xFFF8'0000'0000'0000ULL
m_float_emit.MOVI(64, EncodeRegToDouble(V0), 0xFFFF'0000'0000'0000ULL); m_float_emit.MOVI(64, EncodeRegToDouble(V0), 0xFFFF'0000'0000'0000ULL);
m_float_emit.BIC(16, EncodeRegToDouble(V0), 0x7); m_float_emit.BIC(16, EncodeRegToDouble(V0), 0x7);
m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VB), RoundingMode::Z);
m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0)); m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0));
fpr.Unlock(V0); fpr.Unlock(V0);
@ -539,7 +552,16 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
{ {
const ARM64Reg WA = gpr.GetReg(); const ARM64Reg WA = gpr.GetReg();
m_float_emit.FCVTS(WA, EncodeRegToDouble(VB), RoundingMode::Z); if (is_fctiwzx)
{
m_float_emit.FCVTS(WA, EncodeRegToDouble(VB), RoundingMode::Z);
}
else
{
m_float_emit.FRINTI(EncodeRegToDouble(VD), EncodeRegToDouble(VB));
m_float_emit.FCVTS(WA, EncodeRegToDouble(VD), RoundingMode::Z);
}
ORR(EncodeRegTo64(WA), EncodeRegTo64(WA), LogicalImm(0xFFF8'0000'0000'0000ULL, 64)); ORR(EncodeRegTo64(WA), EncodeRegTo64(WA), LogicalImm(0xFFF8'0000'0000'0000ULL, 64));
m_float_emit.FMOV(EncodeRegToDouble(VD), EncodeRegTo64(WA)); m_float_emit.FMOV(EncodeRegToDouble(VD), EncodeRegTo64(WA));

View File

@ -304,15 +304,15 @@ constexpr std::array<GekkoOPTemplate, 9> table59{{
}}; }};
constexpr std::array<GekkoOPTemplate, 15> table63{{ constexpr std::array<GekkoOPTemplate, 15> table63{{
{264, &JitArm64::fp_logic}, // fabsx {264, &JitArm64::fp_logic}, // fabsx
{32, &JitArm64::fcmpX}, // fcmpo {32, &JitArm64::fcmpX}, // fcmpo
{0, &JitArm64::fcmpX}, // fcmpu {0, &JitArm64::fcmpX}, // fcmpu
{14, &JitArm64::FallBackToInterpreter}, // fctiwx {14, &JitArm64::fctiwx}, // fctiwx
{15, &JitArm64::fctiwzx}, // fctiwzx {15, &JitArm64::fctiwx}, // fctiwzx
{72, &JitArm64::fp_logic}, // fmrx {72, &JitArm64::fp_logic}, // fmrx
{136, &JitArm64::fp_logic}, // fnabsx {136, &JitArm64::fp_logic}, // fnabsx
{40, &JitArm64::fp_logic}, // fnegx {40, &JitArm64::fp_logic}, // fnegx
{12, &JitArm64::frspx}, // frspx {12, &JitArm64::frspx}, // frspx
{64, &JitArm64::mcrfs}, // mcrfs {64, &JitArm64::mcrfs}, // mcrfs
{583, &JitArm64::mffsx}, // mffsx {583, &JitArm64::mffsx}, // mffsx