Update JITs and Tests

Assume NI Set For Unit Tests

This does *not* match x86-64, which properly handles any weird values using a function call.
This should hopefully pass the tests though, which is important before fixing that issue.

I had forgotten that the JITs would use the same modified base and pair tables ^^;
Also fixes the call for complex inputs in the x86 JIT.
This saves an instruction on both x86-64 and ARM64!!
TODO: Due to fixes to the interpreter, the ARM64 JIT likely doesn't match the x86 JIT, which calls a fallback on weird inputs.
This commit is contained in:
Geotale 2024-10-02 01:32:40 -05:00
parent f0e6a1363f
commit c512ae13f3
3 changed files with 12 additions and 9 deletions

View File

@ -254,18 +254,17 @@ void CommonAsmRoutines::GenFres()
IMUL(32, RSCRATCH,
MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(Common::BaseAndDec, m_dec)));
ADD(32, R(RSCRATCH), Imm8(1));
SHR(32, R(RSCRATCH), Imm8(1));
MOV(32, R(RSCRATCH2),
MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(Common::BaseAndDec, m_base)));
SUB(32, R(RSCRATCH2), R(RSCRATCH));
ADD(32, R(RSCRATCH2), R(RSCRATCH));
SHR(32, R(RSCRATCH2), Imm8(1));
SHL(64, R(RSCRATCH2), Imm8(29));
POP(RSCRATCH_EXTRA);
OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(fres_expected_base[i / 1024] -
// (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2)
OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)((u64)(fres_expected_base[i / 1024] +
// (fres_expected_dec[i / 1024] * (i % 1024)) / 2))
// << 29
MOVQ_xmm(XMM0, R(RSCRATCH2));
RET();
@ -279,6 +278,7 @@ void CommonAsmRoutines::GenFres()
SetJumpTarget(complex);
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
LEA(64, ABI_PARAM1, PPCSTATE(fpscr));
ABI_CallFunction(Common::ApproximateReciprocal);
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
RET();

View File

@ -292,11 +292,10 @@ void JitArm64::GenerateFres()
ADD(ARM64Reg::X2, ARM64Reg::X3, ARM64Reg::X2, ArithOption(ARM64Reg::X2, ShiftType::LSL, 3));
UBFX(ARM64Reg::X1, ARM64Reg::X1, 37, 10); // Grab lower part of mantissa
LDP(IndexType::Signed, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::X2, 0);
MOVI2R(ARM64Reg::W4, 1);
MADD(ARM64Reg::W1, ARM64Reg::W3, ARM64Reg::W1, ARM64Reg::W4);
SUB(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W1, ArithOption(ARM64Reg::W1, ShiftType::LSR, 1));
MADD(ARM64Reg::W1, ARM64Reg::W3, ARM64Reg::W1, ARM64Reg::W2);
AND(ARM64Reg::X0, ARM64Reg::X0,
LogicalImm(Common::DOUBLE_SIGN | Common::DOUBLE_EXP, GPRSize::B64));
LSR(ARM64Reg::W1, ARM64Reg::W1, 1);
ORR(ARM64Reg::X0, ARM64Reg::X0, ARM64Reg::X1, ArithOption(ARM64Reg::X1, ShiftType::LSL, 29));
RET();

View File

@ -40,6 +40,7 @@ public:
MOV(ARM64Reg::X1, ARM64Reg::X0);
m_float_emit.FMOV(ARM64Reg::D0, ARM64Reg::X0);
m_float_emit.FRECPE(ARM64Reg::D0, ARM64Reg::D0);
m_float_emit.FMOV(ARM64Reg::X0, ARM64Reg::D0);
BL(raw_fres);
MOV(ARM64Reg::X30, ARM64Reg::X15);
MOV(PPC_REG, ARM64Reg::X14);
@ -58,11 +59,14 @@ TEST(JitArm64, Fres)
TestFres test(Core::System::GetInstance());
// FPSCR with NI set
const UReg_FPSCR fpscr = UReg_FPSCR(0x00000004);
for (const u64 ivalue : double_test_values)
{
const double dvalue = std::bit_cast<double>(ivalue);
const u64 expected = std::bit_cast<u64>(Common::ApproximateReciprocal(dvalue));
const u64 expected = std::bit_cast<u64>(Common::ApproximateReciprocal(fpscr, dvalue));
const u64 actual = test.fres(ivalue);
if (expected != actual)