From c512ae13f3de0eb0c6316a42090120604b3cd28c Mon Sep 17 00:00:00 2001 From: Geotale Date: Wed, 2 Oct 2024 01:32:40 -0500 Subject: [PATCH] Update JITs and Tests Assume NI Set For Unit Tests This does *not* match x86-64, which properly handles any weird values using a function call. This should hopefully pass the tests though, which is important before fixing that issue. I had forgotten that the JITs would use the same modified base and pair tables ^^; Also fixes the call for complex inputs on x86. This saves an instruction on both x86-64 and ARM64!! TODO: Due to the fixes to the interpreter, the ARM64 JIT likely doesn't match the x86 JIT, which calls a fallback on weird inputs. --- .../Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp | 10 +++++----- Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 5 ++--- Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp | 6 +++++- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp index 94e7f99423..4a68fb5d1f 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp @@ -254,18 +254,17 @@ void CommonAsmRoutines::GenFres() IMUL(32, RSCRATCH, MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(Common::BaseAndDec, m_dec))); - ADD(32, R(RSCRATCH), Imm8(1)); - SHR(32, R(RSCRATCH), Imm8(1)); MOV(32, R(RSCRATCH2), MComplex(RSCRATCH_EXTRA, RSCRATCH2, SCALE_8, offsetof(Common::BaseAndDec, m_base))); - SUB(32, R(RSCRATCH2), R(RSCRATCH)); + ADD(32, R(RSCRATCH2), R(RSCRATCH)); + SHR(32, R(RSCRATCH2), Imm8(1)); SHL(64, R(RSCRATCH2), Imm8(29)); POP(RSCRATCH_EXTRA); - OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(fres_expected_base[i / 1024] - - // (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2) + OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)((u64)((fres_expected_base[i / 1024] + + // fres_expected_dec[i / 1024] * (i % 1024)) / 2)) // << 29
MOVQ_xmm(XMM0, R(RSCRATCH2)); RET(); @@ -279,6 +278,7 @@ void CommonAsmRoutines::GenFres() SetJumpTarget(complex); ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); + LEA(64, ABI_PARAM1, PPCSTATE(fpscr)); ABI_CallFunction(Common::ApproximateReciprocal); ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); RET(); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 56c26739a3..4ad97606e9 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -292,11 +292,10 @@ void JitArm64::GenerateFres() ADD(ARM64Reg::X2, ARM64Reg::X3, ARM64Reg::X2, ArithOption(ARM64Reg::X2, ShiftType::LSL, 3)); UBFX(ARM64Reg::X1, ARM64Reg::X1, 37, 10); // Grab lower part of mantissa LDP(IndexType::Signed, ARM64Reg::W2, ARM64Reg::W3, ARM64Reg::X2, 0); - MOVI2R(ARM64Reg::W4, 1); - MADD(ARM64Reg::W1, ARM64Reg::W3, ARM64Reg::W1, ARM64Reg::W4); - SUB(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W1, ArithOption(ARM64Reg::W1, ShiftType::LSR, 1)); + MADD(ARM64Reg::W1, ARM64Reg::W3, ARM64Reg::W1, ARM64Reg::W2); AND(ARM64Reg::X0, ARM64Reg::X0, LogicalImm(Common::DOUBLE_SIGN | Common::DOUBLE_EXP, GPRSize::B64)); + LSR(ARM64Reg::W1, ARM64Reg::W1, 1); ORR(ARM64Reg::X0, ARM64Reg::X0, ARM64Reg::X1, ArithOption(ARM64Reg::X1, ShiftType::LSL, 29)); RET(); diff --git a/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp b/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp index fcfc8577a2..749bbc152e 100644 --- a/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp +++ b/Source/UnitTests/Core/PowerPC/JitArm64/Fres.cpp @@ -40,6 +40,7 @@ public: MOV(ARM64Reg::X1, ARM64Reg::X0); m_float_emit.FMOV(ARM64Reg::D0, ARM64Reg::X0); m_float_emit.FRECPE(ARM64Reg::D0, ARM64Reg::D0); + m_float_emit.FMOV(ARM64Reg::X0, ARM64Reg::D0); BL(raw_fres); MOV(ARM64Reg::X30, ARM64Reg::X15); MOV(PPC_REG, ARM64Reg::X14); @@ -58,11 +59,14 @@ TEST(JitArm64, Fres) TestFres test(Core::System::GetInstance()); + // FPSCR with NI set + 
const UReg_FPSCR fpscr = UReg_FPSCR(0x00000004); + for (const u64 ivalue : double_test_values) { const double dvalue = std::bit_cast(ivalue); - const u64 expected = std::bit_cast(Common::ApproximateReciprocal(dvalue)); + const u64 expected = std::bit_cast(Common::ApproximateReciprocal(fpscr, dvalue)); const u64 actual = test.fres(ivalue); if (expected != actual)