From 709b64d310b5808506cd242eea4baaad363f0ce0 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Thu, 26 Dec 2024 12:01:03 +0100 Subject: [PATCH] JitArm64: Fix frsqrte misclassifying large negative inputs as NaN The frsqrte routine has a little trick where it uses `TBNZ(ARM64Reg::X1, 62)` to check if the input is NaN/Inf. This only works if we've checked that the input isn't normal. While we have checked that the input isn't a positive normal at this point, it could still be a negative normal. Because of this, the frsqrte routine would incorrectly treat certain negative normal inputs as NaNs, causing it to output the wrong exception in FPSCR (while still producing the correct numerical value). To fix the problem, I'm swapping the order of the lines `FixupBranch nan_or_inf = TBNZ(ARM64Reg::X1, 62);` and `FixupBranch negative = TBNZ(ARM64Reg::X1, 63);`, and then instead of having the `nan_or_inf` case do a special check for negative infinity, I'm making the `negative` case do a special check for NaN. The total instruction count is the same. --- Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 56c26739a3..de1e22424c 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -363,8 +363,8 @@ void JitArm64::GenerateFrsqrte() RET(); SetJumpTarget(not_positive_normal_not_zero); - FixupBranch nan_or_inf = TBNZ(ARM64Reg::X1, 62); FixupBranch negative = TBNZ(ARM64Reg::X1, 63); + TBNZ(ARM64Reg::X1, 62, done); // Branch to done if NaN // "Normalize" denormal values. 
// The simplified calculation used here results in the upper 11 bits being incorrect, @@ -375,12 +375,11 @@ void JitArm64::GenerateFrsqrte() BFI(ARM64Reg::X1, ARM64Reg::X3, 52, 12); B(positive_normal); - SetJumpTarget(nan_or_inf); + SetJumpTarget(negative); MOVI2R(ARM64Reg::X2, std::bit_cast<u64>(-std::numeric_limits<double>::infinity())); CMP(ARM64Reg::X1, ARM64Reg::X2); - B(CCFlags::CC_NEQ, done); + B(CCFlags::CC_HI, done); // Branch to done if NaN - SetJumpTarget(negative); TBNZ(ARM64Reg::W3, 9, done); ORRI2R(ARM64Reg::W3, ARM64Reg::W3, FPSCR_FX | FPSCR_VXSQRT, ARM64Reg::W2); B(store_fpscr);