mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-08 15:20:45 +01:00
JitArm64: Fix frsqrte misclassifying large negative inputs as NaN
The frsqrte routine has a little trick where it uses `TBNZ(ARM64Reg::X1, 62)` to check if the input is NaN/Inf. Bit 62 is the most significant exponent bit of a double, so it is set not only for NaN/Inf but also for every normal value whose magnitude is at least 2; the trick therefore only works if we've already checked that the input isn't normal. While we have checked that the input isn't a positive normal at this point, it could still be a negative normal. Because of this, the frsqrte routine would incorrectly treat certain negative normal inputs (those with bit 62 set, i.e. large ones) as NaNs, causing it to output the wrong exception in FPSCR (while still producing the correct numerical value). To fix the problem, I'm swapping the order of the lines `FixupBranch nan_or_inf = TBNZ(ARM64Reg::X1, 62);` and `FixupBranch negative = TBNZ(ARM64Reg::X1, 63);`, so that the sign bit is tested first, and then instead of having the `nan_or_inf` case do a special check for negative infinity, I'm making the `negative` case do a special check for NaN. The total instruction count is the same.
This commit is contained in:
parent
1a8619c85b
commit
709b64d310
@@ -363,8 +363,8 @@ void JitArm64::GenerateFrsqrte()
 RET();
 
 SetJumpTarget(not_positive_normal_not_zero);
-FixupBranch nan_or_inf = TBNZ(ARM64Reg::X1, 62);
 FixupBranch negative = TBNZ(ARM64Reg::X1, 63);
+TBNZ(ARM64Reg::X1, 62, done); // Branch to done if NaN
 
 // "Normalize" denormal values.
 // The simplified calculation used here results in the upper 11 bits being incorrect,
@@ -375,12 +375,11 @@ void JitArm64::GenerateFrsqrte()
 BFI(ARM64Reg::X1, ARM64Reg::X3, 52, 12);
 B(positive_normal);
 
-SetJumpTarget(nan_or_inf);
+SetJumpTarget(negative);
 MOVI2R(ARM64Reg::X2, std::bit_cast<u64>(-std::numeric_limits<double>::infinity()));
 CMP(ARM64Reg::X1, ARM64Reg::X2);
-B(CCFlags::CC_NEQ, done);
+B(CCFlags::CC_HI, done); // Branch to done if NaN
 
-SetJumpTarget(negative);
 TBNZ(ARM64Reg::W3, 9, done);
 ORRI2R(ARM64Reg::W3, ARM64Reg::W3, FPSCR_FX | FPSCR_VXSQRT, ARM64Reg::W2);
 B(store_fpscr);
Loading…
Reference in New Issue
Block a user