Mirror of https://github.com/dolphin-emu/dolphin.git (synced 2025-01-25 07:21:14 +01:00)
Merge pull request #11322 from JosJuice/jit64-madds-nan
Jit64: Correctly handle NaNs for ps_maddsX
This commit is contained in:
commit
b23eb1f550
@ -350,11 +350,19 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
|||||||
inst.OPCD == 4 || (!cpu_info.bAtom && !software_fma && single && js.op->fprIsDuplicated[a] &&
|
inst.OPCD == 4 || (!cpu_info.bAtom && !software_fma && single && js.op->fprIsDuplicated[a] &&
|
||||||
js.op->fprIsDuplicated[b] && js.op->fprIsDuplicated[c]);
|
js.op->fprIsDuplicated[b] && js.op->fprIsDuplicated[c]);
|
||||||
|
|
||||||
|
const bool subtract = inst.SUBOP5 == 28 || inst.SUBOP5 == 30; // msub, nmsub
|
||||||
|
const bool negate = inst.SUBOP5 == 30 || inst.SUBOP5 == 31; // nmsub, nmadd
|
||||||
|
const bool madds0 = inst.SUBOP5 == 14;
|
||||||
|
const bool madds1 = inst.SUBOP5 == 15;
|
||||||
|
const bool madds_accurate_nans = m_accurate_nans && (madds0 || madds1);
|
||||||
|
|
||||||
RCOpArg Ra;
|
RCOpArg Ra;
|
||||||
RCOpArg Rb;
|
RCOpArg Rb;
|
||||||
RCOpArg Rc;
|
RCOpArg Rc;
|
||||||
RCX64Reg Rd;
|
RCX64Reg Rd;
|
||||||
RCX64Reg scratch_guard;
|
RCX64Reg scratch_guard;
|
||||||
|
RCX64Reg Rc_duplicated_guard;
|
||||||
|
X64Reg Rc_duplicated = XMM2;
|
||||||
if (software_fma)
|
if (software_fma)
|
||||||
{
|
{
|
||||||
scratch_guard = fpr.Scratch(XMM2);
|
scratch_guard = fpr.Scratch(XMM2);
|
||||||
@ -374,12 +382,14 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
|||||||
Rc = fpr.Use(c, RCMode::Read);
|
Rc = fpr.Use(c, RCMode::Read);
|
||||||
Rd = fpr.Bind(d, single ? RCMode::Write : RCMode::ReadWrite);
|
Rd = fpr.Bind(d, single ? RCMode::Write : RCMode::ReadWrite);
|
||||||
RegCache::Realize(Ra, Rb, Rc, Rd);
|
RegCache::Realize(Ra, Rb, Rc, Rd);
|
||||||
}
|
|
||||||
|
|
||||||
const bool subtract = inst.SUBOP5 == 28 || inst.SUBOP5 == 30; // msub, nmsub
|
if (madds_accurate_nans)
|
||||||
const bool negate = inst.SUBOP5 == 30 || inst.SUBOP5 == 31; // nmsub, nmadd
|
{
|
||||||
const bool madds0 = inst.SUBOP5 == 14;
|
Rc_duplicated_guard = fpr.Scratch();
|
||||||
const bool madds1 = inst.SUBOP5 == 15;
|
RegCache::Realize(Rc_duplicated_guard);
|
||||||
|
Rc_duplicated = Rc_duplicated_guard;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
X64Reg scratch_xmm = XMM0;
|
X64Reg scratch_xmm = XMM0;
|
||||||
X64Reg result_xmm = XMM1;
|
X64Reg result_xmm = XMM1;
|
||||||
@ -435,18 +445,30 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
|||||||
{
|
{
|
||||||
result_xmm = XMM0;
|
result_xmm = XMM0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (madds_accurate_nans)
|
||||||
|
{
|
||||||
|
if (madds0)
|
||||||
|
MOVDDUP(Rc_duplicated, Rc);
|
||||||
|
else
|
||||||
|
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, Rc_duplicated, Rc, Rc, 3);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (madds0)
|
if (madds0)
|
||||||
{
|
{
|
||||||
MOVDDUP(result_xmm, Rc);
|
MOVDDUP(result_xmm, Rc);
|
||||||
|
if (madds_accurate_nans)
|
||||||
|
MOVAPD(R(Rc_duplicated), result_xmm);
|
||||||
if (round_input)
|
if (round_input)
|
||||||
Force25BitPrecision(result_xmm, R(result_xmm), scratch_xmm);
|
Force25BitPrecision(result_xmm, R(result_xmm), scratch_xmm);
|
||||||
}
|
}
|
||||||
else if (madds1)
|
else if (madds1)
|
||||||
{
|
{
|
||||||
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, result_xmm, Rc, Rc, 3);
|
avx_op(&XEmitter::VSHUFPD, &XEmitter::SHUFPD, result_xmm, Rc, Rc, 3);
|
||||||
|
if (madds_accurate_nans)
|
||||||
|
MOVAPD(R(Rc_duplicated), result_xmm);
|
||||||
if (round_input)
|
if (round_input)
|
||||||
Force25BitPrecision(result_xmm, R(result_xmm), scratch_xmm);
|
Force25BitPrecision(result_xmm, R(result_xmm), scratch_xmm);
|
||||||
}
|
}
|
||||||
@ -510,7 +532,8 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
|||||||
result_xmm = Rd;
|
result_xmm = Rd;
|
||||||
}
|
}
|
||||||
|
|
||||||
HandleNaNs(inst, result_xmm, XMM0, Ra, Rb, Rc);
|
// If packed, the clobber register must be XMM0. If not packed, the clobber register is unused.
|
||||||
|
HandleNaNs(inst, result_xmm, XMM0, Ra, Rb, madds_accurate_nans ? R(Rc_duplicated) : Rc);
|
||||||
|
|
||||||
if (single)
|
if (single)
|
||||||
FinalizeSingleResult(Rd, R(result_xmm), packed, true);
|
FinalizeSingleResult(Rd, R(result_xmm), packed, true);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user