mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-25 07:21:14 +01:00
Jit64: use domain-appropriate instructions
Original commit: 62d3423ccf090da95b0caae1652e8fbb248c4b99
Author: Tillmann Karras <tilkax@gmail.com>
Date: Wed Nov 18 20:32:20 2015 +0100
This commit is contained in:
parent
cc66f0336f
commit
f000bb7590
@@ -149,18 +149,18 @@ void Jit64::HandleNaNs(UGeckoInstruction inst, X64Reg xmm_out, X64Reg xmm, X64Re
|
||||
SwitchToFarCode();
|
||||
SetJumpTarget(handle_nan);
|
||||
MOVAPD(tmp, R(clobber));
|
||||
PANDN(clobber, R(xmm));
|
||||
PAND(tmp, M(psGeneratedQNaN));
|
||||
POR(tmp, R(clobber));
|
||||
ANDNPD(clobber, R(xmm));
|
||||
ANDPD(tmp, M(psGeneratedQNaN));
|
||||
ORPD(tmp, R(clobber));
|
||||
MOVAPD(xmm, R(tmp));
|
||||
for (u32 x : inputs)
|
||||
{
|
||||
MOVAPD(clobber, fpr.R(x));
|
||||
CMPPD(clobber, R(clobber), CMP_ORD);
|
||||
MOVAPD(tmp, R(clobber));
|
||||
PANDN(clobber, fpr.R(x));
|
||||
PAND(xmm, R(tmp));
|
||||
POR(xmm, R(clobber));
|
||||
ANDNPD(clobber, fpr.R(x));
|
||||
ANDPD(xmm, R(tmp));
|
||||
ORPD(xmm, R(clobber));
|
||||
}
|
||||
FixupBranch done = J(true);
|
||||
SwitchToNearCode();
|
||||
@@ -350,7 +350,7 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
|
||||
ADDSD(XMM1, fpr.R(b));
|
||||
}
|
||||
if (inst.SUBOP5 == 31) // nmadd
|
||||
PXOR(XMM1, M(packed ? psSignBits2 : psSignBits));
|
||||
XORPD(XMM1, M(packed ? psSignBits2 : psSignBits));
|
||||
}
|
||||
fpr.BindToRegister(d, !single);
|
||||
if (single)
|
||||
@@ -384,15 +384,15 @@ void Jit64::fsign(UGeckoInstruction inst)
|
||||
switch (inst.SUBOP10)
|
||||
{
|
||||
case 40: // neg
|
||||
avx_op(&XEmitter::VPXOR, &XEmitter::PXOR, fpr.RX(d), src, M(packed ? psSignBits2 : psSignBits),
|
||||
packed);
|
||||
avx_op(&XEmitter::VXORPD, &XEmitter::XORPD, fpr.RX(d), src,
|
||||
M(packed ? psSignBits2 : psSignBits), packed);
|
||||
break;
|
||||
case 136: // nabs
|
||||
avx_op(&XEmitter::VPOR, &XEmitter::POR, fpr.RX(d), src, M(packed ? psSignBits2 : psSignBits),
|
||||
avx_op(&XEmitter::VORPD, &XEmitter::ORPD, fpr.RX(d), src, M(packed ? psSignBits2 : psSignBits),
|
||||
packed);
|
||||
break;
|
||||
case 264: // abs
|
||||
avx_op(&XEmitter::VPAND, &XEmitter::PAND, fpr.RX(d), src, M(packed ? psAbsMask2 : psAbsMask),
|
||||
avx_op(&XEmitter::VANDPD, &XEmitter::ANDPD, fpr.RX(d), src, M(packed ? psAbsMask2 : psAbsMask),
|
||||
packed);
|
||||
break;
|
||||
default:
|
||||
@@ -416,7 +416,7 @@ void Jit64::fselx(UGeckoInstruction inst)
|
||||
bool packed = inst.OPCD == 4; // ps_sel
|
||||
|
||||
fpr.Lock(a, b, c, d);
|
||||
PXOR(XMM0, R(XMM0));
|
||||
XORPD(XMM0, R(XMM0));
|
||||
// This condition is very tricky; there's only one right way to handle both the case of
|
||||
// negative/positive zero and NaN properly.
|
||||
// (a >= -0.0 ? c : b) transforms into (0 > a ? b : c), hence the NLE.
|
||||
@@ -433,9 +433,9 @@ void Jit64::fselx(UGeckoInstruction inst)
|
||||
else
|
||||
{
|
||||
MOVAPD(XMM1, R(XMM0));
|
||||
PAND(XMM0, fpr.R(b));
|
||||
PANDN(XMM1, fpr.R(c));
|
||||
POR(XMM1, R(XMM0));
|
||||
ANDPD(XMM0, fpr.R(b));
|
||||
ANDNPD(XMM1, fpr.R(c));
|
||||
ORPD(XMM1, R(XMM0));
|
||||
}
|
||||
|
||||
fpr.BindToRegister(d, !packed);
|
||||
|
@@ -362,7 +362,7 @@ void Jit64::dcbz(UGeckoInstruction inst)
|
||||
FixupBranch slow = J_CC(CC_Z, true);
|
||||
|
||||
// Fast path: compute full address, then zero out 32 bytes of memory.
|
||||
PXOR(XMM0, R(XMM0));
|
||||
XORPS(XMM0, R(XMM0));
|
||||
MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0);
|
||||
MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user