mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-08 15:20:45 +01:00
Merge pull request #13211 from Sintendo/blendvpd
Jit_FloatingPoint: fselx - Prefer BLENDVPD over VBLENDVPD
This commit is contained in:
commit
c528a70e64
@ -644,6 +644,20 @@ void Jit64::fselx(UGeckoInstruction inst)
|
|||||||
|
|
||||||
if (cpu_info.bAVX)
|
if (cpu_info.bAVX)
|
||||||
{
|
{
|
||||||
|
// Prefer BLENDVPD over VBLENDVPD if the latter doesn't save any
|
||||||
|
// instructions.
|
||||||
|
//
|
||||||
|
// VBLENDVPD allows separate source and destination registers, which can
|
||||||
|
// eliminate a MOVAPD/MOVSD. However, on Intel since Skylake, VBLENDVPD
|
||||||
|
// takes additional uops to execute compared to BLENDVPD (according to
|
||||||
|
// https://uops.info). On AMD and older Intel microarchitectures there is no
|
||||||
|
// difference.
|
||||||
|
if (d == c)
|
||||||
|
{
|
||||||
|
BLENDVPD(Rd, Rb);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
X64Reg src1 = XMM1;
|
X64Reg src1 = XMM1;
|
||||||
if (Rc.IsSimpleReg())
|
if (Rc.IsSimpleReg())
|
||||||
{
|
{
|
||||||
@ -654,7 +668,7 @@ void Jit64::fselx(UGeckoInstruction inst)
|
|||||||
MOVAPD(XMM1, Rc);
|
MOVAPD(XMM1, Rc);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (d == c || packed)
|
if (packed)
|
||||||
{
|
{
|
||||||
VBLENDVPD(Rd, src1, Rb, XMM0);
|
VBLENDVPD(Rd, src1, Rb, XMM0);
|
||||||
return;
|
return;
|
||||||
|
Loading…
Reference in New Issue
Block a user