Merge pull request #13211 from Sintendo/blendvpd

Jit_FloatingPoint: fselx - Prefer BLENDVPD over VBLENDVPD
2025-01-08 15:20:45 +01:00 · 2024-12-22 18:35:11 -05:00 · 2024-12-22 18:35:11 -05:00 · c528a70e64
commit c528a70e64
parent a1d6aa7d3e 065165f749
1 changed files with 15 additions and 1 deletions
--- a/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_FloatingPoint.cpp
@@ -644,6 +644,20 @@ void Jit64::fselx(UGeckoInstruction inst)
  if (cpu_info.bAVX)
  {
    // Prefer BLENDVPD over VBLENDVPD if the latter doesn't save any
    // instructions.
    //
    // VBLENDVPD allows separate source and destination registers, which can
    // eliminate a MOVAPD/MOVSD. However, on Intel since Skylake, VBLENDVPD
    // takes additional uops to execute compared to BLENDVPD (according to
    // https://uops.info). On AMD and older Intel microarchitectures there is no
    // difference.
    if (d == c)
    {
      BLENDVPD(Rd, Rb);
      return;
    }
    X64Reg src1 = XMM1;
    if (Rc.IsSimpleReg())
    {
@@ -654,7 +668,7 @@ void Jit64::fselx(UGeckoInstruction inst)
      MOVAPD(XMM1, Rc);
    }
-    if (d == c || packed)
+    if (packed)
    {
      VBLENDVPD(Rd, src1, Rb, XMM0);
      return;