mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-25 07:21:14 +01:00
JitArm64: Merge scalar 4-operand instructions.
This commit is contained in:
parent
9c048bbc36
commit
19713f7c14
@ -139,17 +139,9 @@ public:
|
||||
// Floating point
|
||||
void fp_arith(UGeckoInstruction inst);
|
||||
void fabsx(UGeckoInstruction inst);
|
||||
void fmaddsx(UGeckoInstruction inst);
|
||||
void fmaddx(UGeckoInstruction inst);
|
||||
void fmrx(UGeckoInstruction inst);
|
||||
void fmsubsx(UGeckoInstruction inst);
|
||||
void fmsubx(UGeckoInstruction inst);
|
||||
void fnabsx(UGeckoInstruction inst);
|
||||
void fnegx(UGeckoInstruction inst);
|
||||
void fnmaddsx(UGeckoInstruction inst);
|
||||
void fnmaddx(UGeckoInstruction inst);
|
||||
void fnmsubsx(UGeckoInstruction inst);
|
||||
void fnmsubx(UGeckoInstruction inst);
|
||||
void fselx(UGeckoInstruction inst);
|
||||
void fcmpX(UGeckoInstruction inst);
|
||||
void frspx(UGeckoInstruction inst);
|
||||
|
@ -37,40 +37,55 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
|
||||
FALLBACK_IF(inst.Rc);
|
||||
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
|
||||
|
||||
u32 a = inst.FA, d = inst.FD;
|
||||
u32 b = inst.SUBOP5 == 25 ? inst.FC : inst.FB;
|
||||
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
|
||||
u32 op5 = inst.SUBOP5;
|
||||
|
||||
bool single = inst.OPCD == 59;
|
||||
bool packed = inst.OPCD == 4;
|
||||
|
||||
bool use_c = op5 >= 25; // fmul and all kind of fmaddXX
|
||||
bool use_b = op5 != 25; // fmul uses no B
|
||||
|
||||
ARM64Reg VA, VB, VC, VD;
|
||||
|
||||
if (packed)
|
||||
{
|
||||
ARM64Reg VA = fpr.R(a, REG_REG);
|
||||
ARM64Reg VB = fpr.R(b, REG_REG);
|
||||
ARM64Reg VD = fpr.RW(d, REG_REG);
|
||||
VA = fpr.R(a, REG_REG);
|
||||
if (use_b)
|
||||
VB = fpr.R(b, REG_REG);
|
||||
if (use_c)
|
||||
VC = fpr.R(c, REG_REG);
|
||||
VD = fpr.RW(d, REG_REG);
|
||||
|
||||
switch (inst.SUBOP5)
|
||||
switch (op5)
|
||||
{
|
||||
case 18: m_float_emit.FDIV(64, VD, VA, VB); break;
|
||||
case 20: m_float_emit.FSUB(64, VD, VA, VB); break;
|
||||
case 21: m_float_emit.FADD(64, VD, VA, VB); break;
|
||||
case 25: m_float_emit.FMUL(64, VD, VA, VB); break;
|
||||
default: _assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!");
|
||||
case 25: m_float_emit.FMUL(64, VD, VA, VC); break;
|
||||
default: _assert_msg_(DYNA_REC, 0, "fp_arith"); break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ARM64Reg VA = EncodeRegToDouble(fpr.R(a, REG_IS_LOADED));
|
||||
ARM64Reg VB = EncodeRegToDouble(fpr.R(b, REG_IS_LOADED));
|
||||
ARM64Reg VD = EncodeRegToDouble(fpr.RW(d, single ? REG_DUP : REG_LOWER_PAIR));
|
||||
VA = EncodeRegToDouble(fpr.R(a, REG_IS_LOADED));
|
||||
if (use_b)
|
||||
VB = EncodeRegToDouble(fpr.R(b, REG_IS_LOADED));
|
||||
if (use_c)
|
||||
VC = EncodeRegToDouble(fpr.R(c, REG_IS_LOADED));
|
||||
VD = EncodeRegToDouble(fpr.RW(d, single ? REG_DUP : REG_LOWER_PAIR));
|
||||
|
||||
switch (inst.SUBOP5)
|
||||
switch (op5)
|
||||
{
|
||||
case 18: m_float_emit.FDIV(VD, VA, VB); break;
|
||||
case 20: m_float_emit.FSUB(VD, VA, VB); break;
|
||||
case 21: m_float_emit.FADD(VD, VA, VB); break;
|
||||
case 25: m_float_emit.FMUL(VD, VA, VB); break;
|
||||
default: _assert_msg_(DYNA_REC, 0, "fp_arith WTF!!!");
|
||||
case 25: m_float_emit.FMUL(VD, VA, VC); break;
|
||||
case 28: m_float_emit.FNMSUB(VD, VA, VC, VB); break; // fmsub: "D = A*C - B" vs "Vd = (-Va) + Vn*Vm"
|
||||
case 29: m_float_emit.FMADD(VD, VA, VC, VB); break; // fmadd: "D = A*C + B" vs "Vd = Va + Vn*Vm"
|
||||
case 30: m_float_emit.FMSUB(VD, VA, VC, VB); break; // fnmsub: "D = -(A*C - B)" vs "Vd = Va + (-Vn)*Vm"
|
||||
case 31: m_float_emit.FNMADD(VD, VA, VC, VB); break; // fnmadd: "D = -(A*C + B)" vs "Vd = (-Va) + (-Vn)*Vm"
|
||||
default: _assert_msg_(DYNA_REC, 0, "fp_arith"); break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -78,45 +93,6 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
|
||||
fpr.FixSinglePrecision(d);
|
||||
}
|
||||
|
||||
// JIT handler for fmaddsx: emits ARM64 code that computes D = A*C + B on the
// operand registers after EncodeRegToDouble().
// The product is first written to a scratch register with FMUL and then added
// to B with a separate FADD, i.e. two instructions are emitted, not one FMADD.
// NOTE(review): fmaddx in this file emits a single FMADD for the same
// expression - confirm the split multiply/add here is intentional.
void JitArm64::fmaddsx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITFloatingPointOff);
|
||||
// Take the FALLBACK_IF path when the Rc bit is set, or when bFPRF is enabled
// and this op wants FPRF (presumably defers to the interpreter - confirm macro).
FALLBACK_IF(inst.Rc);
|
||||
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
|
||||
|
||||
// Register indices come from the instruction's FA/FB/FC/FD fields.
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
|
||||
|
||||
// Sources are requested with REG_IS_LOADED, the destination with REG_DUP.
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
|
||||
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
|
||||
ARM64Reg VC = fpr.R(c, REG_IS_LOADED);
|
||||
ARM64Reg VD = fpr.RW(d, REG_DUP);
|
||||
// Scratch register for the intermediate product; released by Unlock() below.
ARM64Reg V0 = fpr.GetReg();
|
||||
|
||||
// V0 = A * C, then D = V0 + B.
m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC));
|
||||
m_float_emit.FADD(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB));
|
||||
// Post-processes D after the arithmetic (presumably rounds it to single
// precision - confirm against the register cache implementation).
fpr.FixSinglePrecision(d);
|
||||
|
||||
fpr.Unlock(V0);
|
||||
}
|
||||
|
||||
// JIT handler for fmaddx: emits a single ARM64 FMADD that computes
// D = A*C + B on the operand registers after EncodeRegToDouble().
// Unlike fmaddsx, no scratch register is used and FixSinglePrecision() is
// not called.
void JitArm64::fmaddx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITFloatingPointOff);
|
||||
// Take the FALLBACK_IF path when the Rc bit is set, or when bFPRF is enabled
// and this op wants FPRF (presumably defers to the interpreter - confirm macro).
FALLBACK_IF(inst.Rc);
|
||||
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
|
||||
|
||||
// Register indices come from the instruction's FA/FB/FC/FD fields.
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
|
||||
|
||||
// Sources are requested with REG_IS_LOADED; the destination uses RW()'s
// default mode (no explicit mode argument is passed).
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
|
||||
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
|
||||
ARM64Reg VC = fpr.R(c, REG_IS_LOADED);
|
||||
ARM64Reg VD = fpr.RW(d);
|
||||
|
||||
// Operand order is (dest, A, C, B): the addend B is passed last.
m_float_emit.FMADD(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC), EncodeRegToDouble(VB));
|
||||
}
|
||||
|
||||
void JitArm64::fmrx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
@ -131,45 +107,6 @@ void JitArm64::fmrx(UGeckoInstruction inst)
|
||||
m_float_emit.INS(64, VD, 0, VB, 0);
|
||||
}
|
||||
|
||||
// JIT handler for fmsubsx: emits ARM64 code that computes D = A*C - B on the
// operand registers after EncodeRegToDouble().
// The product is first written to a scratch register with FMUL and B is then
// subtracted with a separate FSUB, i.e. two instructions are emitted.
// NOTE(review): fmsubx in this file emits a single FNMSUB for the same
// expression - confirm the split multiply/subtract here is intentional.
void JitArm64::fmsubsx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITFloatingPointOff);
|
||||
// Take the FALLBACK_IF path when the Rc bit is set, or when bFPRF is enabled
// and this op wants FPRF (presumably defers to the interpreter - confirm macro).
FALLBACK_IF(inst.Rc);
|
||||
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
|
||||
|
||||
// Register indices come from the instruction's FA/FB/FC/FD fields.
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
|
||||
|
||||
// Sources are requested with REG_IS_LOADED, the destination with REG_DUP.
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
|
||||
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
|
||||
ARM64Reg VC = fpr.R(c, REG_IS_LOADED);
|
||||
ARM64Reg VD = fpr.RW(d, REG_DUP);
|
||||
// Scratch register for the intermediate product; released by Unlock() below.
ARM64Reg V0 = fpr.GetReg();
|
||||
|
||||
// V0 = A * C, then D = V0 - B.
m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC));
|
||||
m_float_emit.FSUB(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB));
|
||||
// Post-processes D after the arithmetic (presumably rounds it to single
// precision - confirm against the register cache implementation).
fpr.FixSinglePrecision(d);
|
||||
|
||||
fpr.Unlock(V0);
|
||||
}
|
||||
|
||||
// JIT handler for fmsubx: emits a single ARM64 FNMSUB on the operand
// registers after EncodeRegToDouble(), intended to yield D = A*C - B.
// No scratch register is used and FixSinglePrecision() is not called.
void JitArm64::fmsubx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITFloatingPointOff);
|
||||
// Take the FALLBACK_IF path when the Rc bit is set, or when bFPRF is enabled
// and this op wants FPRF (presumably defers to the interpreter - confirm macro).
FALLBACK_IF(inst.Rc);
|
||||
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
|
||||
|
||||
// Register indices come from the instruction's FA/FB/FC/FD fields.
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
|
||||
|
||||
// Sources are requested with REG_IS_LOADED; the destination uses RW()'s
// default mode (no explicit mode argument is passed).
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
|
||||
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
|
||||
ARM64Reg VC = fpr.R(c, REG_IS_LOADED);
|
||||
ARM64Reg VD = fpr.RW(d);
|
||||
|
||||
// Operand order is (dest, A, C, B). ARM64 FNMSUB negates the addend and adds
// the product, i.e. (-B) + A*C.
m_float_emit.FNMSUB(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC), EncodeRegToDouble(VB));
|
||||
}
|
||||
|
||||
void JitArm64::fnabsx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
@ -199,86 +136,6 @@ void JitArm64::fnegx(UGeckoInstruction inst)
|
||||
m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VB));
|
||||
}
|
||||
|
||||
// JIT handler for fnmaddsx: emits ARM64 code that computes D = -(A*C + B) on
// the operand registers after EncodeRegToDouble().
// Three instructions are emitted: FMUL into a scratch register, FADD with B,
// then FNEG of the result in place.
// NOTE(review): fnmaddx in this file emits a single FNMADD for the same
// expression - confirm the split sequence here is intentional.
void JitArm64::fnmaddsx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITFloatingPointOff);
|
||||
// Take the FALLBACK_IF path when the Rc bit is set, or when bFPRF is enabled
// and this op wants FPRF (presumably defers to the interpreter - confirm macro).
FALLBACK_IF(inst.Rc);
|
||||
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
|
||||
|
||||
// Register indices come from the instruction's FA/FB/FC/FD fields.
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
|
||||
|
||||
// Sources are requested with REG_IS_LOADED, the destination with REG_DUP.
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
|
||||
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
|
||||
ARM64Reg VC = fpr.R(c, REG_IS_LOADED);
|
||||
ARM64Reg VD = fpr.RW(d, REG_DUP);
|
||||
// Scratch register for the intermediate product; released by Unlock() below.
ARM64Reg V0 = fpr.GetReg();
|
||||
|
||||
// V0 = A * C, D = V0 + B, then D = -D.
m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC));
|
||||
m_float_emit.FADD(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB));
|
||||
m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VD));
|
||||
// Post-processes D after the arithmetic (presumably rounds it to single
// precision - confirm against the register cache implementation).
fpr.FixSinglePrecision(d);
|
||||
|
||||
fpr.Unlock(V0);
|
||||
}
|
||||
|
||||
// JIT handler for fnmaddx: emits a single ARM64 FNMADD on the operand
// registers after EncodeRegToDouble(), intended to yield D = -(A*C + B).
// No scratch register is used and FixSinglePrecision() is not called.
void JitArm64::fnmaddx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITFloatingPointOff);
|
||||
// Take the FALLBACK_IF path when the Rc bit is set, or when bFPRF is enabled
// and this op wants FPRF (presumably defers to the interpreter - confirm macro).
FALLBACK_IF(inst.Rc);
|
||||
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
|
||||
|
||||
// Register indices come from the instruction's FA/FB/FC/FD fields.
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
|
||||
|
||||
// Sources are requested with REG_IS_LOADED; the destination uses RW()'s
// default mode (no explicit mode argument is passed).
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
|
||||
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
|
||||
ARM64Reg VC = fpr.R(c, REG_IS_LOADED);
|
||||
ARM64Reg VD = fpr.RW(d);
|
||||
|
||||
// Operand order is (dest, A, C, B). ARM64 FNMADD negates both the addend and
// the product, i.e. (-B) + (-(A*C)).
m_float_emit.FNMADD(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC), EncodeRegToDouble(VB));
|
||||
}
|
||||
|
||||
// JIT handler for fnmsubsx: emits ARM64 code that computes D = -(A*C - B) on
// the operand registers after EncodeRegToDouble().
// Three instructions are emitted: FMUL into a scratch register, FSUB of B,
// then FNEG of the result in place.
// NOTE(review): fnmsubx in this file emits a single FMSUB for the same
// expression - confirm the split sequence here is intentional.
void JitArm64::fnmsubsx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITFloatingPointOff);
|
||||
// Take the FALLBACK_IF path when the Rc bit is set, or when bFPRF is enabled
// and this op wants FPRF (presumably defers to the interpreter - confirm macro).
FALLBACK_IF(inst.Rc);
|
||||
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
|
||||
|
||||
// Register indices come from the instruction's FA/FB/FC/FD fields.
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
|
||||
|
||||
// Sources are requested with REG_IS_LOADED, the destination with REG_DUP.
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
|
||||
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
|
||||
ARM64Reg VC = fpr.R(c, REG_IS_LOADED);
|
||||
ARM64Reg VD = fpr.RW(d, REG_DUP);
|
||||
// Scratch register for the intermediate product; released by Unlock() below.
ARM64Reg V0 = fpr.GetReg();
|
||||
|
||||
// V0 = A * C, D = V0 - B, then D = -D.
m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC));
|
||||
m_float_emit.FSUB(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB));
|
||||
m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VD));
|
||||
// Post-processes D after the arithmetic (presumably rounds it to single
// precision - confirm against the register cache implementation).
fpr.FixSinglePrecision(d);
|
||||
|
||||
fpr.Unlock(V0);
|
||||
}
|
||||
|
||||
// JIT handler for fnmsubx: emits a single ARM64 FMSUB on the operand
// registers after EncodeRegToDouble(), intended to yield D = -(A*C - B).
// No scratch register is used and FixSinglePrecision() is not called.
void JitArm64::fnmsubx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITFloatingPointOff);
|
||||
// Take the FALLBACK_IF path when the Rc bit is set, or when bFPRF is enabled
// and this op wants FPRF (presumably defers to the interpreter - confirm macro).
FALLBACK_IF(inst.Rc);
|
||||
FALLBACK_IF(SConfig::GetInstance().bFPRF && js.op->wantsFPRF);
|
||||
|
||||
// Register indices come from the instruction's FA/FB/FC/FD fields.
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
|
||||
|
||||
// Sources are requested with REG_IS_LOADED; the destination uses RW()'s
// default mode (no explicit mode argument is passed).
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
|
||||
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
|
||||
ARM64Reg VC = fpr.R(c, REG_IS_LOADED);
|
||||
ARM64Reg VD = fpr.RW(d);
|
||||
|
||||
// Operand order is (dest, A, C, B). ARM64 FMSUB subtracts the product from
// the addend, i.e. B - A*C, which equals -(A*C - B).
m_float_emit.FMSUB(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC), EncodeRegToDouble(VB));
|
||||
}
|
||||
|
||||
void JitArm64::fselx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
|
@ -318,10 +318,10 @@ static GekkoOPTemplate table59[] =
|
||||
{21, &JitArm64::fp_arith}, // faddsx
|
||||
{24, &JitArm64::FallBackToInterpreter}, // fresx
|
||||
{25, &JitArm64::fp_arith}, // fmulsx
|
||||
{28, &JitArm64::fmsubsx}, // fmsubsx
|
||||
{29, &JitArm64::fmaddsx}, // fmaddsx
|
||||
{30, &JitArm64::fnmsubsx}, // fnmsubsx
|
||||
{31, &JitArm64::fnmaddsx}, // fnmaddsx
|
||||
{28, &JitArm64::fp_arith}, // fmsubsx
|
||||
{29, &JitArm64::fp_arith}, // fmaddsx
|
||||
{30, &JitArm64::fp_arith}, // fnmsubsx
|
||||
{31, &JitArm64::fp_arith}, // fnmaddsx
|
||||
};
|
||||
|
||||
static GekkoOPTemplate table63[] =
|
||||
@ -352,10 +352,10 @@ static GekkoOPTemplate table63_2[] =
|
||||
{23, &JitArm64::fselx}, // fselx
|
||||
{25, &JitArm64::fp_arith}, // fmulx
|
||||
{26, &JitArm64::FallBackToInterpreter}, // frsqrtex
|
||||
{28, &JitArm64::fmsubx}, // fmsubx
|
||||
{29, &JitArm64::fmaddx}, // fmaddx
|
||||
{30, &JitArm64::fnmsubx}, // fnmsubx
|
||||
{31, &JitArm64::fnmaddx}, // fnmaddx
|
||||
{28, &JitArm64::fp_arith}, // fmsubx
|
||||
{29, &JitArm64::fp_arith}, // fmaddx
|
||||
{30, &JitArm64::fp_arith}, // fnmsubx
|
||||
{31, &JitArm64::fp_arith}, // fnmaddx
|
||||
};
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user