Merge pull request #3629 from degasus/arm

JitArm64: Single precision tracking.
This commit is contained in:
Ryan Houdek 2016-02-25 18:10:15 -05:00
commit a0c51806ec
8 changed files with 412 additions and 263 deletions

View File

@ -73,6 +73,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
m_float_emit.REV32(8, D0, D0); m_float_emit.REV32(8, D0, D0);
m_float_emit.STR(64, Q0, X28, addr); m_float_emit.STR(64, Q0, X28, addr);
} }
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2I)
{
m_float_emit.REV32(8, D0, RS);
m_float_emit.STR(64, Q0, X28, addr);
}
else else
{ {
m_float_emit.REV64(8, Q0, RS); m_float_emit.REV64(8, Q0, RS);
@ -86,7 +91,6 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
{ {
m_float_emit.LDR(32, EncodeRegToDouble(RS), X28, addr); m_float_emit.LDR(32, EncodeRegToDouble(RS), X28, addr);
m_float_emit.REV32(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS)); m_float_emit.REV32(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
m_float_emit.FCVT(64, 32, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
} }
else else
{ {
@ -198,6 +202,13 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
MOVI2R(X30, (u64)PowerPC::Write_U64); MOVI2R(X30, (u64)PowerPC::Write_U64);
BLR(X30); BLR(X30);
} }
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2I)
{
m_float_emit.UMOV(64, X0, RS, 0);
ORR(X0, SP, X0, ArithOption(X0, ST_ROR, 32));
MOVI2R(X30, (u64)PowerPC::Write_U64);
BLR(X30);
}
else else
{ {
MOVI2R(X30, (u64)&PowerPC::Write_U64); MOVI2R(X30, (u64)&PowerPC::Write_U64);
@ -214,7 +225,6 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
MOVI2R(X30, (u64)&PowerPC::Read_U32); MOVI2R(X30, (u64)&PowerPC::Read_U32);
BLR(X30); BLR(X30);
m_float_emit.INS(32, RS, 0, X0); m_float_emit.INS(32, RS, 0, X0);
m_float_emit.FCVT(64, 32, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
} }
else else
{ {

View File

@ -33,34 +33,44 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
bool use_c = op5 >= 25; // fmul and all kind of fmaddXX bool use_c = op5 >= 25; // fmul and all kind of fmaddXX
bool use_b = op5 != 25; // fmul uses no B bool use_b = op5 != 25; // fmul uses no B
bool inputs_are_singles = fpr.IsSingle(a, !packed) && (!use_b || fpr.IsSingle(b, !packed)) && (!use_c || fpr.IsSingle(c, !packed));
ARM64Reg VA, VB, VC, VD; ARM64Reg VA, VB, VC, VD;
if (packed) if (packed)
{ {
VA = fpr.R(a, REG_REG); RegType type = inputs_are_singles ? REG_REG_SINGLE : REG_REG;
u8 size = inputs_are_singles ? 32 : 64;
ARM64Reg (*reg_encoder)(ARM64Reg) = inputs_are_singles ? EncodeRegToDouble : EncodeRegToQuad;
VA = reg_encoder(fpr.R(a, type));
if (use_b) if (use_b)
VB = fpr.R(b, REG_REG); VB = reg_encoder(fpr.R(b, type));
if (use_c) if (use_c)
VC = fpr.R(c, REG_REG); VC = reg_encoder(fpr.R(c, type));
VD = fpr.RW(d, REG_REG); VD = reg_encoder(fpr.RW(d, type));
switch (op5) switch (op5)
{ {
case 18: m_float_emit.FDIV(64, VD, VA, VB); break; case 18: m_float_emit.FDIV(size, VD, VA, VB); break;
case 20: m_float_emit.FSUB(64, VD, VA, VB); break; case 20: m_float_emit.FSUB(size, VD, VA, VB); break;
case 21: m_float_emit.FADD(64, VD, VA, VB); break; case 21: m_float_emit.FADD(size, VD, VA, VB); break;
case 25: m_float_emit.FMUL(64, VD, VA, VC); break; case 25: m_float_emit.FMUL(size, VD, VA, VC); break;
default: _assert_msg_(DYNA_REC, 0, "fp_arith"); break; default: _assert_msg_(DYNA_REC, 0, "fp_arith"); break;
} }
} }
else else
{ {
VA = EncodeRegToDouble(fpr.R(a, REG_IS_LOADED)); RegType type = (inputs_are_singles && single) ? REG_LOWER_PAIR_SINGLE : REG_LOWER_PAIR;
RegType type_out = single ? (inputs_are_singles ? REG_DUP_SINGLE : REG_DUP) : REG_LOWER_PAIR;
ARM64Reg (*reg_encoder)(ARM64Reg) = (inputs_are_singles && single) ? EncodeRegToSingle : EncodeRegToDouble;
VA = reg_encoder(fpr.R(a, type));
if (use_b) if (use_b)
VB = EncodeRegToDouble(fpr.R(b, REG_IS_LOADED)); VB = reg_encoder(fpr.R(b, type));
if (use_c) if (use_c)
VC = EncodeRegToDouble(fpr.R(c, REG_IS_LOADED)); VC = reg_encoder(fpr.R(c, type));
VD = EncodeRegToDouble(fpr.RW(d, single ? REG_DUP : REG_LOWER_PAIR)); VD = reg_encoder(fpr.RW(d, type_out));
switch (op5) switch (op5)
{ {
@ -95,33 +105,42 @@ void JitArm64::fp_logic(UGeckoInstruction inst)
if (op10 == 72 && b == d) if (op10 == 72 && b == d)
return; return;
bool single = fpr.IsSingle(b, !packed);
u8 size = single ? 32 : 64;
if (packed) if (packed)
{ {
ARM64Reg VB = fpr.R(b, REG_REG); RegType type = single ? REG_REG_SINGLE : REG_REG;
ARM64Reg VD = fpr.RW(d, REG_REG); ARM64Reg (*reg_encoder)(ARM64Reg) = single ? EncodeRegToDouble : EncodeRegToQuad;
ARM64Reg VB = reg_encoder(fpr.R(b, type));
ARM64Reg VD = reg_encoder(fpr.RW(d, type));
switch (op10) switch (op10)
{ {
case 40: m_float_emit.FNEG(64, VD, VB); break; case 40: m_float_emit.FNEG(size, VD, VB); break;
case 72: m_float_emit.ORR(VD, VB, VB); break; case 72: m_float_emit.ORR(VD, VB, VB); break;
case 136: m_float_emit.FABS(64, VD, VB); case 136: m_float_emit.FABS(size, VD, VB);
m_float_emit.FNEG(64, VD, VD); break; m_float_emit.FNEG(size, VD, VD); break;
case 264: m_float_emit.FABS(64, VD, VB); break; case 264: m_float_emit.FABS(size, VD, VB); break;
default: _assert_msg_(DYNA_REC, 0, "fp_logic"); break; default: _assert_msg_(DYNA_REC, 0, "fp_logic"); break;
} }
} }
else else
{ {
ARM64Reg VB = fpr.R(b, REG_IS_LOADED); RegType type = single ? REG_LOWER_PAIR_SINGLE : REG_LOWER_PAIR;
ARM64Reg VD = fpr.RW(d); ARM64Reg (*reg_encoder)(ARM64Reg) = single ? EncodeRegToSingle : EncodeRegToDouble;
ARM64Reg VB = fpr.R(b, type);
ARM64Reg VD = fpr.RW(d, type);
switch (op10) switch (op10)
{ {
case 40: m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); break; case 40: m_float_emit.FNEG(reg_encoder(VD), reg_encoder(VB)); break;
case 72: m_float_emit.INS(64, VD, 0, VB, 0); break; case 72: m_float_emit.INS(size, VD, 0, VB, 0); break;
case 136: m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); case 136: m_float_emit.FABS(reg_encoder(VD), reg_encoder(VB));
m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VD)); break; m_float_emit.FNEG(reg_encoder(VD), reg_encoder(VD)); break;
case 264: m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); break; case 264: m_float_emit.FABS(reg_encoder(VD), reg_encoder(VB)); break;
default: _assert_msg_(DYNA_REC, 0, "fp_logic"); break; default: _assert_msg_(DYNA_REC, 0, "fp_logic"); break;
} }
} }
@ -135,13 +154,26 @@ void JitArm64::fselx(UGeckoInstruction inst)
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
ARM64Reg VA = fpr.R(a, REG_IS_LOADED); if (fpr.IsSingle(a, true))
ARM64Reg VB = fpr.R(b, REG_IS_LOADED); {
ARM64Reg VC = fpr.R(c, REG_IS_LOADED); ARM64Reg VA = fpr.R(a, REG_LOWER_PAIR_SINGLE);
ARM64Reg VD = fpr.RW(d); m_float_emit.FCMPE(EncodeRegToSingle(VA));
}
else
{
ARM64Reg VA = fpr.R(a, REG_LOWER_PAIR);
m_float_emit.FCMPE(EncodeRegToDouble(VA));
}
m_float_emit.FCMPE(EncodeRegToDouble(VA)); bool single = fpr.IsSingle(b, true) && fpr.IsSingle(c, true);
m_float_emit.FCSEL(EncodeRegToDouble(VD), EncodeRegToDouble(VC), EncodeRegToDouble(VB), CC_GE); RegType type = single ? REG_LOWER_PAIR_SINGLE : REG_LOWER_PAIR;
ARM64Reg (*reg_encoder)(ARM64Reg) = single ? EncodeRegToSingle : EncodeRegToDouble;
ARM64Reg VB = fpr.R(b, type);
ARM64Reg VC = fpr.R(c, type);
ARM64Reg VD = fpr.RW(d, type);
m_float_emit.FCSEL(reg_encoder(VD), reg_encoder(VC), reg_encoder(VB), CC_GE);
} }
void JitArm64::frspx(UGeckoInstruction inst) void JitArm64::frspx(UGeckoInstruction inst)
@ -153,11 +185,22 @@ void JitArm64::frspx(UGeckoInstruction inst)
u32 b = inst.FB, d = inst.FD; u32 b = inst.FB, d = inst.FD;
ARM64Reg VB = fpr.R(b, REG_IS_LOADED); if (fpr.IsSingle(b, true))
ARM64Reg VD = fpr.RW(d, REG_DUP); {
// Source is already in single precision, so no need to do anything but to copy to PSR1.
ARM64Reg VB = fpr.R(b, REG_LOWER_PAIR_SINGLE);
ARM64Reg VD = fpr.RW(d, REG_DUP_SINGLE);
m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB)); if (b != d)
m_float_emit.FCVT(64, 32, EncodeRegToDouble(VD), EncodeRegToDouble(VD)); m_float_emit.FMOV(EncodeRegToSingle(VD), EncodeRegToSingle(VB));
}
else
{
ARM64Reg VB = fpr.R(b, REG_LOWER_PAIR);
ARM64Reg VD = fpr.RW(d, REG_DUP_SINGLE);
m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB));
}
} }
void JitArm64::fcmpX(UGeckoInstruction inst) void JitArm64::fcmpX(UGeckoInstruction inst)
@ -169,8 +212,12 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
u32 a = inst.FA, b = inst.FB; u32 a = inst.FA, b = inst.FB;
int crf = inst.CRFD; int crf = inst.CRFD;
ARM64Reg VA = fpr.R(a, REG_IS_LOADED); bool singles = fpr.IsSingle(a, true) && fpr.IsSingle(b, true);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED); RegType type = singles ? REG_LOWER_PAIR_SINGLE : REG_LOWER_PAIR;
ARM64Reg (*reg_encoder)(ARM64Reg) = singles ? EncodeRegToSingle : EncodeRegToDouble;
ARM64Reg VA = reg_encoder(fpr.R(a, type));
ARM64Reg VB = reg_encoder(fpr.R(b, type));
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA); ARM64Reg XA = EncodeRegTo64(WA);
@ -179,7 +226,7 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
FixupBranch continue1, continue2, continue3; FixupBranch continue1, continue2, continue3;
ORR(XA, ZR, 32, 0, true); ORR(XA, ZR, 32, 0, true);
m_float_emit.FCMP(EncodeRegToDouble(VA), EncodeRegToDouble(VB)); m_float_emit.FCMP(VA, VB);
if (a != b) if (a != b)
{ {
@ -231,7 +278,9 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
u32 b = inst.FB, d = inst.FD; u32 b = inst.FB, d = inst.FD;
ARM64Reg VB = fpr.R(b, REG_IS_LOADED); bool single = fpr.IsSingle(b, true);
ARM64Reg VB = fpr.R(b, single ? REG_LOWER_PAIR_SINGLE : REG_LOWER_PAIR);
ARM64Reg VD = fpr.RW(d); ARM64Reg VD = fpr.RW(d);
ARM64Reg V0 = fpr.GetReg(); ARM64Reg V0 = fpr.GetReg();
@ -240,8 +289,15 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
m_float_emit.MOVI(64, EncodeRegToDouble(V0), 0xFFFF000000000000ULL); m_float_emit.MOVI(64, EncodeRegToDouble(V0), 0xFFFF000000000000ULL);
m_float_emit.BIC(16, EncodeRegToDouble(V0), 0x7); m_float_emit.BIC(16, EncodeRegToDouble(V0), 0x7);
m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB)); if (single)
m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VD), ROUND_Z); {
m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VB), ROUND_Z);
}
else
{
m_float_emit.FCVT(32, 64, EncodeRegToDouble(VD), EncodeRegToDouble(VB));
m_float_emit.FCVTS(EncodeRegToSingle(VD), EncodeRegToSingle(VD), ROUND_Z);
}
m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0)); m_float_emit.ORR(EncodeRegToDouble(VD), EncodeRegToDouble(VD), EncodeRegToDouble(V0));
fpr.Unlock(V0); fpr.Unlock(V0);
} }

View File

@ -76,7 +76,7 @@ void JitArm64::lfXX(UGeckoInstruction inst)
u32 imm_addr = 0; u32 imm_addr = 0;
bool is_immediate = false; bool is_immediate = false;
RegType type = !!(flags & BackPatchInfo::FLAG_SIZE_F64) ? REG_LOWER_PAIR : REG_DUP; RegType type = !!(flags & BackPatchInfo::FLAG_SIZE_F64) ? REG_LOWER_PAIR : REG_DUP_SINGLE;
gpr.Lock(W0, W30); gpr.Lock(W0, W30);
fpr.Lock(Q0); fpr.Lock(Q0);
@ -270,7 +270,16 @@ void JitArm64::stfXX(UGeckoInstruction inst)
gpr.Lock(W0, W1, W30); gpr.Lock(W0, W1, W30);
fpr.Lock(Q0); fpr.Lock(Q0);
ARM64Reg V0 = fpr.R(inst.FS, REG_IS_LOADED); bool single = (flags & BackPatchInfo::FLAG_SIZE_F32) && fpr.IsSingle(inst.FS, true);
ARM64Reg V0 = fpr.R(inst.FS, single ? REG_LOWER_PAIR_SINGLE : REG_LOWER_PAIR);
if (single)
{
flags &= ~BackPatchInfo::FLAG_SIZE_F32;
flags |= BackPatchInfo::FLAG_SIZE_F32I;
}
ARM64Reg addr_reg = W1; ARM64Reg addr_reg = W1;
if (update) if (update)
@ -407,24 +416,29 @@ void JitArm64::stfXX(UGeckoInstruction inst)
ADD(X1, X30, pipe_off); ADD(X1, X30, pipe_off);
LDR(INDEX_UNSIGNED, W0, X30, count_off); LDR(INDEX_UNSIGNED, W0, X30, count_off);
if (accessSize == 64) if (flags & BackPatchInfo::FLAG_SIZE_F64)
{ {
m_float_emit.REV64(8, Q0, V0); m_float_emit.REV64(8, Q0, V0);
if (pipe_off)
m_float_emit.STR(64, Q0, X1, ArithOption(X0));
else
m_float_emit.STR(64, Q0, X30, ArithOption(X0));
} }
else if (accessSize == 32) else if (flags & BackPatchInfo::FLAG_SIZE_F32)
{ {
m_float_emit.FCVT(32, 64, D0, EncodeRegToDouble(V0)); m_float_emit.FCVT(32, 64, D0, EncodeRegToDouble(V0));
m_float_emit.REV32(8, D0, D0); m_float_emit.REV32(8, D0, D0);
if (pipe_off)
m_float_emit.STR(32, D0, X1, ArithOption(X0));
else
m_float_emit.STR(32, D0, X30, ArithOption(X0));
} }
else if (flags & BackPatchInfo::FLAG_SIZE_F32I)
{
m_float_emit.REV32(8, D0, V0);
}
if (pipe_off)
{
m_float_emit.STR(accessSize, accessSize == 64 ? Q0 : D0, X1, ArithOption(X0));
}
else
{
m_float_emit.STR(accessSize, accessSize == 64 ? Q0 : D0, X30, ArithOption(X0));
}
ADD(W0, W0, accessSize >> 3); ADD(W0, W0, accessSize >> 3);
STR(INDEX_UNSIGNED, W0, X30, count_off); STR(INDEX_UNSIGNED, W0, X30, count_off);
js.fifoBytesThisBlock += accessSize >> 3; js.fifoBytesThisBlock += accessSize >> 3;

View File

@ -62,20 +62,17 @@ void JitArm64::psq_l(UGeckoInstruction inst)
if (js.assumeNoPairedQuantize) if (js.assumeNoPairedQuantize)
{ {
VS = fpr.RW(inst.RS, REG_REG); VS = fpr.RW(inst.RS, REG_REG_SINGLE);
if (!inst.W) if (!inst.W)
{ {
ADD(EncodeRegTo64(addr_reg), EncodeRegTo64(addr_reg), X28); ADD(EncodeRegTo64(addr_reg), EncodeRegTo64(addr_reg), X28);
m_float_emit.LD1(32, 1, EncodeRegToDouble(VS), EncodeRegTo64(addr_reg)); m_float_emit.LD1(32, 1, EncodeRegToDouble(VS), EncodeRegTo64(addr_reg));
m_float_emit.REV32(8, VS, VS);
m_float_emit.FCVTL(64, VS, VS);
} }
else else
{ {
m_float_emit.LDR(32, VS, EncodeRegTo64(addr_reg), X28); m_float_emit.LDR(32, VS, EncodeRegTo64(addr_reg), X28);
m_float_emit.REV32(8, VS, VS);
m_float_emit.FCVT(64, 32, EncodeRegToDouble(VS), EncodeRegToDouble(VS));
} }
m_float_emit.REV32(8, EncodeRegToDouble(VS), EncodeRegToDouble(VS));
} }
else else
{ {
@ -87,17 +84,14 @@ void JitArm64::psq_l(UGeckoInstruction inst)
LDR(X30, X30, ArithOption(EncodeRegTo64(type_reg), true)); LDR(X30, X30, ArithOption(EncodeRegTo64(type_reg), true));
BLR(X30); BLR(X30);
VS = fpr.RW(inst.RS, REG_REG); VS = fpr.RW(inst.RS, REG_REG_SINGLE);
if (!inst.W) m_float_emit.ORR(EncodeRegToDouble(VS), D0, D0);
m_float_emit.FCVTL(64, VS, D0);
else
m_float_emit.FCVT(64, 32, EncodeRegToDouble(VS), D0);
} }
if (inst.W) if (inst.W)
{ {
m_float_emit.FMOV(D0, 0x70); // 1.0 as a Double m_float_emit.FMOV(S0, 0x70); // 1.0 as a Single
m_float_emit.INS(64, VS, 1, Q0, 0); m_float_emit.INS(32, VS, 1, Q0, 0);
} }
gpr.Unlock(W0, W1, W2, W30); gpr.Unlock(W0, W1, W2, W30);
@ -121,8 +115,10 @@ void JitArm64::psq_st(UGeckoInstruction inst)
gpr.Lock(W0, W1, W2, W30); gpr.Lock(W0, W1, W2, W30);
fpr.Lock(Q0, Q1); fpr.Lock(Q0, Q1);
bool single = fpr.IsSingle(inst.RS);
ARM64Reg arm_addr = gpr.R(inst.RA); ARM64Reg arm_addr = gpr.R(inst.RA);
ARM64Reg VS = fpr.R(inst.RS, REG_REG); ARM64Reg VS = fpr.R(inst.RS, single ? REG_REG_SINGLE : REG_REG);
ARM64Reg scale_reg = W0; ARM64Reg scale_reg = W0;
ARM64Reg addr_reg = W1; ARM64Reg addr_reg = W1;
@ -156,7 +152,12 @@ void JitArm64::psq_st(UGeckoInstruction inst)
if (js.assumeNoPairedQuantize) if (js.assumeNoPairedQuantize)
{ {
u32 flags = BackPatchInfo::FLAG_STORE; u32 flags = BackPatchInfo::FLAG_STORE;
flags |= (inst.W ? BackPatchInfo::FLAG_SIZE_F32 : BackPatchInfo::FLAG_SIZE_F32X2);
if (single)
flags |= (inst.W ? BackPatchInfo::FLAG_SIZE_F32I : BackPatchInfo::FLAG_SIZE_F32X2I);
else
flags |= (inst.W ? BackPatchInfo::FLAG_SIZE_F32 : BackPatchInfo::FLAG_SIZE_F32X2);
EmitBackpatchRoutine(flags, EmitBackpatchRoutine(flags,
jo.fastmem, jo.fastmem,
jo.fastmem, jo.fastmem,
@ -166,10 +167,17 @@ void JitArm64::psq_st(UGeckoInstruction inst)
} }
else else
{ {
if (inst.W) if (single)
m_float_emit.FCVT(32, 64, D0, VS); {
m_float_emit.ORR(D0, VS, VS);
}
else else
m_float_emit.FCVTN(32, D0, VS); {
if (inst.W)
m_float_emit.FCVT(32, 64, D0, VS);
else
m_float_emit.FCVTN(32, D0, VS);
}
LDR(INDEX_UNSIGNED, scale_reg, X29, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I])); LDR(INDEX_UNSIGNED, scale_reg, X29, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I]));
UBFM(type_reg, scale_reg, 0, 2); // Type UBFM(type_reg, scale_reg, 0, 2); // Type

View File

@ -25,36 +25,41 @@ void JitArm64::ps_mergeXX(UGeckoInstruction inst)
u32 a = inst.FA, b = inst.FB, d = inst.FD; u32 a = inst.FA, b = inst.FB, d = inst.FD;
ARM64Reg VA = fpr.R(a, REG_REG); bool singles = fpr.IsSingle(a) && fpr.IsSingle(b);
ARM64Reg VB = fpr.R(b, REG_REG); RegType type = singles ? REG_REG_SINGLE : REG_REG;
ARM64Reg VD = fpr.RW(d, REG_REG); u8 size = singles ? 32 : 64;
ARM64Reg (*reg_encoder)(ARM64Reg) = singles ? EncodeRegToDouble : EncodeRegToQuad;
ARM64Reg VA = fpr.R(a, type);
ARM64Reg VB = fpr.R(b, type);
ARM64Reg VD = fpr.RW(d, type);
switch (inst.SUBOP10) switch (inst.SUBOP10)
{ {
case 528: //00 case 528: //00
m_float_emit.TRN1(64, VD, VA, VB); m_float_emit.TRN1(size, VD, VA, VB);
break; break;
case 560: //01 case 560: //01
m_float_emit.INS(64, VD, 0, VA, 0); m_float_emit.INS(size, VD, 0, VA, 0);
m_float_emit.INS(64, VD, 1, VB, 1); m_float_emit.INS(size, VD, 1, VB, 1);
break; break;
case 592: //10 case 592: //10
if (d != a && d != b) if (d != a && d != b)
{ {
m_float_emit.INS(64, VD, 0, VA, 1); m_float_emit.INS(size, VD, 0, VA, 1);
m_float_emit.INS(64, VD, 1, VB, 0); m_float_emit.INS(size, VD, 1, VB, 0);
} }
else else
{ {
ARM64Reg V0 = fpr.GetReg(); ARM64Reg V0 = fpr.GetReg();
m_float_emit.INS(64, V0, 0, VA, 1); m_float_emit.INS(size, V0, 0, VA, 1);
m_float_emit.INS(64, V0, 1, VB, 0); m_float_emit.INS(size, V0, 1, VB, 0);
m_float_emit.ORR(VD, V0, V0); m_float_emit.ORR(reg_encoder(VD), reg_encoder(V0), reg_encoder(V0));
fpr.Unlock(V0); fpr.Unlock(V0);
} }
break; break;
case 624: //11 case 624: //11
m_float_emit.TRN2(64, VD, VA, VB); m_float_emit.TRN2(size, VD, VA, VB);
break; break;
default: default:
_assert_msg_(DYNA_REC, 0, "ps_merge - invalid op"); _assert_msg_(DYNA_REC, 0, "ps_merge - invalid op");
@ -73,13 +78,19 @@ void JitArm64::ps_mulsX(UGeckoInstruction inst)
bool upper = inst.SUBOP5 == 13; bool upper = inst.SUBOP5 == 13;
ARM64Reg VA = fpr.R(a, REG_REG); bool singles = fpr.IsSingle(a) && fpr.IsSingle(c);
ARM64Reg VC = fpr.R(c, REG_REG); RegType type = singles ? REG_REG_SINGLE : REG_REG;
ARM64Reg VD = fpr.RW(d, REG_REG); u8 size = singles ? 32 : 64;
ARM64Reg (*reg_encoder)(ARM64Reg) = singles ? EncodeRegToDouble : EncodeRegToQuad;
ARM64Reg VA = fpr.R(a, type);
ARM64Reg VC = fpr.R(c, type);
ARM64Reg VD = fpr.RW(d, type);
ARM64Reg V0 = fpr.GetReg(); ARM64Reg V0 = fpr.GetReg();
m_float_emit.DUP(64, V0, VC, upper ? 1 : 0); m_float_emit.DUP(size, reg_encoder(V0), reg_encoder(VC), upper ? 1 : 0);
m_float_emit.FMUL(64, VD, VA, V0); m_float_emit.FMUL(size, reg_encoder(VD), reg_encoder(VA), reg_encoder(V0));
fpr.FixSinglePrecision(d); fpr.FixSinglePrecision(d);
fpr.Unlock(V0); fpr.Unlock(V0);
} }
@ -94,41 +105,49 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst)
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
u32 op5 = inst.SUBOP5; u32 op5 = inst.SUBOP5;
ARM64Reg VA = fpr.R(a, REG_REG); bool singles = fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c);
ARM64Reg VB = fpr.R(b, REG_REG); RegType type = singles ? REG_REG_SINGLE : REG_REG;
ARM64Reg VC = fpr.R(c, REG_REG); u8 size = singles ? 32 : 64;
ARM64Reg VD = fpr.RW(d, REG_REG); ARM64Reg (*reg_encoder)(ARM64Reg) = singles ? EncodeRegToDouble : EncodeRegToQuad;
ARM64Reg V0 = fpr.GetReg();
ARM64Reg VA = reg_encoder(fpr.R(a, type));
ARM64Reg VB = reg_encoder(fpr.R(b, type));
ARM64Reg VC = reg_encoder(fpr.R(c, type));
ARM64Reg VD = reg_encoder(fpr.RW(d, type));
ARM64Reg V0Q = fpr.GetReg();
ARM64Reg V0 = reg_encoder(V0Q);
// TODO: Do FMUL and FADD/FSUB in *one* host call to save accuracy.
switch (op5) switch (op5)
{ {
case 14: // ps_madds0 case 14: // ps_madds0
m_float_emit.DUP(64, V0, VC, 0); m_float_emit.DUP(size, V0, VC, 0);
m_float_emit.FMUL(64, V0, V0, VA); m_float_emit.FMUL(size, V0, V0, VA);
m_float_emit.FADD(64, VD, V0, VB); m_float_emit.FADD(size, VD, V0, VB);
break; break;
case 15: // ps_madds1 case 15: // ps_madds1
m_float_emit.DUP(64, V0, VC, 1); m_float_emit.DUP(size, V0, VC, 1);
m_float_emit.FMUL(64, V0, V0, VA); m_float_emit.FMUL(size, V0, V0, VA);
m_float_emit.FADD(64, VD, V0, VB); m_float_emit.FADD(size, VD, V0, VB);
break; break;
case 28: // ps_msub case 28: // ps_msub
m_float_emit.FMUL(64, V0, VA, VC); m_float_emit.FMUL(size, V0, VA, VC);
m_float_emit.FSUB(64, VD, V0, VB); m_float_emit.FSUB(size, VD, V0, VB);
break; break;
case 29: // ps_madd case 29: // ps_madd
m_float_emit.FMUL(64, V0, VA, VC); m_float_emit.FMUL(size, V0, VA, VC);
m_float_emit.FADD(64, VD, V0, VB); m_float_emit.FADD(size, VD, V0, VB);
break; break;
case 30: // ps_nmsub case 30: // ps_nmsub
m_float_emit.FMUL(64, V0, VA, VC); m_float_emit.FMUL(size, V0, VA, VC);
m_float_emit.FSUB(64, VD, V0, VB); m_float_emit.FSUB(size, VD, V0, VB);
m_float_emit.FNEG(64, VD, VD); m_float_emit.FNEG(size, VD, VD);
break; break;
case 31: // ps_nmadd case 31: // ps_nmadd
m_float_emit.FMUL(64, V0, VA, VC); m_float_emit.FMUL(size, V0, VA, VC);
m_float_emit.FADD(64, VD, V0, VB); m_float_emit.FADD(size, VD, V0, VB);
m_float_emit.FNEG(64, VD, VD); m_float_emit.FNEG(size, VD, VD);
break; break;
default: default:
_assert_msg_(DYNA_REC, 0, "ps_madd - invalid op"); _assert_msg_(DYNA_REC, 0, "ps_madd - invalid op");
@ -136,7 +155,7 @@ void JitArm64::ps_maddXX(UGeckoInstruction inst)
} }
fpr.FixSinglePrecision(d); fpr.FixSinglePrecision(d);
fpr.Unlock(V0); fpr.Unlock(V0Q);
} }
void JitArm64::ps_res(UGeckoInstruction inst) void JitArm64::ps_res(UGeckoInstruction inst)
@ -148,10 +167,16 @@ void JitArm64::ps_res(UGeckoInstruction inst)
u32 b = inst.FB, d = inst.FD; u32 b = inst.FB, d = inst.FD;
ARM64Reg VB = fpr.R(b, REG_REG); bool singles = fpr.IsSingle(b);
ARM64Reg VD = fpr.RW(d, REG_REG); RegType type = singles ? REG_REG_SINGLE : REG_REG;
u8 size = singles ? 32 : 64;
ARM64Reg (*reg_encoder)(ARM64Reg) = singles ? EncodeRegToDouble : EncodeRegToQuad;
ARM64Reg VB = fpr.R(b, type);
ARM64Reg VD = fpr.RW(d, type);
m_float_emit.FRSQRTE(size, reg_encoder(VD), reg_encoder(VB));
m_float_emit.FRSQRTE(64, VD, VB);
fpr.FixSinglePrecision(d); fpr.FixSinglePrecision(d);
} }
@ -163,23 +188,29 @@ void JitArm64::ps_sel(UGeckoInstruction inst)
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
ARM64Reg VA = fpr.R(a, REG_REG); bool singles = fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c);
ARM64Reg VB = fpr.R(b, REG_REG); RegType type = singles ? REG_REG_SINGLE : REG_REG;
ARM64Reg VC = fpr.R(c, REG_REG); u8 size = singles ? 32 : 64;
ARM64Reg VD = fpr.RW(d, REG_REG); ARM64Reg (*reg_encoder)(ARM64Reg) = singles ? EncodeRegToDouble : EncodeRegToQuad;
if (d != a && d != b && d != c) ARM64Reg VA = reg_encoder(fpr.R(a, type));
ARM64Reg VB = reg_encoder(fpr.R(b, type));
ARM64Reg VC = reg_encoder(fpr.R(c, type));
ARM64Reg VD = reg_encoder(fpr.RW(d, type));
if (d != b && d != c)
{ {
m_float_emit.FCMGE(64, VD, VA); m_float_emit.FCMGE(size, VD, VA);
m_float_emit.BSL(VD, VC, VB); m_float_emit.BSL(VD, VC, VB);
} }
else else
{ {
ARM64Reg V0 = fpr.GetReg(); ARM64Reg V0Q = fpr.GetReg();
m_float_emit.FCMGE(64, V0, VA); ARM64Reg V0 = reg_encoder(V0Q);
m_float_emit.FCMGE(size, V0, VA);
m_float_emit.BSL(V0, VC, VB); m_float_emit.BSL(V0, VC, VB);
m_float_emit.ORR(VD, V0, V0); m_float_emit.ORR(VD, V0, V0);
fpr.Unlock(V0); fpr.Unlock(V0Q);
} }
} }
@ -194,23 +225,29 @@ void JitArm64::ps_sumX(UGeckoInstruction inst)
bool upper = inst.SUBOP5 == 11; bool upper = inst.SUBOP5 == 11;
ARM64Reg VA = fpr.R(a, REG_REG); bool singles = fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c);
ARM64Reg VB = fpr.R(b, REG_REG); RegType type = singles ? REG_REG_SINGLE : REG_REG;
ARM64Reg VC = fpr.R(c, REG_REG); u8 size = singles ? 32 : 64;
ARM64Reg VD = fpr.RW(d, REG_REG); ARM64Reg (*reg_encoder)(ARM64Reg) = singles ? EncodeRegToDouble : EncodeRegToQuad;
ARM64Reg VA = fpr.R(a, type);
ARM64Reg VB = fpr.R(b, type);
ARM64Reg VC = fpr.R(c, type);
ARM64Reg VD = fpr.RW(d, type);
ARM64Reg V0 = fpr.GetReg(); ARM64Reg V0 = fpr.GetReg();
m_float_emit.DUP(64, V0, upper ? VA : VB, upper ? 0 : 1); m_float_emit.DUP(size, reg_encoder(V0), reg_encoder(upper ? VA : VB), upper ? 0 : 1);
if (d != c) if (d != c)
{ {
m_float_emit.FADD(64, VD, V0, upper ? VB : VA); m_float_emit.FADD(size, reg_encoder(VD), reg_encoder(V0), reg_encoder(upper ? VB : VA));
m_float_emit.INS(64, VD, upper ? 0 : 1, VC, upper ? 0 : 1); m_float_emit.INS(size, VD, upper ? 0 : 1, VC, upper ? 0 : 1);
} }
else else
{ {
m_float_emit.FADD(64, V0, V0, upper ? VB : VA); m_float_emit.FADD(size, reg_encoder(V0), reg_encoder(V0), reg_encoder(upper ? VB : VA));
m_float_emit.INS(64, VD, upper ? 1 : 0, V0, upper ? 1 : 0); m_float_emit.INS(size, VD, upper ? 1 : 0, V0, upper ? 1 : 0);
} }
fpr.FixSinglePrecision(d); fpr.FixSinglePrecision(d);
fpr.Unlock(V0); fpr.Unlock(V0);

View File

@ -198,7 +198,7 @@ ARM64Reg Arm64GPRCache::R(u32 preg)
{ {
ARM64Reg host_reg = GetReg(); ARM64Reg host_reg = GetReg();
m_emit->MOVI2R(host_reg, reg.GetImm()); m_emit->MOVI2R(host_reg, reg.GetImm());
reg.LoadToReg(host_reg); reg.Load(host_reg);
reg.SetDirty(true); reg.SetDirty(true);
return host_reg; return host_reg;
} }
@ -208,7 +208,7 @@ ARM64Reg Arm64GPRCache::R(u32 preg)
// This is a bit annoying. We try to keep these preloaded as much as possible // This is a bit annoying. We try to keep these preloaded as much as possible
// This can also happen on cases where PPCAnalyst isn't feeding us proper register usage statistics // This can also happen on cases where PPCAnalyst isn't feeding us proper register usage statistics
ARM64Reg host_reg = GetReg(); ARM64Reg host_reg = GetReg();
reg.LoadToReg(host_reg); reg.Load(host_reg);
reg.SetDirty(false); reg.SetDirty(false);
m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg])); m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg]));
return host_reg; return host_reg;
@ -240,7 +240,7 @@ void Arm64GPRCache::BindToRegister(u32 preg, bool do_load)
if (reg.GetType() == REG_NOTLOADED) if (reg.GetType() == REG_NOTLOADED)
{ {
ARM64Reg host_reg = GetReg(); ARM64Reg host_reg = GetReg();
reg.LoadToReg(host_reg); reg.Load(host_reg);
if (do_load) if (do_load)
m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg])); m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg]));
} }
@ -307,12 +307,38 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type)
OpArg& reg = m_guest_registers[preg]; OpArg& reg = m_guest_registers[preg];
IncrementAllUsed(); IncrementAllUsed();
reg.ResetLastUsed(); reg.ResetLastUsed();
ARM64Reg host_reg = reg.GetReg();
switch (reg.GetType()) switch (reg.GetType())
{ {
case REG_REG_SINGLE:
{
// We're asked for singles, so just return the register.
if (type == REG_REG_SINGLE || type == REG_LOWER_PAIR_SINGLE)
return host_reg;
// Else convert this register back to doubles.
m_float_emit->FCVTL(64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
reg.Load(host_reg, REG_REG);
// fall through
}
case REG_REG: // already in a reg case REG_REG: // already in a reg
return reg.GetReg(); {
break; return host_reg;
}
case REG_LOWER_PAIR_SINGLE:
{
// We're asked for the lower single, so just return the register.
if (type == REG_LOWER_PAIR_SINGLE)
return host_reg;
// Else convert this register back to a double.
m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
reg.Load(host_reg, REG_LOWER_PAIR);
// fall through
}
case REG_LOWER_PAIR: case REG_LOWER_PAIR:
{ {
if (type == REG_REG) if (type == REG_REG)
@ -320,48 +346,62 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type)
// Load the high 64bits from the file and insert them in to the high 64bits of the host register // Load the high 64bits from the file and insert them in to the high 64bits of the host register
ARM64Reg tmp_reg = GetReg(); ARM64Reg tmp_reg = GetReg();
m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, X29, PPCSTATE_OFF(ps[preg][1])); m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, X29, PPCSTATE_OFF(ps[preg][1]));
m_float_emit->INS(64, reg.GetReg(), 1, tmp_reg, 0); m_float_emit->INS(64, host_reg, 1, tmp_reg, 0);
UnlockRegister(tmp_reg); UnlockRegister(tmp_reg);
// Change it over to a full 128bit register // Change it over to a full 128bit register
reg.LoadToReg(reg.GetReg()); reg.Load(host_reg, REG_REG);
} }
return reg.GetReg(); return host_reg;
}
case REG_DUP_SINGLE:
{
if (type == REG_LOWER_PAIR_SINGLE)
return host_reg;
if (type == REG_REG_SINGLE)
{
// Duplicate to the top and change over
m_float_emit->INS(32, host_reg, 1, host_reg, 0);
reg.Load(host_reg, REG_REG_SINGLE);
return host_reg;
}
m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
reg.Load(host_reg, REG_DUP);
// fall through
} }
break;
case REG_DUP: case REG_DUP:
{ {
ARM64Reg host_reg = reg.GetReg();
if (type == REG_REG) if (type == REG_REG)
{ {
// We are requesting a full 128bit register // We are requesting a full 128bit register
// but we are only available in the lower 64bits // but we are only available in the lower 64bits
// Duplicate to the top and change over // Duplicate to the top and change over
m_float_emit->INS(64, host_reg, 1, host_reg, 0); m_float_emit->INS(64, host_reg, 1, host_reg, 0);
reg.LoadToReg(host_reg); reg.Load(host_reg, REG_REG);
} }
return host_reg; return host_reg;
} }
break;
case REG_NOTLOADED: // Register isn't loaded at /all/ case REG_NOTLOADED: // Register isn't loaded at /all/
{ {
ARM64Reg host_reg = GetReg(); host_reg = GetReg();
u32 load_size; u32 load_size;
if (type == REG_REG) if (type == REG_REG)
{ {
load_size = 128; load_size = 128;
reg.LoadToReg(host_reg); reg.Load(host_reg, REG_REG);
} }
else else
{ {
load_size = 64; load_size = 64;
reg.LoadLowerReg(host_reg); reg.Load(host_reg, REG_LOWER_PAIR);
} }
reg.SetDirty(false); reg.SetDirty(false);
m_float_emit->LDR(load_size, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0])); m_float_emit->LDR(load_size, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0]));
return host_reg; return host_reg;
} }
break;
default: default:
_dbg_assert_msg_(DYNA_REC, false, "Invalid OpArg Type!"); _dbg_assert_msg_(DYNA_REC, false, "Invalid OpArg Type!");
break; break;
@ -380,90 +420,52 @@ ARM64Reg Arm64FPRCache::RW(u32 preg, RegType type)
reg.ResetLastUsed(); reg.ResetLastUsed();
reg.SetDirty(true); reg.SetDirty(true);
switch (reg.GetType())
// If not loaded at all, just alloc a new one.
if (reg.GetType() == REG_NOTLOADED)
{ {
case REG_NOTLOADED: reg.Load(GetReg(), type);
{ return reg.GetReg();
ARM64Reg host_reg = GetReg();
if (type == REG_LOWER_PAIR)
{
reg.LoadLowerReg(host_reg);
}
else if (type == REG_DUP)
{
reg.LoadDup(host_reg);
}
else
{
reg.LoadToReg(host_reg);
}
} }
break;
case REG_LOWER_PAIR: // Only the lower value will be overwritten, so we must be extra careful to store PSR1 if dirty.
if ((type == REG_LOWER_PAIR || type == REG_LOWER_PAIR_SINGLE) && was_dirty)
{ {
// We must *not* change host_reg as this register might still be in use. So it's fine to
// store this register, but it's *not* fine to convert it to double. So for double conversion,
// a temporary register needs to be used.
ARM64Reg host_reg = reg.GetReg(); ARM64Reg host_reg = reg.GetReg();
if (type == REG_REG) ARM64Reg flush_reg = host_reg;
switch (reg.GetType())
{ {
// Change it over to a full 128bit register case REG_REG_SINGLE:
reg.LoadToReg(host_reg); flush_reg = GetReg();
} m_float_emit->FCVTL(64, EncodeRegToDouble(flush_reg), EncodeRegToDouble(host_reg));
else if (type == REG_DUP) // fall through
{ case REG_REG:
// Register is already the lower pair
// Just convert it over to a dup
reg.LoadDup(host_reg);
}
}
break;
case REG_REG:
{
ARM64Reg host_reg = reg.GetReg();
if (type == REG_LOWER_PAIR)
{
// If we only want the lower bits, let's store away the high bits and drop to a lower only register
// We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit store. // We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit store.
// It would take longer to do an insert to a temporary and a 64bit store than to just do this. // It would take longer to do an insert to a temporary and a 64bit store than to just do this.
if (was_dirty) m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, X29, PPCSTATE_OFF(ps[preg][0]));
m_float_emit->STR(128, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0])); break;
reg.LoadLowerReg(host_reg); case REG_DUP_SINGLE:
flush_reg = GetReg();
m_float_emit->FCVT(64, 32, EncodeRegToDouble(flush_reg), EncodeRegToDouble(host_reg));
// fall through
case REG_DUP:
// Store PSR1 (which is equal to PSR0) in memory.
m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, X29, PPCSTATE_OFF(ps[preg][1]));
break;
default:
// All other types don't store anything in PSR1.
break;
} }
else if (type == REG_DUP)
{
// If we are going from a full 128bit register to a duplicate
// then we can just change over
reg.LoadDup(host_reg);
}
}
break;
case REG_DUP:
{
ARM64Reg host_reg = reg.GetReg();
if (type == REG_REG)
{
// We are a duplicated register going to a full 128bit register
// Do an insert of our lower 64bits to the higher 64bits
m_float_emit->INS(64, host_reg, 1, host_reg, 0);
// Change over to the full 128bit register if (host_reg != flush_reg)
reg.LoadToReg(host_reg); Unlock(flush_reg);
}
else if (type == REG_LOWER_PAIR)
{
// We are duplicated changing over to a lower register
// We've got to be careful in this instance and do a store of our lower 64bits
// to the upper 64bits in the PowerPC state
// That way incase if we hit the path of DUP->LOWER->REG we get the correct bits back
if (was_dirty)
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][1]));
reg.LoadLowerReg(host_reg);
}
}
break;
default:
// Do nothing
break;
} }
reg.Load(reg.GetReg(), type);
return reg.GetReg(); return reg.GetReg();
} }
@ -510,17 +512,37 @@ bool Arm64FPRCache::IsCalleeSaved(ARM64Reg reg)
void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state) void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state)
{ {
OpArg& reg = m_guest_registers[preg]; OpArg& reg = m_guest_registers[preg];
if (reg.GetType() == REG_REG || ARM64Reg host_reg = reg.GetReg();
reg.GetType() == REG_LOWER_PAIR) RegType type = reg.GetType();
bool dirty = reg.IsDirty();
// If we're in single mode, just convert it back to a double.
if (type == REG_REG_SINGLE)
{
if (dirty)
m_float_emit->FCVTL(64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
type = REG_REG;
}
if (type == REG_DUP_SINGLE || type == REG_LOWER_PAIR_SINGLE)
{
if (dirty)
m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
if (type == REG_DUP_SINGLE)
type = REG_DUP;
else
type = REG_LOWER_PAIR;
}
if (type == REG_REG || type == REG_LOWER_PAIR)
{ {
ARM64Reg host_reg = reg.GetReg();
u32 store_size; u32 store_size;
if (reg.GetType() == REG_REG) if (type == REG_REG)
store_size = 128; store_size = 128;
else else
store_size = 64; store_size = 64;
if (reg.IsDirty()) if (dirty)
m_float_emit->STR(store_size, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0])); m_float_emit->STR(store_size, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0]));
if (!maintain_state) if (!maintain_state)
@ -529,10 +551,9 @@ void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state)
reg.Flush(); reg.Flush();
} }
} }
else if (reg.GetType() == REG_DUP) else if (type == REG_DUP)
{ {
ARM64Reg host_reg = reg.GetReg(); if (dirty)
if (reg.IsDirty())
{ {
// If the paired registers were at the start of ppcState we could do an STP here. // If the paired registers were at the start of ppcState we could do an STP here.
// Too bad moving them would break savestate compatibility between x86_64 and AArch64 // Too bad moving them would break savestate compatibility between x86_64 and AArch64
@ -564,18 +585,25 @@ BitSet32 Arm64FPRCache::GetCallerSavedUsed()
return registers; return registers;
} }
// Reports whether the cached guest FPR `preg` is currently tracked as a
// single-precision type.
//
// preg:       index into m_guest_registers.
// lower_only: when true, REG_LOWER_PAIR_SINGLE also counts as single;
//             when false, only REG_REG_SINGLE and REG_DUP_SINGLE do.
bool Arm64FPRCache::IsSingle(u32 preg, bool lower_only)
{
	switch (m_guest_registers[preg].GetType())
	{
	case REG_REG_SINGLE:
	case REG_DUP_SINGLE:
		// These two types count as single regardless of lower_only.
		return true;
	case REG_LOWER_PAIR_SINGLE:
		// Counts as single only when the caller asked about the lower value alone.
		return lower_only;
	default:
		return false;
	}
}
void Arm64FPRCache::FixSinglePrecision(u32 preg) void Arm64FPRCache::FixSinglePrecision(u32 preg)
{ {
ARM64Reg host_reg = m_guest_registers[preg].GetReg(); OpArg& reg = m_guest_registers[preg];
switch (m_guest_registers[preg].GetType()) ARM64Reg host_reg = reg.GetReg();
switch (reg.GetType())
{ {
case REG_DUP: // only PS0 needs to be converted case REG_DUP: // only PS0 needs to be converted
m_float_emit->FCVT(32, 64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); m_float_emit->FCVT(32, 64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); reg.Load(host_reg, REG_DUP_SINGLE);
break; break;
case REG_REG: // PS0 and PS1 needs to be converted case REG_REG: // PS0 and PS1 needs to be converted
m_float_emit->FCVTN(32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); m_float_emit->FCVTN(32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
m_float_emit->FCVTL(64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); reg.Load(host_reg, REG_REG_SINGLE);
break; break;
default: default:
break; break;

View File

@ -22,7 +22,9 @@ enum RegType
REG_IMM, // Reg is really a IMM REG_IMM, // Reg is really a IMM
REG_LOWER_PAIR, // Only the lower pair of a paired register REG_LOWER_PAIR, // Only the lower pair of a paired register
REG_DUP, // The lower reg is the same as the upper one (physical upper doesn't actually have the duplicated value) REG_DUP, // The lower reg is the same as the upper one (physical upper doesn't actually have the duplicated value)
REG_IS_LOADED, // We don't care what type it is, as long as the lower 64bits are loaded REG_REG_SINGLE, // Both registers are loaded as single
REG_LOWER_PAIR_SINGLE, // Only the lower pair of a paired register, as single
REG_DUP_SINGLE, // The lower one contains both registers, as single
}; };
enum FlushMode enum FlushMode
@ -56,19 +58,9 @@ public:
{ {
return m_value; return m_value;
} }
void LoadToReg(ARM64Reg reg) void Load(ARM64Reg reg, RegType type = REG_REG)
{ {
m_type = REG_REG; m_type = type;
m_reg = reg;
}
void LoadLowerReg(ARM64Reg reg)
{
m_type = REG_LOWER_PAIR;
m_reg = reg;
}
void LoadDup(ARM64Reg reg)
{
m_type = REG_DUP;
m_reg = reg; m_reg = reg;
} }
void LoadToImm(u32 imm) void LoadToImm(u32 imm)
@ -278,6 +270,8 @@ public:
BitSet32 GetCallerSavedUsed() override; BitSet32 GetCallerSavedUsed() override;
bool IsSingle(u32 preg, bool lower_only = false);
void FixSinglePrecision(u32 preg); void FixSinglePrecision(u32 preg);
protected: protected:

View File

@ -9,22 +9,24 @@ struct BackPatchInfo
{ {
enum enum
{ {
FLAG_STORE = (1 << 0), FLAG_STORE = (1 << 0),
FLAG_LOAD = (1 << 1), FLAG_LOAD = (1 << 1),
FLAG_SIZE_8 = (1 << 2), FLAG_SIZE_8 = (1 << 2),
FLAG_SIZE_16 = (1 << 3), FLAG_SIZE_16 = (1 << 3),
FLAG_SIZE_32 = (1 << 4), FLAG_SIZE_32 = (1 << 4),
FLAG_SIZE_F32 = (1 << 5), FLAG_SIZE_F32 = (1 << 5),
FLAG_SIZE_F32X2 = (1 << 6), FLAG_SIZE_F32X2 = (1 << 6),
FLAG_SIZE_F64 = (1 << 7), FLAG_SIZE_F32X2I = (1 << 7),
FLAG_REVERSE = (1 << 8), FLAG_SIZE_F64 = (1 << 8),
FLAG_EXTEND = (1 << 9), FLAG_REVERSE = (1 << 9),
FLAG_SIZE_F32I = (1 << 10), FLAG_EXTEND = (1 << 10),
FLAG_ZERO_256 = (1 << 11), FLAG_SIZE_F32I = (1 << 11),
FLAG_MASK_FLOAT = FLAG_SIZE_F32 | FLAG_ZERO_256 = (1 << 12),
FLAG_SIZE_F32X2 | FLAG_MASK_FLOAT = FLAG_SIZE_F32 |
FLAG_SIZE_F64 | FLAG_SIZE_F32X2 |
FLAG_SIZE_F32I, FLAG_SIZE_F32X2I |
FLAG_SIZE_F64 |
FLAG_SIZE_F32I,
}; };
static u32 GetFlagSize(u32 flags) static u32 GetFlagSize(u32 flags)