Merge pull request #4395 from degasus/master

JitArm64: Implement 4 ppc instructions.
This commit is contained in:
Markus Wick 2016-10-29 14:50:18 +02:00 committed by GitHub
commit e7635a0089
5 changed files with 279 additions and 59 deletions

View File

@ -651,6 +651,11 @@ public:
ARM64Reg zr = Is64Bit(Rd) ? ZR : WZR;
CSINC(Rd, zr, zr, (CCFlags)((u32)cond ^ 1));
}
// CSETM alias: Rd becomes all ones when `cond` holds and zero otherwise.
// Emitted as CSINV Rd, zr, zr, <inverted cond>, where zr is the zero register
// of the same width as Rd.
void CSETM(ARM64Reg Rd, CCFlags cond)
{
  // Flipping the low bit of the condition code yields the opposite condition.
  const CCFlags inverted = (CCFlags)((u32)cond ^ 1);
  if (Is64Bit(Rd))
    CSINV(Rd, ZR, ZR, inverted);
  else
    CSINV(Rd, WZR, WZR, inverted);
}
// NEG alias: Rd = 0 - Rs, subtracting from the zero register that matches
// the width of Rd.
void NEG(ARM64Reg Rd, ARM64Reg Rs)
{
  const ARM64Reg zero = Is64Bit(Rd) ? ZR : WZR;
  SUB(Rd, zero, Rs);
}
// Data-Processing 1 source
void RBIT(ARM64Reg Rd, ARM64Reg Rn);

View File

@ -81,12 +81,15 @@ public:
// Per-instruction code emitters; each receives the instruction word being
// compiled. NOTE(review): these appear to be the integer-instruction handlers
// (the visible definitions of divwx, srawx and subfzex are gated by
// bJITIntegerOff) - confirm for the rest.
void addic(UGeckoInstruction inst);
void mulli(UGeckoInstruction inst);
void addzex(UGeckoInstruction inst);
void divwx(UGeckoInstruction inst);
void subfx(UGeckoInstruction inst);
void addcx(UGeckoInstruction inst);
void slwx(UGeckoInstruction inst);
void srwx(UGeckoInstruction inst);
void srawx(UGeckoInstruction inst);
void rlwimix(UGeckoInstruction inst);
void subfex(UGeckoInstruction inst);
void subfzex(UGeckoInstruction inst);
void subfcx(UGeckoInstruction inst);
void subfic(UGeckoInstruction inst);
void addex(UGeckoInstruction inst);
@ -96,6 +99,7 @@ public:
// Per-instruction code emitters; each receives the instruction word being
// compiled. NOTE(review): these appear to be the system/condition-register
// handlers (the visible definition of mcrxr is gated by
// bJITSystemRegistersOff) - confirm for the rest.
void mtmsr(UGeckoInstruction inst);
void mfmsr(UGeckoInstruction inst);
void mcrf(UGeckoInstruction inst);
void mcrxr(UGeckoInstruction inst);
void mfsr(UGeckoInstruction inst);
void mtsr(UGeckoInstruction inst);
void mfsrin(UGeckoInstruction inst);

View File

@ -869,6 +869,28 @@ void JitArm64::subfcx(UGeckoInstruction inst)
}
}
// subfze[.]: emits rD = ~rA + xer_ca using a flag-setting add.
// Forms with OE set are handed to the fallback path instead.
void JitArm64::subfzex(UGeckoInstruction inst)
{
  INSTRUCTION_START
  JITDISABLE(bJITIntegerOff);
  FALLBACK_IF(inst.OE);

  int src = inst.RA;
  int dst = inst.RD;

  // NOTE(review): the second argument presumably requests loading the current
  // guest value, which is only needed when dst doubles as the source.
  gpr.BindToRegister(dst, dst == src);

  // Scratch register holding the incoming carry byte.
  ARM64Reg carry_in = gpr.GetReg();
  LDRB(INDEX_UNSIGNED, carry_in, PPC_REG, PPCSTATE_OFF(xer_ca));

  // dst = ~src, then dst += carry_in; ADDS also updates the host flags.
  MVN(gpr.R(dst), gpr.R(src));
  ADDS(gpr.R(dst), gpr.R(dst), carry_in);
  gpr.Unlock(carry_in);

  // NOTE(review): ComputeCarry() presumably records the carry flag produced by
  // the ADDS above - it must stay directly after it.
  ComputeCarry();

  if (inst.Rc)
    ComputeRC(gpr.R(dst));
}
void JitArm64::subfic(UGeckoInstruction inst)
{
INSTRUCTION_START
@ -1009,6 +1031,79 @@ void JitArm64::divwux(UGeckoInstruction inst)
}
}
// divw[.]: signed 32-bit division rD = rA / rB.
// Three strategies are used:
//   1. both operands are known constants -> fold at compile time;
//   2. the divisor is a constant other than 0 and -1 -> plain SDIV, since
//      neither special case can occur;
//   3. otherwise -> SDIV guarded by runtime checks for a zero divisor and for
//      0x80000000 / -1.
// In the special cases the result is -1 when rB == 0 and rA is negative, and 0
// otherwise. NOTE(review): this yields 0 for 0x80000000 / -1 - confirm that
// matches the reference behaviour.
// Forms with OE set are handed to the fallback path instead.
void JitArm64::divwx(UGeckoInstruction inst)
{
  INSTRUCTION_START
  JITDISABLE(bJITIntegerOff);
  FALLBACK_IF(inst.OE);

  int ra = inst.RA;
  int rb = inst.RB;
  int rd = inst.RD;

  if (gpr.IsImm(ra) && gpr.IsImm(rb))
  {
    s32 dividend = gpr.GetImm(ra);
    s32 divisor = gpr.GetImm(rb);
    s32 quotient;

    if (divisor == 0)
    {
      // Division by zero: all ones for a negative dividend, zero otherwise.
      quotient = ((u32)dividend & 0x80000000) ? -1 : 0;
    }
    else if ((u32)dividend == 0x80000000 && divisor == -1)
    {
      // The quotient is not representable in 32 bits.
      quotient = 0;
    }
    else
    {
      quotient = (u32)(dividend / divisor);
    }

    gpr.SetImmediate(rd, quotient);
    if (inst.Rc)
      ComputeRC(quotient);
  }
  else if (gpr.IsImm(rb) && gpr.GetImm(rb) != 0 && gpr.GetImm(rb) != -1)
  {
    // Constant, well-behaved divisor: no runtime checks required.
    ARM64Reg divisor_reg = gpr.GetReg();
    MOVI2R(divisor_reg, gpr.GetImm(rb));
    gpr.BindToRegister(rd, rd == ra);
    SDIV(gpr.R(rd), gpr.R(ra), divisor_reg);
    gpr.Unlock(divisor_reg);

    if (inst.Rc)
      ComputeRC(gpr.R(rd));
  }
  else
  {
    gpr.BindToRegister(rd, rd == ra || rd == rb);
    ARM64Reg min_int = gpr.GetReg();

    // rB == 0 -> special case.
    FixupBranch divisor_is_zero = CBZ(gpr.R(rb));

    // rA == 0x80000000 && rB == -1 -> special case. The CCMN only performs
    // the rB + 1 comparison when the first comparison was equal; otherwise it
    // forces the flags to 0 so that the EQ branch is not taken.
    MOVI2R(min_int, -0x80000000LL);
    CMP(gpr.R(ra), min_int);
    CCMN(gpr.R(rb), 1, 0, CC_EQ);
    FixupBranch overflow = B(CC_EQ);

    // Common case.
    SDIV(gpr.R(rd), gpr.R(ra), gpr.R(rb));
    FixupBranch finished = B();

    SetJumpTarget(divisor_is_zero);
    SetJumpTarget(overflow);

    // rD = (rB == 0 && rA < 0) ? -1 : 0
    CMP(gpr.R(rb), 0);
    CCMP(gpr.R(ra), 0, 0, CC_EQ);
    CSETM(gpr.R(rd), CC_LT);

    SetJumpTarget(finished);
    gpr.Unlock(min_int);

    if (inst.Rc)
      ComputeRC(gpr.R(rd));
  }
}
void JitArm64::slwx(UGeckoInstruction inst)
{
INSTRUCTION_START
@ -1104,6 +1199,92 @@ void JitArm64::srwx(UGeckoInstruction inst)
}
}
// sraw[.]: rA = rS arithmetically shifted right by the amount in rB.
// Bit 5 (0x20) of the amount selects a full sign fill; otherwise the low five
// bits give the shift count. The carry is set when rS is negative and at least
// one 1-bit was shifted out (see the constant-folded path below, which spells
// this out).
//
// Four strategies are used:
//   1. rS and rB both constant        -> fold result and carry at compile time;
//   2. rB constant (< 32), no carry   -> single immediate ASR;
//   3. rB variable, carry not wanted  -> 64-bit shift trick;
//   4. rB variable, carry wanted      -> branchy sequence that also computes
//                                        the carry byte.
void JitArm64::srawx(UGeckoInstruction inst)
{
  INSTRUCTION_START
  JITDISABLE(bJITIntegerOff);

  int a = inst.RA, b = inst.RB, s = inst.RS;

  if (gpr.IsImm(b) && gpr.IsImm(s))
  {
    s32 i = gpr.GetImm(s), amount = gpr.GetImm(b);
    if (amount & 0x20)
    {
      // Shift of 32 or more: result is the sign replicated, carry is the sign.
      gpr.SetImmediate(a, i & 0x80000000 ? 0xFFFFFFFF : 0);
      ComputeCarry(i & 0x80000000 ? true : false);
    }
    else
    {
      amount &= 0x1F;
      gpr.SetImmediate(a, i >> amount);
      // Shift the unsigned bit pattern: left-shifting a negative signed value
      // is not well defined, and only the "any bit shifted out" test matters.
      ComputeCarry(amount != 0 && i < 0 && ((u32)i << (32 - amount)));
    }

    if (inst.Rc)
      ComputeRC(gpr.GetImm(a), 0);
    return;
  }
  else if (gpr.IsImm(b) && (gpr.GetImm(b) & 0x20) == 0 && !js.op->wantsCA)
  {
    gpr.BindToRegister(a, a == s);
    // The value being shifted is rS. rA is only the destination and has not
    // been loaded when a != s, so it must not be used as the source.
    ASR(gpr.R(a), gpr.R(s), gpr.GetImm(b) & 0x1F);
  }
  else if (!js.op->wantsCA)
  {
    gpr.BindToRegister(a, a == b || a == s);
    ARM64Reg WA = gpr.GetReg();

    // Place rS in the upper half of a 64-bit register, shift arithmetically by
    // the variable amount, then take the upper half back. This handles the
    // amounts of 32 and above without a branch.
    LSL(EncodeRegTo64(WA), EncodeRegTo64(gpr.R(s)), 32);
    ASRV(EncodeRegTo64(WA), EncodeRegTo64(WA), EncodeRegTo64(gpr.R(b)));
    LSR(EncodeRegTo64(gpr.R(a)), EncodeRegTo64(WA), 32);

    gpr.Unlock(WA);
  }
  else
  {
    gpr.BindToRegister(a, a == b || a == s);
    ARM64Reg WA = gpr.GetReg();  // carry byte to store
    ARM64Reg WB = gpr.GetReg();  // result
    ARM64Reg WC = gpr.GetReg();  // shift amount / scratch

    // On the fall-through path bit 5 is clear, so this leaves WA == 0
    // (the default "no carry").
    ANDI2R(WA, gpr.R(b), 32);
    FixupBranch bit_is_not_zero = TBNZ(gpr.R(b), 5);

    // Amount < 32. A zero amount returns rS unchanged with no carry.
    ANDSI2R(WC, gpr.R(b), 31);
    MOV(WB, gpr.R(s));
    FixupBranch is_zero = B(CC_EQ);

    ASRV(WB, gpr.R(s), WC);
    // A non-negative rS never produces a carry.
    FixupBranch bit_is_zero = TBZ(gpr.R(s), 31);

    // Negative rS: carry = any of the shifted-out bits is set,
    // i.e. (rS << (32 - amount)) != 0.
    MOVI2R(WA, 32);
    SUB(WC, WA, WC);
    LSL(WC, gpr.R(s), WC);
    CMP(WC, 0);
    CSET(WA, CC_NEQ);
    FixupBranch end = B();

    // Amount >= 32: result is the sign fill, carry is the sign.
    SetJumpTarget(bit_is_not_zero);
    CMP(gpr.R(s), 0);
    CSET(WA, CC_LT);
    CSINV(WB, WZR, WZR, CC_GE);

    SetJumpTarget(is_zero);
    SetJumpTarget(bit_is_zero);
    SetJumpTarget(end);

    MOV(gpr.R(a), WB);
    STRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));

    gpr.Unlock(WA, WB, WC);
  }

  if (inst.Rc)
    ComputeRC(gpr.R(a), 0);
}
void JitArm64::rlwimix(UGeckoInstruction inst)
{
INSTRUCTION_START

View File

@ -84,6 +84,36 @@ void JitArm64::mcrf(UGeckoInstruction inst)
}
}
// mcrxr: copies the SO, OV and CA bits (followed by a zero bit) into CR field
// inst.CRFD, then clears them in the XER state.
// The CR field is stored in its expanded 64-bit form, looked up in m_crTable
// using the 4-bit value [SO OV CA 0] as the index.
void JitArm64::mcrxr(UGeckoInstruction inst)
{
  INSTRUCTION_START
  JITDISABLE(bJITSystemRegistersOff);

  ARM64Reg WA = gpr.GetReg();
  ARM64Reg XA = EncodeRegTo64(WA);
  ARM64Reg WB = gpr.GetReg();
  ARM64Reg XB = EncodeRegTo64(WB);

  // Copy XER[0-3] into CR[inst.CRFD]
  LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
  LDRB(INDEX_UNSIGNED, WB, PPC_REG, PPCSTATE_OFF(xer_so_ov));

  // [0 SO OV CA]
  // xer_so_ov must be shifted by exactly one bit to sit directly above CA;
  // a larger shift leaves a gap, producing a wrong (and potentially
  // out-of-range) table index.
  // NOTE(review): assumes xer_so_ov is laid out as (SO << 1) | OV and xer_ca
  // is 0 or 1 - confirm against the state struct.
  ADD(WA, WA, WB, ArithOption(WB, ST_LSL, 1));
  // [SO OV CA 0] << 3
  // One bit to form the CR nibble, three more to scale by the 8-byte entry size.
  LSL(WA, WA, 4);

  MOVP2R(XB, m_crTable);
  LDR(XB, XB, XA);
  STR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val[inst.CRFD]));

  // Clear XER[0-3]
  STRB(INDEX_UNSIGNED, WZR, PPC_REG, PPCSTATE_OFF(xer_ca));
  STRB(INDEX_UNSIGNED, WZR, PPC_REG, PPCSTATE_OFF(xer_so_ov));

  gpr.Unlock(WA, WB);
}
void JitArm64::mfsr(UGeckoInstruction inst)
{
INSTRUCTION_START

View File

@ -178,54 +178,54 @@ static GekkoOPTemplate table19[] = {
};
static GekkoOPTemplate table31[] = {
{266, &JitArm64::addx}, // addx
{778, &JitArm64::addx}, // addox
{10, &JitArm64::addcx}, // addcx
{522, &JitArm64::addcx}, // addcox
{138, &JitArm64::addex}, // addex
{650, &JitArm64::addex}, // addeox
{234, &JitArm64::FallBackToInterpreter}, // addmex
{746, &JitArm64::FallBackToInterpreter}, // addmeox
{202, &JitArm64::addzex}, // addzex
{714, &JitArm64::addzex}, // addzeox
{491, &JitArm64::FallBackToInterpreter}, // divwx
{1003, &JitArm64::FallBackToInterpreter}, // divwox
{459, &JitArm64::divwux}, // divwux
{971, &JitArm64::divwux}, // divwuox
{75, &JitArm64::mulhwx}, // mulhwx
{11, &JitArm64::mulhwux}, // mulhwux
{235, &JitArm64::mullwx}, // mullwx
{747, &JitArm64::mullwx}, // mullwox
{104, &JitArm64::negx}, // negx
{616, &JitArm64::negx}, // negox
{40, &JitArm64::subfx}, // subfx
{552, &JitArm64::subfx}, // subfox
{8, &JitArm64::subfcx}, // subfcx
{520, &JitArm64::subfcx}, // subfcox
{136, &JitArm64::subfex}, // subfex
{648, &JitArm64::subfex}, // subfeox
{232, &JitArm64::FallBackToInterpreter}, // subfmex
{744, &JitArm64::FallBackToInterpreter}, // subfmeox
{200, &JitArm64::FallBackToInterpreter}, // subfzex
{712, &JitArm64::FallBackToInterpreter}, // subfzeox
{266, &JitArm64::addx}, // addx
{778, &JitArm64::addx}, // addox
{10, &JitArm64::addcx}, // addcx
{522, &JitArm64::addcx}, // addcox
{138, &JitArm64::addex}, // addex
{650, &JitArm64::addex}, // addeox
{234, &JitArm64::FallBackToInterpreter}, // addmex
{746, &JitArm64::FallBackToInterpreter}, // addmeox
{202, &JitArm64::addzex}, // addzex
{714, &JitArm64::addzex}, // addzeox
{491, &JitArm64::divwx}, // divwx
{1003, &JitArm64::divwx}, // divwox
{459, &JitArm64::divwux}, // divwux
{971, &JitArm64::divwux}, // divwuox
{75, &JitArm64::mulhwx}, // mulhwx
{11, &JitArm64::mulhwux}, // mulhwux
{235, &JitArm64::mullwx}, // mullwx
{747, &JitArm64::mullwx}, // mullwox
{104, &JitArm64::negx}, // negx
{616, &JitArm64::negx}, // negox
{40, &JitArm64::subfx}, // subfx
{552, &JitArm64::subfx}, // subfox
{8, &JitArm64::subfcx}, // subfcx
{520, &JitArm64::subfcx}, // subfcox
{136, &JitArm64::subfex}, // subfex
{648, &JitArm64::subfex}, // subfeox
{232, &JitArm64::FallBackToInterpreter}, // subfmex
{744, &JitArm64::FallBackToInterpreter}, // subfmeox
{200, &JitArm64::subfzex}, // subfzex
{712, &JitArm64::subfzex}, // subfzeox
{28, &JitArm64::boolX}, // andx
{60, &JitArm64::boolX}, // andcx
{444, &JitArm64::boolX}, // orx
{124, &JitArm64::boolX}, // norx
{316, &JitArm64::boolX}, // xorx
{412, &JitArm64::boolX}, // orcx
{476, &JitArm64::boolX}, // nandx
{284, &JitArm64::boolX}, // eqvx
{0, &JitArm64::cmp}, // cmp
{32, &JitArm64::cmpl}, // cmpl
{26, &JitArm64::cntlzwx}, // cntlzwx
{922, &JitArm64::extsXx}, // extshx
{954, &JitArm64::extsXx}, // extsbx
{536, &JitArm64::srwx}, // srwx
{792, &JitArm64::FallBackToInterpreter}, // srawx
{824, &JitArm64::srawix}, // srawix
{24, &JitArm64::slwx}, // slwx
{28, &JitArm64::boolX}, // andx
{60, &JitArm64::boolX}, // andcx
{444, &JitArm64::boolX}, // orx
{124, &JitArm64::boolX}, // norx
{316, &JitArm64::boolX}, // xorx
{412, &JitArm64::boolX}, // orcx
{476, &JitArm64::boolX}, // nandx
{284, &JitArm64::boolX}, // eqvx
{0, &JitArm64::cmp}, // cmp
{32, &JitArm64::cmpl}, // cmpl
{26, &JitArm64::cntlzwx}, // cntlzwx
{922, &JitArm64::extsXx}, // extshx
{954, &JitArm64::extsXx}, // extsbx
{536, &JitArm64::srwx}, // srwx
{792, &JitArm64::srawx}, // srawx
{824, &JitArm64::srawix}, // srawix
{24, &JitArm64::slwx}, // slwx
{54, &JitArm64::dcbx}, // dcbst
{86, &JitArm64::dcbx}, // dcbf
@ -294,18 +294,18 @@ static GekkoOPTemplate table31[] = {
{759, &JitArm64::stfXX}, // stfdux
{983, &JitArm64::stfXX}, // stfiwx
{19, &JitArm64::mfcr}, // mfcr
{83, &JitArm64::mfmsr}, // mfmsr
{144, &JitArm64::mtcrf}, // mtcrf
{146, &JitArm64::mtmsr}, // mtmsr
{210, &JitArm64::mtsr}, // mtsr
{242, &JitArm64::mtsrin}, // mtsrin
{339, &JitArm64::mfspr}, // mfspr
{467, &JitArm64::mtspr}, // mtspr
{371, &JitArm64::mftb}, // mftb
{512, &JitArm64::FallBackToInterpreter}, // mcrxr
{595, &JitArm64::mfsr}, // mfsr
{659, &JitArm64::mfsrin}, // mfsrin
{19, &JitArm64::mfcr}, // mfcr
{83, &JitArm64::mfmsr}, // mfmsr
{144, &JitArm64::mtcrf}, // mtcrf
{146, &JitArm64::mtmsr}, // mtmsr
{210, &JitArm64::mtsr}, // mtsr
{242, &JitArm64::mtsrin}, // mtsrin
{339, &JitArm64::mfspr}, // mfspr
{467, &JitArm64::mtspr}, // mtspr
{371, &JitArm64::mftb}, // mftb
{512, &JitArm64::mcrxr}, // mcrxr
{595, &JitArm64::mfsr}, // mfsr
{659, &JitArm64::mfsrin}, // mfsrin
{4, &JitArm64::twx}, // tw
{598, &JitArm64::DoNothing}, // sync