JITIL: optimize branches.

This commit is contained in:
magumagu 2014-06-23 21:43:38 -07:00 committed by Pierre Bourdon
parent 79cc000d62
commit 5bb428c685
4 changed files with 101 additions and 16 deletions

View File

@ -665,6 +665,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
case FDNeg:
case ConvertFromFastCR:
case ConvertToFastCR:
case FastCRSOSet:
case FastCREQSet:
case FastCRGTSet:
case FastCRLTSet:
if (thisUsed)
regMarkUse(RI, I, getOp1(I), 1);
break;
@ -1209,6 +1213,52 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
regNormalRegClear(RI, I);
break;
}
// FastCRSOSet: materialise 1 if bit 61 of the operand is set, otherwise 0.
// NOTE(review): bit 61 is presumably the SO flag of the fast-CR encoding,
// judging by the opcode name — confirm against the encoding's definition.
case FastCRSOSet:
{
// Nothing is emitted when the result of this instruction is never consumed.
if (!thisUsed) break;
X64Reg reg = regUReg(RI, I);
// The 64-bit mask is loaded into RAX and TEST is done register-against-operand.
Jit->MOV(64, R(RAX), Imm64(1ull << 61));
Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RAX));
// SETcc writes only AL (overwriting the low byte of the mask); MOVZX then
// zero-extends that byte into the destination register.
Jit->SETcc(CC_NZ, R(AL));
Jit->MOVZX(32, 8, reg, R(AL));
// Record I as the value now held in reg.
RI.regs[reg] = I;
// NOTE(review): presumably updates register bookkeeping for I's operands —
// confirm against regNormalRegClear.
regNormalRegClear(RI, I);
break;
}
// FastCREQSet: materialise 1 if the low 32 bits of the operand are zero,
// otherwise 0.
// NOTE(review): only a 32-bit compare is emitted — presumably the fast-CR
// encoding defines EQ on the low 32 bits; confirm against the encoding.
case FastCREQSet:
{
// Nothing is emitted when the result of this instruction is never consumed.
if (!thisUsed) break;
X64Reg reg = regUReg(RI, I);
Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm32(0));
// SETcc writes only AL; MOVZX zero-extends that byte into the destination.
Jit->SETcc(CC_Z, R(AL));
Jit->MOVZX(32, 8, reg, R(AL));
// Record I as the value now held in reg.
RI.regs[reg] = I;
// NOTE(review): presumably updates register bookkeeping for I's operands —
// confirm against regNormalRegClear.
regNormalRegClear(RI, I);
break;
}
// FastCRGTSet: materialise 1 if the operand, compared as a 64-bit value
// against 0 with the signed "greater" condition, is greater; otherwise 0.
case FastCRGTSet:
{
// Nothing is emitted when the result of this instruction is never consumed.
if (!thisUsed) break;
X64Reg reg = regUReg(RI, I);
Jit->CMP(64, regLocForInst(RI, getOp1(I)), Imm8(0));
// CC_G is the signed greater-than condition. SETcc writes only AL; MOVZX
// zero-extends that byte into the destination register.
Jit->SETcc(CC_G, R(AL));
Jit->MOVZX(32, 8, reg, R(AL));
// Record I as the value now held in reg.
RI.regs[reg] = I;
// NOTE(review): presumably updates register bookkeeping for I's operands —
// confirm against regNormalRegClear.
regNormalRegClear(RI, I);
break;
}
// FastCRLTSet: materialise 1 if bit 62 of the operand is set, otherwise 0.
// NOTE(review): bit 62 is presumably the LT flag of the fast-CR encoding,
// judging by the opcode name — confirm against the encoding's definition.
case FastCRLTSet:
{
// Nothing is emitted when the result of this instruction is never consumed.
if (!thisUsed) break;
X64Reg reg = regUReg(RI, I);
// The 64-bit mask is loaded into RAX and TEST is done register-against-operand.
Jit->MOV(64, R(RAX), Imm64(1ull << 62));
Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RAX));
// SETcc writes only AL (overwriting the low byte of the mask); MOVZX then
// zero-extends that byte into the destination register.
Jit->SETcc(CC_NZ, R(AL));
Jit->MOVZX(32, 8, reg, R(AL));
// Record I as the value now held in reg.
RI.regs[reg] = I;
// NOTE(review): presumably updates register bookkeeping for I's operands —
// confirm against regNormalRegClear.
regNormalRegClear(RI, I);
break;
}
case LoadSingle: {
if (!thisUsed) break;
X64Reg reg = fregFindFreeReg(RI);

View File

@ -1065,9 +1065,9 @@ unsigned IRBuilder::getNumberOfOperands(InstLoc I) const {
numberOfOperands[CInt16] = 0;
numberOfOperands[CInt32] = 0;
static unsigned ZeroOp[] = {LoadCR, LoadLink, LoadMSR, LoadGReg, LoadCTR, InterpreterBranch, LoadCarry, RFIExit, LoadFReg, LoadFRegDENToZero, LoadGQR, Int3, };
static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, ConvertFromFastCR, ConvertToFastCR};
static unsigned BiOp[] = {BranchCond, IdleBranch, And, Xor, Sub, Or, Add, Mul, Rol, Shl, Shrl, Sarl, ICmpEq, ICmpNe, ICmpUgt, ICmpUlt, ICmpSgt, ICmpSlt, ICmpSge, ICmpSle, Store8, Store16, Store32, ICmpCRSigned, ICmpCRUnsigned, FallBackToInterpreter, StoreSingle, StoreDouble, StorePaired, InsertDoubleInMReg, FSMul, FSAdd, FSSub, FDMul, FDAdd, FDSub, FPAdd, FPMul, FPSub, FPMerge00, FPMerge01, FPMerge10, FPMerge11, FDCmpCR, };
static unsigned ZeroOp[] = { LoadCR, LoadLink, LoadMSR, LoadGReg, LoadCTR, InterpreterBranch, LoadCarry, RFIExit, LoadFReg, LoadFRegDENToZero, LoadGQR, Int3, };
static unsigned UOp[] = { StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, ConvertFromFastCR, ConvertToFastCR, FastCRSOSet, FastCREQSet, FastCRGTSet, FastCRLTSet, };
static unsigned BiOp[] = { BranchCond, IdleBranch, And, Xor, Sub, Or, Add, Mul, Rol, Shl, Shrl, Sarl, ICmpEq, ICmpNe, ICmpUgt, ICmpUlt, ICmpSgt, ICmpSlt, ICmpSge, ICmpSle, Store8, Store16, Store32, ICmpCRSigned, ICmpCRUnsigned, FallBackToInterpreter, StoreSingle, StoreDouble, StorePaired, InsertDoubleInMReg, FSMul, FSAdd, FSSub, FDMul, FDAdd, FDSub, FPAdd, FPMul, FPSub, FPMerge00, FPMerge01, FPMerge10, FPMerge11, FDCmpCR, };
for (auto& op : ZeroOp) {
numberOfOperands[op] = 0;
}
@ -1174,6 +1174,7 @@ static const std::string opcodeNames[] = {
"Not", "Load8", "Load16", "Load32", "BranchUncond", "ConvertFromFastCR",
"ConvertToFastCR", "StoreGReg", "StoreCR", "StoreLink", "StoreCarry",
"StoreCTR", "StoreMSR", "StoreFPRF", "StoreGQR", "StoreSRR",
"FastCRSOSet", "FastCREQSet", "FastCRGTSet", "FastCRLTSet",
"FallBackToInterpreter", "Add", "Mul", "And", "Or", "Xor",
"MulHighUnsigned", "Sub", "Shl", "Shrl", "Sarl", "Rol",
"ICmpCRSigned", "ICmpCRUnsigned", "ICmpEq", "ICmpNe", "ICmpUgt",

View File

@ -48,6 +48,11 @@ enum Opcode {
StoreFPRF,
StoreGQR,
StoreSRR,
// Branch conditions
FastCRSOSet,
FastCREQSet,
FastCRGTSet,
FastCRLTSet,
// Arbitrary interpreter instruction
FallBackToInterpreter,
@ -77,6 +82,7 @@ enum Opcode {
ICmpSlt,
ICmpSge,
ICmpSle, // Opposite of sgt
// Memory store operators
Store8,
Store16,
@ -382,6 +388,18 @@ public:
InstLoc EmitConvertToFastCR(InstLoc op1) {
return FoldUOp(ConvertToFastCR, op1);
}
// Emits a unary FastCRSOSet instruction taking op1 as its operand, routed
// through FoldUOp. Returns the InstLoc that FoldUOp produces.
InstLoc EmitFastCRSOSet(InstLoc op1) {
return FoldUOp(FastCRSOSet, op1);
}
// Emits a unary FastCREQSet instruction taking op1 as its operand, routed
// through FoldUOp. Returns the InstLoc that FoldUOp produces.
InstLoc EmitFastCREQSet(InstLoc op1) {
return FoldUOp(FastCREQSet, op1);
}
// Emits a unary FastCRLTSet instruction taking op1 as its operand, routed
// through FoldUOp. Returns the InstLoc that FoldUOp produces.
InstLoc EmitFastCRLTSet(InstLoc op1) {
return FoldUOp(FastCRLTSet, op1);
}
// Emits a unary FastCRGTSet instruction taking op1 as its operand, routed
// through FoldUOp. Returns the InstLoc that FoldUOp produces.
InstLoc EmitFastCRGTSet(InstLoc op1) {
return FoldUOp(FastCRGTSet, op1);
}
InstLoc EmitFallBackToInterpreter(InstLoc op1, InstLoc op2) {
return FoldBiOp(FallBackToInterpreter, op1, op2);
}

View File

@ -61,16 +61,35 @@ void JitILBase::bx(UGeckoInstruction inst)
ibuild.EmitBranchUncond(ibuild.EmitIntConst(destination));
}
// Builds the IR value for the CR-bit condition of a conditional branch.
//
// Loads CR field (inst.BI >> 2) and emits the FastCR*Set test that matches the
// bit chosen by the low two bits of inst.BI. When bit 8 of inst.BO is clear,
// the test result is XORed with the constant 1 before being returned.
//
// ibuild: IR builder that receives the emitted instructions.
// inst:   the branch instruction being translated; only BI and BO are read.
// Returns the InstLoc of the (possibly XORed) test.
static IREmitter::InstLoc EmitCRTest(IREmitter::IRBuilder& ibuild, UGeckoInstruction inst)
{
	IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2);

	// Start from nullptr (as TestBranch does for its own CRTest) so the value
	// is never indeterminate should no case below match.
	IREmitter::InstLoc CRTest = nullptr;

	// The low two bits of BI are converted with `3 - x` before being matched
	// against the CR_*_BIT constants.
	// NOTE(review): this assumes the CR_*_BIT constants cover 0..3 — confirm.
	switch (3 - (inst.BI & 3))
	{
	case CR_SO_BIT:
		CRTest = ibuild.EmitFastCRSOSet(CRReg);
		break;
	case CR_EQ_BIT:
		CRTest = ibuild.EmitFastCREQSet(CRReg);
		break;
	case CR_GT_BIT:
		CRTest = ibuild.EmitFastCRGTSet(CRReg);
		break;
	case CR_LT_BIT:
		CRTest = ibuild.EmitFastCRLTSet(CRReg);
		break;
	}

	// BO bit 8 clear: flip the low bit of the test result.
	// NOTE(review): presumably the FastCR*Set results are 0 or 1, making this
	// a logical negation — confirm against the backend.
	if (!(inst.BO & 8))
		CRTest = ibuild.EmitXor(CRTest, ibuild.EmitIntConst(1));

	return CRTest;
}
static IREmitter::InstLoc TestBranch(IREmitter::IRBuilder& ibuild, UGeckoInstruction inst) {
IREmitter::InstLoc CRTest = nullptr, CTRTest = nullptr;
if ((inst.BO & 16) == 0) // Test a CR bit
{
IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2);
CRReg = ibuild.EmitConvertFromFastCR(CRReg);
IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3));
CRTest = ibuild.EmitAnd(CRReg, CRCmp);
if (!(inst.BO & 8))
CRTest = ibuild.EmitXor(CRCmp, CRTest);
CRTest = EmitCRTest(ibuild, inst);
}
if ((inst.BO & 4) == 0) {
@ -141,13 +160,10 @@ void JitILBase::bcctrx(UGeckoInstruction inst)
IREmitter::InstLoc test;
if ((inst.BO & 16) == 0) // Test a CR bit
{
IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2);
CRReg = ibuild.EmitConvertFromFastCR(CRReg);
IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3));
test = ibuild.EmitAnd(CRReg, CRCmp);
if (!(inst.BO & 8))
test = ibuild.EmitXor(test, CRCmp);
} else {
test = EmitCRTest(ibuild, inst);
}
else
{
test = ibuild.EmitIntConst(1);
}
test = ibuild.EmitICmpEq(test, ibuild.EmitIntConst(0));