Initial unoptimized JITIL flag optimization.

This commit is contained in:
magumagu 2014-06-23 19:19:22 -07:00 committed by Pierre Bourdon
parent 5506e57ab8
commit 1429fccb97
7 changed files with 114 additions and 18 deletions

View File

@ -85,7 +85,7 @@ static unsigned regReadUse(RegInfo& R, InstLoc I) {
return R.IInfo[I - R.FirstI] & 3;
}
static unsigned SlotSet[1000];
static u64 SlotSet[1000];
static u8 GC_ALIGNED16(FSlotSet[16*1000]);
static OpArg regLocForSlot(RegInfo& RI, unsigned slot) {
@ -107,7 +107,7 @@ static void regSpill(RegInfo& RI, X64Reg reg) {
unsigned slot = regGetSpill(RI, RI.regs[reg]);
if (!slot) {
slot = regCreateSpill(RI, RI.regs[reg]);
RI.Jit->MOV(32, regLocForSlot(RI, slot), R(reg));
RI.Jit->MOV(64, regLocForSlot(RI, slot), R(reg));
}
RI.regs[reg] = nullptr;
}
@ -621,6 +621,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
case FPDup1:
case FSNeg:
case FDNeg:
case ConvertFromFastCR:
case ConvertToFastCR:
if (thisUsed)
regMarkUse(RI, I, getOp1(I), 1);
break;
@ -763,8 +765,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
if (!thisUsed) break;
X64Reg reg = regFindFreeReg(RI);
unsigned ppcreg = *I >> 8;
// TODO(delroth): unbreak
//Jit->MOVZX(32, 8, reg, M(&PowerPC::ppcState.cr_fast[ppcreg]));
Jit->MOV(64, R(reg), M(&PowerPC::ppcState.cr_val[ppcreg]));
RI.regs[reg] = I;
break;
}
@ -814,11 +815,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
break;
}
case StoreCR: {
	// Writes operand 1 as a full 64-bit value into
	// PowerPC::ppcState.cr_val[ppcreg]. The slot index is taken from the
	// instruction word shifted right by 16. RCX is used as the staging
	// register because the source may live in a spill slot.
	Jit->MOV(64, R(RCX), regLocForInst(RI, getOp1(I)));
	unsigned ppcreg = *I >> 16;
	Jit->MOV(64, M(&PowerPC::ppcState.cr_val[ppcreg]), R(RCX));
	regNormalRegClear(RI, I);
	break;
}
@ -1116,6 +1115,84 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
regNormalRegClear(RI, I);
break;
}
case ConvertFromFastCR:
{
	// Collapses the 64-bit condition value held in operand 1 into a 4-bit
	// flag field, laid out as:
	//   bit 0 = SO, bit 1 = EQ, bit 2 = GT, bit 3 = LT.
	// EAX accumulates the result and RCX/ECX is scratch. 64-bit operations
	// name the register RCX and 32-bit operations name it ECX.
	// NOTE(review): assumes regUReg never hands out RAX or RCX as cr_val,
	// since both are clobbered below - confirm against the allocator.
	if (!thisUsed) break;
	X64Reg cr_val = regUReg(RI, I);
	Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I)));

	Jit->XOR(32, R(EAX), R(EAX));

	// SO: Bit 61 set. Shift it down to bit 0 and isolate it.
	Jit->MOV(64, R(RCX), R(cr_val));
	Jit->SHR(64, R(RCX), Imm8(61));
	Jit->AND(32, R(ECX), Imm8(1));
	Jit->OR(32, R(EAX), R(ECX));

	// EQ: Bits 31-0 == 0. ECX is zeroed before the TEST because XOR
	// alters the flags and SETcc only writes the low byte.
	Jit->XOR(32, R(ECX), R(ECX));
	Jit->TEST(32, R(cr_val), R(cr_val));
	Jit->SETcc(CC_Z, R(ECX));
	Jit->SHL(32, R(ECX), Imm8(1));
	Jit->OR(32, R(EAX), R(ECX));

	// GT: Value > 0 when viewed as a signed 64-bit integer.
	Jit->XOR(32, R(ECX), R(ECX));
	Jit->TEST(64, R(cr_val), R(cr_val));
	Jit->SETcc(CC_G, R(ECX));
	Jit->SHL(32, R(ECX), Imm8(2));
	Jit->OR(32, R(EAX), R(ECX));

	// LT: Bit 62 set. Shifting right by 62 - 3 lands it on bit 3, which
	// the 0x8 mask then isolates.
	Jit->MOV(64, R(RCX), R(cr_val));
	Jit->SHR(64, R(RCX), Imm8(62 - 3));
	Jit->AND(32, R(ECX), Imm8(0x8));
	Jit->OR(32, R(EAX), R(ECX));

	// Publish the assembled flag field as this instruction's result.
	Jit->MOV(32, R(cr_val), R(EAX));
	RI.regs[cr_val] = I;
	regNormalRegClear(RI, I);
	break;
}
// Expands the flag field held in operand 1 into a 64-bit condition value.
// The encoding produced here is the one the ConvertFromFastCR case decodes:
// SO -> bit 61, EQ -> low 32 bits all zero, GT -> signed value > 0,
// LT -> bit 62. RCX accumulates the result and RAX is scratch.
// NOTE(review): the bit positions quoted below presume CR_EQ, CR_GT and
// CR_LT are bits 1, 2 and 3 of the input (mirroring the decode side) and
// that SO is bit 0 - confirm against their definitions.
case ConvertToFastCR:
{
// Nothing is emitted when thisUsed is false.
if (!thisUsed) break;
X64Reg cr_val = regUReg(RI, I);
Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I)));
// Seed the result with bit 32 set: the value is then non-zero and, unless
// bit 63 gets set below, positive, while the low 32 bits remain zero.
Jit->MOV(64, R(RCX), Imm64(1ull << 32));
// SO: move input bit 0 up to bit 63, then down to bit 61. The shift pair
// also discards every other input bit.
Jit->MOV(64, R(RAX), R(cr_val));
Jit->SHL(64, R(RAX), Imm8(63));
Jit->SHR(64, R(RAX), Imm8(63 - 61));
Jit->OR(64, R(RCX), R(RAX));
// EQ: inverted - when the input EQ flag is clear, a bit is left set in the
// low 32 bits so they are no longer all zero.
Jit->MOV(64, R(RAX), R(cr_val));
Jit->NOT(64, R(RAX));
Jit->AND(64, R(RAX), Imm8(CR_EQ));
Jit->OR(64, R(RCX), R(RAX));
// GT: inverted - when the input GT flag is clear, the masked bit is shifted
// left by 63 - 2 (onto bit 63 if CR_GT is bit 2), making the value negative.
Jit->MOV(64, R(RAX), R(cr_val));
Jit->NOT(64, R(RAX));
Jit->AND(64, R(RAX), Imm8(CR_GT));
Jit->SHL(64, R(RAX), Imm8(63 - 2));
Jit->OR(64, R(RCX), R(RAX));
// LT: the masked flag is shifted left by 62 - 3 (onto bit 62 if CR_LT is
// bit 3).
Jit->MOV(64, R(RAX), R(cr_val));
Jit->AND(64, R(RAX), Imm8(CR_LT));
Jit->SHL(64, R(RAX), Imm8(62 - 3));
Jit->OR(64, R(RCX), R(RAX));
// Publish the assembled 64-bit value as this instruction's result.
Jit->MOV(64, R(cr_val), R(RCX));
RI.regs[cr_val] = I;
regNormalRegClear(RI, I);
break;
}
case LoadSingle: {
if (!thisUsed) break;
X64Reg reg = fregFindFreeReg(RI);

View File

@ -1130,7 +1130,7 @@ unsigned IRBuilder::getNumberOfOperands(InstLoc I) const {
numberOfOperands[CInt32] = 0;
static unsigned ZeroOp[] = {LoadCR, LoadLink, LoadMSR, LoadGReg, LoadCTR, InterpreterBranch, LoadCarry, RFIExit, LoadFReg, LoadFRegDENToZero, LoadGQR, Int3, };
static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, };
static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, ConvertFromFastCR, ConvertToFastCR};
static unsigned BiOp[] = {BranchCond, IdleBranch, And, Xor, Sub, Or, Add, Mul, Rol, Shl, Shrl, Sarl, ICmpEq, ICmpNe, ICmpUgt, ICmpUlt, ICmpSgt, ICmpSlt, ICmpSge, ICmpSle, Store8, Store16, Store32, ICmpCRSigned, ICmpCRUnsigned, FallBackToInterpreter, StoreSingle, StoreDouble, StorePaired, InsertDoubleInMReg, FSMul, FSAdd, FSSub, FDMul, FDAdd, FDSub, FPAdd, FPMul, FPSub, FPMerge00, FPMerge01, FPMerge10, FPMerge11, FDCmpCR, };
for (auto& op : ZeroOp) {
numberOfOperands[op] = 0;
@ -1235,10 +1235,11 @@ static std::unique_ptr<Writer> writer;
static const std::string opcodeNames[] = {
"Nop", "LoadGReg", "LoadLink", "LoadCR", "LoadCarry", "LoadCTR",
"LoadMSR", "LoadGQR", "SExt8", "SExt16", "BSwap32", "BSwap16", "Cntlzw",
"Not", "Load8", "Load16", "Load32", "BranchUncond", "StoreGReg",
"StoreCR", "StoreLink", "StoreCarry", "StoreCTR", "StoreMSR", "StoreFPRF",
"StoreGQR", "StoreSRR", "FallBackToInterpreter", "Add", "Mul", "And", "Or",
"Xor", "MulHighUnsigned", "Sub", "Shl", "Shrl", "Sarl", "Rol",
"Not", "Load8", "Load16", "Load32", "BranchUncond", "ConvertFromFastCR",
"ConvertToFastCR", "StoreGReg", "StoreCR", "StoreLink", "StoreCarry",
"StoreCTR", "StoreMSR", "StoreFPRF", "StoreGQR", "StoreSRR",
"FallBackToInterpreter", "Add", "Mul", "And", "Or", "Xor",
"MulHighUnsigned", "Sub", "Shl", "Shrl", "Sarl", "Rol",
"ICmpCRSigned", "ICmpCRUnsigned", "ICmpEq", "ICmpNe", "ICmpUgt",
"ICmpUlt", "ICmpUge", "ICmpUle", "ICmpSgt", "ICmpSlt", "ICmpSge",
"ICmpSle", "Store8", "Store16", "Store32", "BranchCond", "FResult_Start",

View File

@ -33,6 +33,9 @@ enum Opcode {
Load8, // These loads zext
Load16,
Load32,
// CR conversions
ConvertFromFastCR,
ConvertToFastCR,
// Branches
BranchUncond,
// Register store operators
@ -373,6 +376,12 @@ public:
// Builds an ICmpCRUnsigned instruction over (op1, op2) by delegating to
// FoldBiOp, and returns the resulting instruction location.
InstLoc EmitICmpCRUnsigned(InstLoc op1, InstLoc op2) {
	InstLoc folded = FoldBiOp(ICmpCRUnsigned, op1, op2);
	return folded;
}
// Builds a single-operand ConvertFromFastCR instruction over op1 by
// delegating to FoldUOp, and returns the resulting instruction location.
InstLoc EmitConvertFromFastCR(InstLoc op1) {
	InstLoc converted = FoldUOp(ConvertFromFastCR, op1);
	return converted;
}
// Builds a single-operand ConvertToFastCR instruction over op1 by
// delegating to FoldUOp, and returns the resulting instruction location.
InstLoc EmitConvertToFastCR(InstLoc op1) {
	InstLoc converted = FoldUOp(ConvertToFastCR, op1);
	return converted;
}
// Builds a FallBackToInterpreter instruction over (op1, op2) by delegating
// to FoldBiOp, and returns the resulting instruction location.
InstLoc EmitFallBackToInterpreter(InstLoc op1, InstLoc op2) {
	InstLoc folded = FoldBiOp(FallBackToInterpreter, op1, op2);
	return folded;
}

View File

@ -66,6 +66,7 @@ static IREmitter::InstLoc TestBranch(IREmitter::IRBuilder& ibuild, UGeckoInstruc
if ((inst.BO & 16) == 0) // Test a CR bit
{
IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2);
CRReg = ibuild.EmitConvertFromFastCR(CRReg);
IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3));
CRTest = ibuild.EmitAnd(CRReg, CRCmp);
if (!(inst.BO & 8))
@ -141,6 +142,7 @@ void JitILBase::bcctrx(UGeckoInstruction inst)
if ((inst.BO & 16) == 0) // Test a CR bit
{
IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2);
CRReg = ibuild.EmitConvertFromFastCR(CRReg);
IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3));
test = ibuild.EmitAnd(CRReg, CRCmp);
if (!(inst.BO & 8))

View File

@ -86,7 +86,7 @@ void JitILBase::fcmpx(UGeckoInstruction inst)
int ordered = (inst.SUBOP10 == 32) ? 1 : 0;
res = ibuild.EmitFDCmpCR(lhs, rhs, ordered);
ibuild.EmitStoreFPRF(res);
ibuild.EmitStoreCR(res, inst.CRFD);
ibuild.EmitStoreCR(ibuild.EmitConvertToFastCR(res), inst.CRFD);
}
void JitILBase::fsign(UGeckoInstruction inst)

View File

@ -12,7 +12,7 @@ static void ComputeRC(IREmitter::IRBuilder& ibuild, IREmitter::InstLoc val)
{
IREmitter::InstLoc res =
ibuild.EmitICmpCRSigned(val, ibuild.EmitIntConst(0));
ibuild.EmitStoreCR(res, 0);
ibuild.EmitStoreCR(ibuild.EmitConvertToFastCR(res), 0);
}
void JitILBase::reg_imm(UGeckoInstruction inst)
@ -114,7 +114,7 @@ void JitILBase::cmpXX(UGeckoInstruction inst)
js.downcountAmount++; //TODO: should this be somewhere else?
ibuild.EmitStoreCR(res, inst.CRFD);
ibuild.EmitStoreCR(ibuild.EmitConvertToFastCR(res), inst.CRFD);
}
void JitILBase::boolX(UGeckoInstruction inst)

View File

@ -107,8 +107,10 @@ void JitILBase::mfcr(UGeckoInstruction inst)
IREmitter::InstLoc d = ibuild.EmitIntConst(0);
for (int i = 0; i < 8; ++i)
{
d = ibuild.EmitShl(d, ibuild.EmitIntConst(4));
d = ibuild.EmitOr(d, ibuild.EmitLoadCR(i));
IREmitter::InstLoc cr = ibuild.EmitLoadCR(i);
cr = ibuild.EmitConvertFromFastCR(cr);
cr = ibuild.EmitShl(cr, ibuild.EmitIntConst(28 - 4 * i));
d = ibuild.EmitOr(d, cr);
}
ibuild.EmitStoreGReg(d, inst.RD);
}
@ -126,6 +128,7 @@ void JitILBase::mtcrf(UGeckoInstruction inst)
IREmitter::InstLoc value;
value = ibuild.EmitShrl(s, ibuild.EmitIntConst(28 - i * 4));
value = ibuild.EmitAnd(value, ibuild.EmitIntConst(0xF));
value = ibuild.EmitConvertToFastCR(value);
ibuild.EmitStoreCR(value, i);
}
}
@ -150,6 +153,7 @@ void JitILBase::crXX(UGeckoInstruction inst)
// Get bit CRBA in EAX aligned with bit CRBD
const int shiftA = (inst.CRBD & 3) - (inst.CRBA & 3);
IREmitter::InstLoc eax = ibuild.EmitLoadCR(inst.CRBA >> 2);
eax = ibuild.EmitConvertFromFastCR(eax);
if (shiftA < 0)
eax = ibuild.EmitShl(eax, ibuild.EmitIntConst(-shiftA));
else if (shiftA > 0)
@ -158,6 +162,7 @@ void JitILBase::crXX(UGeckoInstruction inst)
// Get bit CRBB in ECX aligned with bit CRBD
const int shiftB = (inst.CRBD & 3) - (inst.CRBB & 3);
IREmitter::InstLoc ecx = ibuild.EmitLoadCR(inst.CRBB >> 2);
ecx = ibuild.EmitConvertFromFastCR(ecx);
if (shiftB < 0)
ecx = ibuild.EmitShl(ecx, ibuild.EmitIntConst(-shiftB));
else if (shiftB > 0)
@ -211,7 +216,9 @@ void JitILBase::crXX(UGeckoInstruction inst)
// Store result bit in CRBD
eax = ibuild.EmitAnd(eax, ibuild.EmitIntConst(0x8 >> (inst.CRBD & 3)));
IREmitter::InstLoc bd = ibuild.EmitLoadCR(inst.CRBD >> 2);
bd = ibuild.EmitConvertFromFastCR(bd);
bd = ibuild.EmitAnd(bd, ibuild.EmitIntConst(~(0x8 >> (inst.CRBD & 3))));
bd = ibuild.EmitOr(bd, eax);
bd = ibuild.EmitConvertToFastCR(bd);
ibuild.EmitStoreCR(bd, inst.CRBD >> 2);
}