mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-25 15:31:17 +01:00
Initial unoptimized JITIL flag optimization.
This commit is contained in:
parent
5506e57ab8
commit
1429fccb97
@ -85,7 +85,7 @@ static unsigned regReadUse(RegInfo& R, InstLoc I) {
|
||||
return R.IInfo[I - R.FirstI] & 3;
|
||||
}
|
||||
|
||||
static unsigned SlotSet[1000];
|
||||
static u64 SlotSet[1000];
|
||||
static u8 GC_ALIGNED16(FSlotSet[16*1000]);
|
||||
|
||||
static OpArg regLocForSlot(RegInfo& RI, unsigned slot) {
|
||||
@ -107,7 +107,7 @@ static void regSpill(RegInfo& RI, X64Reg reg) {
|
||||
unsigned slot = regGetSpill(RI, RI.regs[reg]);
|
||||
if (!slot) {
|
||||
slot = regCreateSpill(RI, RI.regs[reg]);
|
||||
RI.Jit->MOV(32, regLocForSlot(RI, slot), R(reg));
|
||||
RI.Jit->MOV(64, regLocForSlot(RI, slot), R(reg));
|
||||
}
|
||||
RI.regs[reg] = nullptr;
|
||||
}
|
||||
@ -621,6 +621,8 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
|
||||
case FPDup1:
|
||||
case FSNeg:
|
||||
case FDNeg:
|
||||
case ConvertFromFastCR:
|
||||
case ConvertToFastCR:
|
||||
if (thisUsed)
|
||||
regMarkUse(RI, I, getOp1(I), 1);
|
||||
break;
|
||||
@ -763,8 +765,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
|
||||
if (!thisUsed) break;
|
||||
X64Reg reg = regFindFreeReg(RI);
|
||||
unsigned ppcreg = *I >> 8;
|
||||
// TODO(delroth): unbreak
|
||||
//Jit->MOVZX(32, 8, reg, M(&PowerPC::ppcState.cr_fast[ppcreg]));
|
||||
Jit->MOV(64, R(reg), M(&PowerPC::ppcState.cr_val[ppcreg]));
|
||||
RI.regs[reg] = I;
|
||||
break;
|
||||
}
|
||||
@ -814,11 +815,9 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
|
||||
break;
|
||||
}
|
||||
case StoreCR: {
|
||||
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I)));
|
||||
Jit->MOV(64, R(RCX), regLocForInst(RI, getOp1(I)));
|
||||
unsigned ppcreg = *I >> 16;
|
||||
// CAUTION: uses 8-bit reg!
|
||||
// TODO(delroth): Unbreak.
|
||||
//Jit->MOV(8, M(&PowerPC::ppcState.cr_fast[ppcreg]), R(ECX));
|
||||
Jit->MOV(64, M(&PowerPC::ppcState.cr_val[ppcreg]), R(RCX));
|
||||
regNormalRegClear(RI, I);
|
||||
break;
|
||||
}
|
||||
@ -1116,6 +1115,84 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
|
||||
regNormalRegClear(RI, I);
|
||||
break;
|
||||
}
|
||||
case ConvertFromFastCR:
{
	// Decodes the 64-bit internal CR value held by operand 1 into a 4-bit
	// CR field value: LT = 0x8, GT = 0x4, EQ = 0x2, SO = 0x1.
	// EAX accumulates the result and RCX/ECX is scratch for each flag.
	if (!thisUsed) break;
	X64Reg cr_val = regUReg(RI, I);
	Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I)));

	Jit->XOR(32, R(EAX), R(EAX));

	// SO: Bit 61 set -> result bit 0.
	Jit->MOV(64, R(RCX), R(cr_val));
	Jit->SHR(64, R(RCX), Imm8(61));
	Jit->AND(32, R(ECX), Imm8(1));
	Jit->OR(32, R(EAX), R(ECX));

	// EQ: Bits 31-0 == 0 -> result bit 1.
	// ECX is zeroed before TEST because XOR would clobber the flags.
	Jit->XOR(32, R(ECX), R(ECX));
	Jit->TEST(32, R(cr_val), R(cr_val));
	Jit->SETcc(CC_Z, R(ECX));
	Jit->SHL(32, R(ECX), Imm8(1));
	Jit->OR(32, R(EAX), R(ECX));

	// GT: Value > 0 (signed 64-bit) -> result bit 2.
	Jit->XOR(32, R(ECX), R(ECX));
	Jit->TEST(64, R(cr_val), R(cr_val));
	Jit->SETcc(CC_G, R(ECX));
	Jit->SHL(32, R(ECX), Imm8(2));
	Jit->OR(32, R(EAX), R(ECX));

	// LT: Bit 62 set -> result bit 3.
	// The 64-bit operations name RCX, matching the SO step above.
	Jit->MOV(64, R(RCX), R(cr_val));
	Jit->SHR(64, R(RCX), Imm8(62 - 3));
	Jit->AND(32, R(ECX), Imm8(0x8));
	Jit->OR(32, R(EAX), R(ECX));

	// Hand the decoded field back in the register allocated for this instruction.
	Jit->MOV(32, R(cr_val), R(EAX));
	RI.regs[cr_val] = I;
	regNormalRegClear(RI, I);
	break;
}
|
||||
// Packs the 4-bit CR field value held by operand 1 into the 64-bit form that
// the ConvertFromFastCR case decodes (SO = bit 61, LT = bit 62, EQ = low 32
// bits zero, GT = signed value > 0). RCX accumulates the result, RAX is scratch.
case ConvertToFastCR:
|
||||
{
|
||||
// Emits nothing when thisUsed is false.
if (!thisUsed) break;
|
||||
X64Reg cr_val = regUReg(RI, I);
|
||||
Jit->MOV(64, R(cr_val), regLocForInst(RI, getOp1(I)));
|
||||
|
||||
// Start with only bit 32 set: low 32 bits zero and the 64-bit value positive.
// The EQ and GT steps below OR bits in when the corresponding input flag is clear.
Jit->MOV(64, R(RCX), Imm64(1ull << 32));
|
||||
|
||||
// SO: isolate input bit 0 (shift it to bit 63), then move it down to bit 61.
|
||||
Jit->MOV(64, R(RAX), R(cr_val));
|
||||
Jit->SHL(64, R(RAX), Imm8(63));
|
||||
Jit->SHR(64, R(RAX), Imm8(63 - 61));
|
||||
Jit->OR(64, R(RCX), R(RAX));
|
||||
|
||||
// EQ: inverted input masked with CR_EQ, so a clear EQ flag makes the low
// 32 bits non-zero. NOTE(review): presumably CR_EQ == 0x2 - confirm.
|
||||
Jit->MOV(64, R(RAX), R(cr_val));
|
||||
Jit->NOT(64, R(RAX));
|
||||
Jit->AND(64, R(RAX), Imm8(CR_EQ));
|
||||
Jit->OR(64, R(RCX), R(RAX));
|
||||
|
||||
// GT: inverted input masked with CR_GT and shifted left by 61.
// NOTE(review): assuming CR_GT == 0x4 this sets bit 63 (the sign bit) when
// the GT flag is clear, making the value non-positive - confirm.
|
||||
Jit->MOV(64, R(RAX), R(cr_val));
|
||||
Jit->NOT(64, R(RAX));
|
||||
Jit->AND(64, R(RAX), Imm8(CR_GT));
|
||||
Jit->SHL(64, R(RAX), Imm8(63 - 2));
|
||||
Jit->OR(64, R(RCX), R(RAX));
|
||||
|
||||
// LT: input masked with CR_LT and shifted left by 59.
// NOTE(review): assuming CR_LT == 0x8 this lands on bit 62 - confirm.
|
||||
Jit->MOV(64, R(RAX), R(cr_val));
|
||||
Jit->AND(64, R(RAX), Imm8(CR_LT));
|
||||
Jit->SHL(64, R(RAX), Imm8(62 - 3));
|
||||
Jit->OR(64, R(RCX), R(RAX));
|
||||
|
||||
// Move the packed value into the register allocated for this instruction.
Jit->MOV(64, R(cr_val), R(RCX));
|
||||
|
||||
RI.regs[cr_val] = I;
|
||||
regNormalRegClear(RI, I);
|
||||
break;
|
||||
}
|
||||
case LoadSingle: {
|
||||
if (!thisUsed) break;
|
||||
X64Reg reg = fregFindFreeReg(RI);
|
||||
|
@ -1130,7 +1130,7 @@ unsigned IRBuilder::getNumberOfOperands(InstLoc I) const {
|
||||
numberOfOperands[CInt32] = 0;
|
||||
|
||||
static unsigned ZeroOp[] = {LoadCR, LoadLink, LoadMSR, LoadGReg, LoadCTR, InterpreterBranch, LoadCarry, RFIExit, LoadFReg, LoadFRegDENToZero, LoadGQR, Int3, };
|
||||
static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, };
|
||||
static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, ConvertFromFastCR, ConvertToFastCR};
|
||||
static unsigned BiOp[] = {BranchCond, IdleBranch, And, Xor, Sub, Or, Add, Mul, Rol, Shl, Shrl, Sarl, ICmpEq, ICmpNe, ICmpUgt, ICmpUlt, ICmpSgt, ICmpSlt, ICmpSge, ICmpSle, Store8, Store16, Store32, ICmpCRSigned, ICmpCRUnsigned, FallBackToInterpreter, StoreSingle, StoreDouble, StorePaired, InsertDoubleInMReg, FSMul, FSAdd, FSSub, FDMul, FDAdd, FDSub, FPAdd, FPMul, FPSub, FPMerge00, FPMerge01, FPMerge10, FPMerge11, FDCmpCR, };
|
||||
for (auto& op : ZeroOp) {
|
||||
numberOfOperands[op] = 0;
|
||||
@ -1235,10 +1235,11 @@ static std::unique_ptr<Writer> writer;
|
||||
static const std::string opcodeNames[] = {
|
||||
"Nop", "LoadGReg", "LoadLink", "LoadCR", "LoadCarry", "LoadCTR",
|
||||
"LoadMSR", "LoadGQR", "SExt8", "SExt16", "BSwap32", "BSwap16", "Cntlzw",
|
||||
"Not", "Load8", "Load16", "Load32", "BranchUncond", "StoreGReg",
|
||||
"StoreCR", "StoreLink", "StoreCarry", "StoreCTR", "StoreMSR", "StoreFPRF",
|
||||
"StoreGQR", "StoreSRR", "FallBackToInterpreter", "Add", "Mul", "And", "Or",
|
||||
"Xor", "MulHighUnsigned", "Sub", "Shl", "Shrl", "Sarl", "Rol",
|
||||
"Not", "Load8", "Load16", "Load32", "BranchUncond", "ConvertFromFastCR",
|
||||
"ConvertToFastCR", "StoreGReg", "StoreCR", "StoreLink", "StoreCarry",
|
||||
"StoreCTR", "StoreMSR", "StoreFPRF", "StoreGQR", "StoreSRR",
|
||||
"FallBackToInterpreter", "Add", "Mul", "And", "Or", "Xor",
|
||||
"MulHighUnsigned", "Sub", "Shl", "Shrl", "Sarl", "Rol",
|
||||
"ICmpCRSigned", "ICmpCRUnsigned", "ICmpEq", "ICmpNe", "ICmpUgt",
|
||||
"ICmpUlt", "ICmpUge", "ICmpUle", "ICmpSgt", "ICmpSlt", "ICmpSge",
|
||||
"ICmpSle", "Store8", "Store16", "Store32", "BranchCond", "FResult_Start",
|
||||
|
@ -33,6 +33,9 @@ enum Opcode {
|
||||
Load8, // These loads zext
|
||||
Load16,
|
||||
Load32,
|
||||
// CR conversions
|
||||
ConvertFromFastCR,
|
||||
ConvertToFastCR,
|
||||
// Branches
|
||||
BranchUncond,
|
||||
// Register store operators
|
||||
@ -373,6 +376,12 @@ public:
|
||||
// Emits an ICmpCRUnsigned instruction over op1 and op2 by delegating to
// FoldBiOp, and returns the resulting instruction location.
InstLoc EmitICmpCRUnsigned(InstLoc op1, InstLoc op2)
{
	InstLoc folded = FoldBiOp(ICmpCRUnsigned, op1, op2);
	return folded;
}
|
||||
// Emits a ConvertFromFastCR instruction over op1 by delegating to FoldUOp,
// and returns the resulting instruction location.
InstLoc EmitConvertFromFastCR(InstLoc op1)
{
	InstLoc folded = FoldUOp(ConvertFromFastCR, op1);
	return folded;
}
|
||||
// Emits a ConvertToFastCR instruction over op1 by delegating to FoldUOp,
// and returns the resulting instruction location.
InstLoc EmitConvertToFastCR(InstLoc op1)
{
	InstLoc folded = FoldUOp(ConvertToFastCR, op1);
	return folded;
}
|
||||
// Emits a FallBackToInterpreter instruction over op1 and op2 by delegating to
// FoldBiOp, and returns the resulting instruction location.
InstLoc EmitFallBackToInterpreter(InstLoc op1, InstLoc op2)
{
	InstLoc folded = FoldBiOp(FallBackToInterpreter, op1, op2);
	return folded;
}
|
||||
|
@ -66,6 +66,7 @@ static IREmitter::InstLoc TestBranch(IREmitter::IRBuilder& ibuild, UGeckoInstruc
|
||||
if ((inst.BO & 16) == 0) // Test a CR bit
|
||||
{
|
||||
IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2);
|
||||
CRReg = ibuild.EmitConvertFromFastCR(CRReg);
|
||||
IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3));
|
||||
CRTest = ibuild.EmitAnd(CRReg, CRCmp);
|
||||
if (!(inst.BO & 8))
|
||||
@ -141,6 +142,7 @@ void JitILBase::bcctrx(UGeckoInstruction inst)
|
||||
if ((inst.BO & 16) == 0) // Test a CR bit
|
||||
{
|
||||
IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2);
|
||||
CRReg = ibuild.EmitConvertFromFastCR(CRReg);
|
||||
IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3));
|
||||
test = ibuild.EmitAnd(CRReg, CRCmp);
|
||||
if (!(inst.BO & 8))
|
||||
|
@ -86,7 +86,7 @@ void JitILBase::fcmpx(UGeckoInstruction inst)
|
||||
int ordered = (inst.SUBOP10 == 32) ? 1 : 0;
|
||||
res = ibuild.EmitFDCmpCR(lhs, rhs, ordered);
|
||||
ibuild.EmitStoreFPRF(res);
|
||||
ibuild.EmitStoreCR(res, inst.CRFD);
|
||||
ibuild.EmitStoreCR(ibuild.EmitConvertToFastCR(res), inst.CRFD);
|
||||
}
|
||||
|
||||
void JitILBase::fsign(UGeckoInstruction inst)
|
||||
|
@ -12,7 +12,7 @@ static void ComputeRC(IREmitter::IRBuilder& ibuild, IREmitter::InstLoc val)
|
||||
{
|
||||
IREmitter::InstLoc res =
|
||||
ibuild.EmitICmpCRSigned(val, ibuild.EmitIntConst(0));
|
||||
ibuild.EmitStoreCR(res, 0);
|
||||
ibuild.EmitStoreCR(ibuild.EmitConvertToFastCR(res), 0);
|
||||
}
|
||||
|
||||
void JitILBase::reg_imm(UGeckoInstruction inst)
|
||||
@ -114,7 +114,7 @@ void JitILBase::cmpXX(UGeckoInstruction inst)
|
||||
|
||||
js.downcountAmount++; //TODO: should this be somewhere else?
|
||||
|
||||
ibuild.EmitStoreCR(res, inst.CRFD);
|
||||
ibuild.EmitStoreCR(ibuild.EmitConvertToFastCR(res), inst.CRFD);
|
||||
}
|
||||
|
||||
void JitILBase::boolX(UGeckoInstruction inst)
|
||||
|
@ -107,8 +107,10 @@ void JitILBase::mfcr(UGeckoInstruction inst)
|
||||
IREmitter::InstLoc d = ibuild.EmitIntConst(0);
|
||||
for (int i = 0; i < 8; ++i)
|
||||
{
|
||||
d = ibuild.EmitShl(d, ibuild.EmitIntConst(4));
|
||||
d = ibuild.EmitOr(d, ibuild.EmitLoadCR(i));
|
||||
IREmitter::InstLoc cr = ibuild.EmitLoadCR(i);
|
||||
cr = ibuild.EmitConvertFromFastCR(cr);
|
||||
cr = ibuild.EmitShl(cr, ibuild.EmitIntConst(28 - 4 * i));
|
||||
d = ibuild.EmitOr(d, cr);
|
||||
}
|
||||
ibuild.EmitStoreGReg(d, inst.RD);
|
||||
}
|
||||
@ -126,6 +128,7 @@ void JitILBase::mtcrf(UGeckoInstruction inst)
|
||||
IREmitter::InstLoc value;
|
||||
value = ibuild.EmitShrl(s, ibuild.EmitIntConst(28 - i * 4));
|
||||
value = ibuild.EmitAnd(value, ibuild.EmitIntConst(0xF));
|
||||
value = ibuild.EmitConvertToFastCR(value);
|
||||
ibuild.EmitStoreCR(value, i);
|
||||
}
|
||||
}
|
||||
@ -150,6 +153,7 @@ void JitILBase::crXX(UGeckoInstruction inst)
|
||||
// Get bit CRBA in EAX aligned with bit CRBD
|
||||
const int shiftA = (inst.CRBD & 3) - (inst.CRBA & 3);
|
||||
IREmitter::InstLoc eax = ibuild.EmitLoadCR(inst.CRBA >> 2);
|
||||
eax = ibuild.EmitConvertFromFastCR(eax);
|
||||
if (shiftA < 0)
|
||||
eax = ibuild.EmitShl(eax, ibuild.EmitIntConst(-shiftA));
|
||||
else if (shiftA > 0)
|
||||
@ -158,6 +162,7 @@ void JitILBase::crXX(UGeckoInstruction inst)
|
||||
// Get bit CRBB in ECX aligned with bit CRBD
|
||||
const int shiftB = (inst.CRBD & 3) - (inst.CRBB & 3);
|
||||
IREmitter::InstLoc ecx = ibuild.EmitLoadCR(inst.CRBB >> 2);
|
||||
ecx = ibuild.EmitConvertFromFastCR(ecx);
|
||||
if (shiftB < 0)
|
||||
ecx = ibuild.EmitShl(ecx, ibuild.EmitIntConst(-shiftB));
|
||||
else if (shiftB > 0)
|
||||
@ -211,7 +216,9 @@ void JitILBase::crXX(UGeckoInstruction inst)
|
||||
// Store result bit in CRBD
|
||||
eax = ibuild.EmitAnd(eax, ibuild.EmitIntConst(0x8 >> (inst.CRBD & 3)));
|
||||
IREmitter::InstLoc bd = ibuild.EmitLoadCR(inst.CRBD >> 2);
|
||||
bd = ibuild.EmitConvertFromFastCR(bd);
|
||||
bd = ibuild.EmitAnd(bd, ibuild.EmitIntConst(~(0x8 >> (inst.CRBD & 3))));
|
||||
bd = ibuild.EmitOr(bd, eax);
|
||||
bd = ibuild.EmitConvertToFastCR(bd);
|
||||
ibuild.EmitStoreCR(bd, inst.CRBD >> 2);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user