From 5bb428c68543fea11233b3f3f7e74b6daa5013f7 Mon Sep 17 00:00:00 2001
From: magumagu
Date: Mon, 23 Jun 2014 21:43:38 -0700
Subject: [PATCH] JITIL: optimize branches.

---
 NOTE(review): the line breaks and indentation of this patch were
 reconstructed from a whitespace-collapsed copy; verify the context lines
 against the tree before applying. EmitCRTest initializes CRTest to nullptr
 (as TestBranch does). Explanatory comments were added as trailing comments
 on '+' lines only, so the hunk line counts and the diffstat are unchanged.

 Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp   | 50 +++++++++++++++++++
 Source/Core/Core/PowerPC/JitILCommon/IR.cpp   |  7 +--
 Source/Core/Core/PowerPC/JitILCommon/IR.h     | 18 +++++++
 .../PowerPC/JitILCommon/JitILBase_Branch.cpp  | 42 +++++++++++-----
 4 files changed, 101 insertions(+), 16 deletions(-)

diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp
index e87d24ff76..94d61dc761 100644
--- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp
+++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp
@@ -665,6 +665,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
 		case FDNeg:
 		case ConvertFromFastCR:
 		case ConvertToFastCR:
+		case FastCRSOSet: // the four FastCR*Set ops share the single-operand use marking below
+		case FastCREQSet:
+		case FastCRGTSet:
+		case FastCRLTSet:
 			if (thisUsed)
 				regMarkUse(RI, I, getOp1(I), 1);
 			break;
@@ -1209,6 +1213,52 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
 			regNormalRegClear(RI, I);
 			break;
 		}
+		case FastCRSOSet: // result = 1 if bit 61 of the operand is set, else 0
+		{
+			if (!thisUsed) break;
+			X64Reg reg = regUReg(RI, I);
+			Jit->MOV(64, R(RAX), Imm64(1ull << 61)); // RAX holds the 64-bit mask for TEST
+			Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RAX));
+			Jit->SETcc(CC_NZ, R(AL)); // AL = 1 when the masked bit is non-zero
+			Jit->MOVZX(32, 8, reg, R(AL)); // zero-extend the flag byte into the result register
+			RI.regs[reg] = I;
+			regNormalRegClear(RI, I);
+			break;
+		}
+		case FastCREQSet: // result = 1 if the low 32 bits of the operand are zero, else 0
+		{
+			if (!thisUsed) break;
+			X64Reg reg = regUReg(RI, I);
+			Jit->CMP(32, regLocForInst(RI, getOp1(I)), Imm32(0)); // 32-bit compare: only the low half is examined
+			Jit->SETcc(CC_Z, R(AL));
+			Jit->MOVZX(32, 8, reg, R(AL));
+			RI.regs[reg] = I;
+			regNormalRegClear(RI, I);
+			break;
+		}
+		case FastCRGTSet: // result = 1 if the operand, as a signed 64-bit value, is greater than zero
+		{
+			if (!thisUsed) break;
+			X64Reg reg = regUReg(RI, I);
+			Jit->CMP(64, regLocForInst(RI, getOp1(I)), Imm8(0)); // full 64-bit compare against 0
+			Jit->SETcc(CC_G, R(AL)); // CC_G is the signed "greater" condition
+			Jit->MOVZX(32, 8, reg, R(AL));
+			RI.regs[reg] = I;
+			regNormalRegClear(RI, I);
+			break;
+		}
+		case FastCRLTSet: // result = 1 if bit 62 of the operand is set, else 0
+		{
+			if (!thisUsed) break;
+			X64Reg reg = regUReg(RI, I);
+			Jit->MOV(64, R(RAX), Imm64(1ull << 62)); // RAX holds the 64-bit mask for TEST
+			Jit->TEST(64, regLocForInst(RI, getOp1(I)), R(RAX));
+			Jit->SETcc(CC_NZ, R(AL));
+			Jit->MOVZX(32, 8, reg, R(AL));
+			RI.regs[reg] = I;
+			regNormalRegClear(RI, I);
+			break;
+		}
 		case LoadSingle: {
 			if (!thisUsed) break;
 			X64Reg reg = fregFindFreeReg(RI);
diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp
index 032b953d3e..4125b21d0b 100644
--- a/Source/Core/Core/PowerPC/JitILCommon/IR.cpp
+++ b/Source/Core/Core/PowerPC/JitILCommon/IR.cpp
@@ -1065,9 +1065,9 @@ unsigned IRBuilder::getNumberOfOperands(InstLoc I) const {
 		numberOfOperands[CInt16] = 0;
 		numberOfOperands[CInt32] = 0;
 
-		static unsigned ZeroOp[] = {LoadCR, LoadLink, LoadMSR, LoadGReg, LoadCTR, InterpreterBranch, LoadCarry, RFIExit, LoadFReg, LoadFRegDENToZero, LoadGQR, Int3, };
-		static unsigned UOp[] = {StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, ConvertFromFastCR, ConvertToFastCR};
-		static unsigned BiOp[] = {BranchCond, IdleBranch, And, Xor, Sub, Or, Add, Mul, Rol, Shl, Shrl, Sarl, ICmpEq, ICmpNe, ICmpUgt, ICmpUlt, ICmpSgt, ICmpSlt, ICmpSge, ICmpSle, Store8, Store16, Store32, ICmpCRSigned, ICmpCRUnsigned, FallBackToInterpreter, StoreSingle, StoreDouble, StorePaired, InsertDoubleInMReg, FSMul, FSAdd, FSSub, FDMul, FDAdd, FDSub, FPAdd, FPMul, FPSub, FPMerge00, FPMerge01, FPMerge10, FPMerge11, FDCmpCR, };
+		static unsigned ZeroOp[] = { LoadCR, LoadLink, LoadMSR, LoadGReg, LoadCTR, InterpreterBranch, LoadCarry, RFIExit, LoadFReg, LoadFRegDENToZero, LoadGQR, Int3, };
+		static unsigned UOp[] = { StoreLink, BranchUncond, StoreCR, StoreMSR, StoreFPRF, StoreGReg, StoreCTR, Load8, Load16, Load32, SExt16, SExt8, Cntlzw, Not, StoreCarry, SystemCall, ShortIdleLoop, LoadSingle, LoadDouble, LoadPaired, StoreFReg, DupSingleToMReg, DupSingleToPacked, ExpandPackedToMReg, CompactMRegToPacked, FSNeg, FDNeg, FPDup0, FPDup1, FPNeg, DoubleToSingle, StoreGQR, StoreSRR, ConvertFromFastCR, ConvertToFastCR, FastCRSOSet, FastCREQSet, FastCRGTSet, FastCRLTSet, };
+		static unsigned BiOp[] = { BranchCond, IdleBranch, And, Xor, Sub, Or, Add, Mul, Rol, Shl, Shrl, Sarl, ICmpEq, ICmpNe, ICmpUgt, ICmpUlt, ICmpSgt, ICmpSlt, ICmpSge, ICmpSle, Store8, Store16, Store32, ICmpCRSigned, ICmpCRUnsigned, FallBackToInterpreter, StoreSingle, StoreDouble, StorePaired, InsertDoubleInMReg, FSMul, FSAdd, FSSub, FDMul, FDAdd, FDSub, FPAdd, FPMul, FPSub, FPMerge00, FPMerge01, FPMerge10, FPMerge11, FDCmpCR, };
 		for (auto& op : ZeroOp) {
 			numberOfOperands[op] = 0;
 		}
@@ -1174,6 +1174,7 @@ static const std::string opcodeNames[] = {
 	"Not", "Load8", "Load16", "Load32", "BranchUncond", "ConvertFromFastCR",
 	"ConvertToFastCR", "StoreGReg", "StoreCR", "StoreLink", "StoreCarry",
 	"StoreCTR", "StoreMSR", "StoreFPRF", "StoreGQR", "StoreSRR",
+	"FastCRSOSet", "FastCREQSet", "FastCRGTSet", "FastCRLTSet",
 	"FallBackToInterpreter", "Add", "Mul", "And", "Or", "Xor",
 	"MulHighUnsigned", "Sub", "Shl", "Shrl", "Sarl", "Rol",
 	"ICmpCRSigned", "ICmpCRUnsigned", "ICmpEq", "ICmpNe", "ICmpUgt",
diff --git a/Source/Core/Core/PowerPC/JitILCommon/IR.h b/Source/Core/Core/PowerPC/JitILCommon/IR.h
index c09de3b9b2..37887891e1 100644
--- a/Source/Core/Core/PowerPC/JitILCommon/IR.h
+++ b/Source/Core/Core/PowerPC/JitILCommon/IR.h
@@ -48,6 +48,11 @@ enum Opcode {
 	StoreFPRF,
 	StoreGQR,
 	StoreSRR,
+	// Branch conditions: single-operand tests emitted by EmitCRTest in JitILBase_Branch.cpp
+	FastCRSOSet,
+	FastCREQSet,
+	FastCRGTSet,
+	FastCRLTSet,
 	// Arbitrary interpreter instruction
 	FallBackToInterpreter,
 
@@ -77,6 +82,7 @@ enum Opcode {
 	ICmpSlt,
 	ICmpSge,
 	ICmpSle, // Opposite of sgt
+
 	// Memory store operators
 	Store8,
 	Store16,
@@ -382,6 +388,18 @@ public:
 	InstLoc EmitConvertToFastCR(InstLoc op1) {
 		return FoldUOp(ConvertToFastCR, op1);
 	}
+	InstLoc EmitFastCRSOSet(InstLoc op1) { // builds a FastCRSOSet node over op1 via FoldUOp
+		return FoldUOp(FastCRSOSet, op1);
+	}
+	InstLoc EmitFastCREQSet(InstLoc op1) { // builds a FastCREQSet node over op1 via FoldUOp
+		return FoldUOp(FastCREQSet, op1);
+	}
+	InstLoc EmitFastCRLTSet(InstLoc op1) { // builds a FastCRLTSet node over op1 via FoldUOp
+		return FoldUOp(FastCRLTSet, op1);
+	}
+	InstLoc EmitFastCRGTSet(InstLoc op1) { // builds a FastCRGTSet node over op1 via FoldUOp
+		return FoldUOp(FastCRGTSet, op1);
+	}
 	InstLoc EmitFallBackToInterpreter(InstLoc op1, InstLoc op2) {
 		return FoldBiOp(FallBackToInterpreter, op1, op2);
 	}
diff --git a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp
index 9cb80059ec..c60080e6a9 100644
--- a/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp
+++ b/Source/Core/Core/PowerPC/JitILCommon/JitILBase_Branch.cpp
@@ -61,16 +61,35 @@ void JitILBase::bx(UGeckoInstruction inst)
 	ibuild.EmitBranchUncond(ibuild.EmitIntConst(destination));
 }
 
+static IREmitter::InstLoc EmitCRTest(IREmitter::IRBuilder& ibuild, UGeckoInstruction inst) // emits the CR-bit test selected by inst.BI, inverted when BO bit 8 is clear; returns the test node
+{
+	IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2); // inst.BI >> 2 selects which CR value is loaded
+	IREmitter::InstLoc CRTest = nullptr; // initialized like CRTest in TestBranch, so it is never read indeterminate if no case matches
+	switch (3 - (inst.BI & 3)) // inst.BI & 3 selects the bit; mapped onto the CR_*_BIT constants
+	{
+	case CR_SO_BIT:
+		CRTest = ibuild.EmitFastCRSOSet(CRReg);
+		break;
+	case CR_EQ_BIT:
+		CRTest = ibuild.EmitFastCREQSet(CRReg);
+		break;
+	case CR_GT_BIT:
+		CRTest = ibuild.EmitFastCRGTSet(CRReg);
+		break;
+	case CR_LT_BIT:
+		CRTest = ibuild.EmitFastCRLTSet(CRReg);
+		break;
+	}
+	if (!(inst.BO & 8)) // BO bit 8 clear: xor with 1 inverts the test result
+		CRTest = ibuild.EmitXor(CRTest, ibuild.EmitIntConst(1));
+	return CRTest;
+}
+
 static IREmitter::InstLoc TestBranch(IREmitter::IRBuilder& ibuild, UGeckoInstruction inst) {
 	IREmitter::InstLoc CRTest = nullptr, CTRTest = nullptr;
 	if ((inst.BO & 16) == 0) // Test a CR bit
 	{
-		IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2);
-		CRReg = ibuild.EmitConvertFromFastCR(CRReg);
-		IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3));
-		CRTest = ibuild.EmitAnd(CRReg, CRCmp);
-		if (!(inst.BO & 8))
-			CRTest = ibuild.EmitXor(CRCmp, CRTest);
+		CRTest = EmitCRTest(ibuild, inst); // shared helper replaces the inline mask-and-xor sequence
 	}
 
 	if ((inst.BO & 4) == 0) {
@@ -141,13 +160,10 @@ void JitILBase::bcctrx(UGeckoInstruction inst)
 	IREmitter::InstLoc test;
 	if ((inst.BO & 16) == 0) // Test a CR bit
 	{
-		IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2);
-		CRReg = ibuild.EmitConvertFromFastCR(CRReg);
-		IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3));
-		test = ibuild.EmitAnd(CRReg, CRCmp);
-		if (!(inst.BO & 8))
-			test = ibuild.EmitXor(test, CRCmp);
-	} else {
+		test = EmitCRTest(ibuild, inst); // same helper as TestBranch
+	}
+	else
+	{
 		test = ibuild.EmitIntConst(1);
 	}
 	test = ibuild.EmitICmpEq(test, ibuild.EmitIntConst(0));