Merge pull request #1021 from FioraAeterna/optimizeca3

JIT: Carry optimizations!
This commit is contained in:
comex 2014-09-14 15:08:08 -04:00
commit db7617248f
12 changed files with 361 additions and 317 deletions

View File

@ -34,7 +34,7 @@ static GekkoOPTemplate primarytable[] =
{10, Interpreter::cmpli, {"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn, 1, 0, 0, 0}}, {10, Interpreter::cmpli, {"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn, 1, 0, 0, 0}},
{11, Interpreter::cmpi, {"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn, 1, 0, 0, 0}}, {11, Interpreter::cmpi, {"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn, 1, 0, 0, 0}},
{12, Interpreter::addic, {"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA, 1, 0, 0, 0}}, {12, Interpreter::addic, {"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA, 1, 0, 0, 0}},
{13, Interpreter::addic_rc, {"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0, 1, 0, 0, 0}}, {13, Interpreter::addic_rc, {"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA | FL_SET_CR0, 1, 0, 0, 0}},
{14, Interpreter::addi, {"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0, 1, 0, 0, 0}}, {14, Interpreter::addi, {"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0, 1, 0, 0, 0}},
{15, Interpreter::addis, {"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0, 1, 0, 0, 0}}, {15, Interpreter::addis, {"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0, 1, 0, 0, 0}},
@ -180,8 +180,8 @@ static GekkoOPTemplate table31[] =
{922, Interpreter::extshx, {"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}}, {922, Interpreter::extshx, {"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
{954, Interpreter::extsbx, {"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}}, {954, Interpreter::extsbx, {"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
{536, Interpreter::srwx, {"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}}, {536, Interpreter::srwx, {"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
{792, Interpreter::srawx, {"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}}, {792, Interpreter::srawx, {"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{824, Interpreter::srawix, {"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}}, {824, Interpreter::srawix, {"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{24, Interpreter::slwx, {"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}}, {24, Interpreter::slwx, {"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
{54, Interpreter::dcbst, {"dcbst", OPTYPE_DCACHE, 0, 5, 0, 0, 0}}, {54, Interpreter::dcbst, {"dcbst", OPTYPE_DCACHE, 0, 5, 0, 0, 0}},
@ -260,7 +260,7 @@ static GekkoOPTemplate table31[] =
{339, Interpreter::mfspr, {"mfspr", OPTYPE_SPR, FL_OUT_D, 1, 0, 0, 0}}, {339, Interpreter::mfspr, {"mfspr", OPTYPE_SPR, FL_OUT_D, 1, 0, 0, 0}},
{467, Interpreter::mtspr, {"mtspr", OPTYPE_SPR, 0, 2, 0, 0, 0}}, {467, Interpreter::mtspr, {"mtspr", OPTYPE_SPR, 0, 2, 0, 0, 0}},
{371, Interpreter::mftb, {"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER, 1, 0, 0, 0}}, {371, Interpreter::mftb, {"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER, 1, 0, 0, 0}},
{512, Interpreter::mcrxr, {"mcrxr", OPTYPE_SYSTEM, 0, 1, 0, 0, 0}}, {512, Interpreter::mcrxr, {"mcrxr", OPTYPE_SYSTEM, FL_READ_CA | FL_SET_CA, 1, 0, 0, 0}},
{595, Interpreter::mfsr, {"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}}, {595, Interpreter::mfsr, {"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}},
{659, Interpreter::mfsrin, {"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}}, {659, Interpreter::mfsrin, {"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}},
@ -280,26 +280,26 @@ static GekkoOPTemplate table31[] =
static GekkoOPTemplate table31_2[] = static GekkoOPTemplate table31_2[] =
{ {
{266, Interpreter::addx, {"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 1, 0, 0, 0}}, {266, Interpreter::addx, {"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 1, 0, 0, 0}},
{778, Interpreter::addx, {"addox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 1, 0, 0, 0}}, {778, Interpreter::addx, {"addox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 1, 0, 0, 0}},
{10, Interpreter::addcx, {"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}}, {10, Interpreter::addcx, {"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{522, Interpreter::addcx, {"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}}, {522, Interpreter::addcx, {"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{138, Interpreter::addex, {"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}}, {138, Interpreter::addex, {"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{650, Interpreter::addex, {"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}}, {650, Interpreter::addex, {"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT | FL_SET_OE, 1, 0, 0, 0}},
{234, Interpreter::addmex, {"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}}, {234, Interpreter::addmex, {"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{202, Interpreter::addzex, {"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}}, {202, Interpreter::addzex, {"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{491, Interpreter::divwx, {"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 40, 0, 0, 0}}, {491, Interpreter::divwx, {"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 40, 0, 0, 0}},
{1003, Interpreter::divwx, {"divwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 40, 0, 0, 0}}, {1003, Interpreter::divwx, {"divwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT | FL_SET_OE, 40, 0, 0, 0}},
{459, Interpreter::divwux, {"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 40, 0, 0, 0}}, {459, Interpreter::divwux, {"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 40, 0, 0, 0}},
{971, Interpreter::divwux, {"divwuox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 40, 0, 0, 0}}, {971, Interpreter::divwux, {"divwuox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT | FL_SET_OE, 40, 0, 0, 0}},
{75, Interpreter::mulhwx, {"mulhwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 5, 0, 0, 0}}, {75, Interpreter::mulhwx, {"mulhwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 5, 0, 0, 0}},
{11, Interpreter::mulhwux, {"mulhwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 5, 0, 0, 0}}, {11, Interpreter::mulhwux, {"mulhwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 5, 0, 0, 0}},
{235, Interpreter::mullwx, {"mullwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 5, 0, 0, 0}}, {235, Interpreter::mullwx, {"mullwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 5, 0, 0, 0}},
{747, Interpreter::mullwx, {"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 5, 0, 0, 0}}, {747, Interpreter::mullwx, {"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT | FL_SET_OE, 5, 0, 0, 0}},
{104, Interpreter::negx, {"negx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 1, 0, 0, 0}}, {104, Interpreter::negx, {"negx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 1, 0, 0, 0}},
{40, Interpreter::subfx, {"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 1, 0, 0, 0}}, {40, Interpreter::subfx, {"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 1, 0, 0, 0}},
{552, Interpreter::subfx, {"subox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 1, 0, 0, 0}}, {552, Interpreter::subfx, {"subox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT | FL_SET_OE, 1, 0, 0, 0}},
{8, Interpreter::subfcx, {"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}}, {8, Interpreter::subfcx, {"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{520, Interpreter::subfcx, {"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}}, {520, Interpreter::subfcx, {"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT | FL_SET_OE, 1, 0, 0, 0}},
{136, Interpreter::subfex, {"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}}, {136, Interpreter::subfex, {"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{232, Interpreter::subfmex, {"subfmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}}, {232, Interpreter::subfmex, {"subfmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
{200, Interpreter::subfzex, {"subfzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}}, {200, Interpreter::subfzex, {"subfzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},

View File

@ -178,6 +178,8 @@ void Jit64::Init()
code_block.m_gpa = &js.gpa; code_block.m_gpa = &js.gpa;
code_block.m_fpa = &js.fpa; code_block.m_fpa = &js.fpa;
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE); analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
} }
void Jit64::ClearCache() void Jit64::ClearCache()
@ -461,6 +463,8 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
js.downcountAmount += PatchEngine::GetSpeedhackCycles(code_block.m_address); js.downcountAmount += PatchEngine::GetSpeedhackCycles(code_block.m_address);
js.skipnext = false; js.skipnext = false;
js.carryFlagSet = false;
js.carryFlagInverted = false;
js.compilerPC = nextPC; js.compilerPC = nextPC;
// Translate instructions // Translate instructions
for (u32 i = 0; i < code_block.m_num_instructions; i++) for (u32 i = 0; i < code_block.m_num_instructions; i++)
@ -492,6 +496,7 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc
// help peephole optimizations // help peephole optimizations
js.next_inst = ops[i + 1].inst; js.next_inst = ops[i + 1].inst;
js.next_compilerPC = ops[i + 1].address; js.next_compilerPC = ops[i + 1].address;
js.next_op = &ops[i + 1];
} }
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32) if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)

View File

@ -101,6 +101,8 @@ public:
void GenerateConstantOverflow(s64 val); void GenerateConstantOverflow(s64 val);
void GenerateOverflow(); void GenerateOverflow();
void FinalizeCarryOverflow(bool oe, bool inv = false); void FinalizeCarryOverflow(bool oe, bool inv = false);
void FinalizeCarry(Gen::CCFlags cond);
void FinalizeCarry(bool ca);
void ComputeRC(const Gen::OpArg & arg); void ComputeRC(const Gen::OpArg & arg);
// Use to extract bytes from a register using the regcache. offset is in bytes. // Use to extract bytes from a register using the regcache. offset is in bytes.
@ -139,7 +141,7 @@ public:
void DynaRunTable63(UGeckoInstruction _inst); void DynaRunTable63(UGeckoInstruction _inst);
void addx(UGeckoInstruction inst); void addx(UGeckoInstruction inst);
void addcx(UGeckoInstruction inst); void arithcx(UGeckoInstruction inst);
void mulli(UGeckoInstruction inst); void mulli(UGeckoInstruction inst);
void mulhwXx(UGeckoInstruction inst); void mulhwXx(UGeckoInstruction inst);
void mullwx(UGeckoInstruction inst); void mullwx(UGeckoInstruction inst);
@ -147,9 +149,7 @@ public:
void divwx(UGeckoInstruction inst); void divwx(UGeckoInstruction inst);
void srawix(UGeckoInstruction inst); void srawix(UGeckoInstruction inst);
void srawx(UGeckoInstruction inst); void srawx(UGeckoInstruction inst);
void addex(UGeckoInstruction inst); void arithXex(UGeckoInstruction inst);
void addmex(UGeckoInstruction inst);
void addzex(UGeckoInstruction inst);
void extsXx(UGeckoInstruction inst); void extsXx(UGeckoInstruction inst);
@ -217,11 +217,7 @@ public:
void dcbz(UGeckoInstruction inst); void dcbz(UGeckoInstruction inst);
void subfic(UGeckoInstruction inst); void subfic(UGeckoInstruction inst);
void subfcx(UGeckoInstruction inst);
void subfx(UGeckoInstruction inst); void subfx(UGeckoInstruction inst);
void subfex(UGeckoInstruction inst);
void subfmex(UGeckoInstruction inst);
void subfzex(UGeckoInstruction inst);
void twx(UGeckoInstruction inst); void twx(UGeckoInstruction inst);

View File

@ -48,7 +48,7 @@ static GekkoOPTemplate primarytable[] =
{10, &Jit64::cmpXX}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, {10, &Jit64::cmpXX}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
{11, &Jit64::cmpXX}, //"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, {11, &Jit64::cmpXX}, //"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
{12, &Jit64::reg_imm}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, {12, &Jit64::reg_imm}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
{13, &Jit64::reg_imm}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}}, {13, &Jit64::reg_imm}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA | FL_SET_CR0}},
{14, &Jit64::reg_imm}, //"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}}, {14, &Jit64::reg_imm}, //"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}},
{15, &Jit64::reg_imm}, //"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}}, {15, &Jit64::reg_imm}, //"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}},
@ -193,8 +193,8 @@ static GekkoOPTemplate table31[] =
{922, &Jit64::extsXx}, //"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, {922, &Jit64::extsXx}, //"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
{954, &Jit64::extsXx}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, {954, &Jit64::extsXx}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
{536, &Jit64::srwx}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, {536, &Jit64::srwx}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
{792, &Jit64::srawx}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, {792, &Jit64::srawx}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT}},
{824, &Jit64::srawix}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, {824, &Jit64::srawix}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT}},
{24, &Jit64::slwx}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, {24, &Jit64::slwx}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
{54, &Jit64::dcbst}, //"dcbst", OPTYPE_DCACHE, 0, 4}}, {54, &Jit64::dcbst}, //"dcbst", OPTYPE_DCACHE, 0, 4}},
@ -273,7 +273,7 @@ static GekkoOPTemplate table31[] =
{339, &Jit64::mfspr}, //"mfspr", OPTYPE_SPR, FL_OUT_D}}, {339, &Jit64::mfspr}, //"mfspr", OPTYPE_SPR, FL_OUT_D}},
{467, &Jit64::mtspr}, //"mtspr", OPTYPE_SPR, 0, 2}}, {467, &Jit64::mtspr}, //"mtspr", OPTYPE_SPR, 0, 2}},
{371, &Jit64::mftb}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}}, {371, &Jit64::mftb}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}},
{512, &Jit64::mcrxr}, //"mcrxr", OPTYPE_SYSTEM, 0}}, {512, &Jit64::mcrxr}, //"mcrxr", OPTYPE_SYSTEM, FL_READ_CA | FL_SET_CA}},
{595, &Jit64::FallBackToInterpreter}, //"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 2}}, {595, &Jit64::FallBackToInterpreter}, //"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 2}},
{659, &Jit64::FallBackToInterpreter}, //"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 2}}, {659, &Jit64::FallBackToInterpreter}, //"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 2}},
@ -294,12 +294,12 @@ static GekkoOPTemplate table31_2[] =
{ {
{266, &Jit64::addx}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, {266, &Jit64::addx}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{778, &Jit64::addx}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, {778, &Jit64::addx}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{10, &Jit64::addcx}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, {10, &Jit64::arithcx}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{522, &Jit64::addcx}, //"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, {522, &Jit64::arithcx}, //"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{138, &Jit64::addex}, //"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, {138, &Jit64::arithXex}, //"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{650, &Jit64::addex}, //"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, {650, &Jit64::arithXex}, //"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{234, &Jit64::addmex}, //"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, {234, &Jit64::arithXex}, //"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{202, &Jit64::addzex}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, {202, &Jit64::arithXex}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{491, &Jit64::divwx}, //"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, {491, &Jit64::divwx}, //"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
{1003, &Jit64::divwx}, //"divwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, {1003, &Jit64::divwx}, //"divwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
{459, &Jit64::divwux}, //"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, {459, &Jit64::divwux}, //"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
@ -311,11 +311,11 @@ static GekkoOPTemplate table31_2[] =
{104, &Jit64::negx}, //"negx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, {104, &Jit64::negx}, //"negx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{40, &Jit64::subfx}, //"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, {40, &Jit64::subfx}, //"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{552, &Jit64::subfx}, //"subox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, {552, &Jit64::subfx}, //"subox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
{8, &Jit64::subfcx}, //"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, {8, &Jit64::arithcx}, //"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{520, &Jit64::subfcx}, //"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, {520, &Jit64::arithcx}, //"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
{136, &Jit64::subfex}, //"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, {136, &Jit64::arithXex}, //"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{232, &Jit64::subfmex}, //"subfmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, {232, &Jit64::arithXex}, //"subfmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{200, &Jit64::subfzex}, //"subfzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, {200, &Jit64::arithXex}, //"subfzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
}; };
static GekkoOPTemplate table59[] = static GekkoOPTemplate table59[] =

View File

@ -44,28 +44,76 @@ void Jit64::GenerateOverflow()
SetJumpTarget(exit); SetJumpTarget(exit);
} }
// Commits the carry produced by the instruction that was just emitted.
//
// cond is the host condition code that is true when the guest carry (CA)
// should end up set. Both carry-tracking fields in js are reset first. Then,
// only if the current op's carry is wanted (js.op->wantsCA):
//  - when the next op wants the carry in host flags, the carry is left in the
//    host carry flag and js.carryFlagSet is raised; for CC_NC the flag is kept
//    untouched and js.carryFlagInverted records that it is inverted;
//  - otherwise the carry is stored through JitSetCAIf(cond).
//
// NOTE(review): js.next_op is dereferenced whenever wantsCA is set -- confirm
// it always points at a valid op, including for the last instruction of a block.
void Jit64::FinalizeCarry(CCFlags cond)
{
	js.carryFlagSet = false;
	js.carryFlagInverted = false;

	// Carry result is not wanted: emit nothing.
	if (!js.op->wantsCA)
		return;

	if (!js.next_op->wantsCAInFlags)
	{
		// The next op does not take the carry from host flags.
		JitSetCAIf(cond);
		return;
	}

	if (cond == CC_C || cond == CC_NC)
	{
		// The host carry flag already holds the answer (possibly inverted).
		js.carryFlagInverted = cond == CC_NC;
	}
	else
	{
		// convert the condition to a carry flag (is there a better way?)
		SETcc(cond, R(RSCRATCH));
		// BT is only encodable with 16/32/64-bit operands, so test bit 0 of the
		// 32-bit register; that is the bit SETcc has just written.
		BT(32, R(RSCRATCH), Imm8(0));
	}
	js.carryFlagSet = true;
}
// Constant-carry overload: the carry outcome (ca) is already known while the
// code is being emitted, so no host condition needs to be evaluated.
void Jit64::FinalizeCarry(bool ca)
{
	js.carryFlagSet = false;
	js.carryFlagInverted = false;

	// Carry result is not wanted: emit nothing.
	if (!js.op->wantsCA)
		return;

	if (!js.next_op->wantsCAInFlags)
	{
		// The next op does not take the carry from host flags, so go through
		// the CA helpers instead (presumably they update XER -- see their
		// definitions).
		if (ca)
			JitSetCA();
		else
			JitClearCAOV(true, false);
		return;
	}

	// Leave the known carry value in the host carry flag for the next op.
	if (ca)
		STC();
	else
		CLC();
	js.carryFlagSet = true;
}
// Assumes CA,OV are clear // Assumes CA,OV are clear
void Jit64::FinalizeCarryOverflow(bool oe, bool inv) void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
{ {
// USES_XER // USES_XER
if (oe) if (oe)
{ {
// this is slightly messy because JitSetCAIf modifies x86 flags, so we have to do it in both // Make sure not to lose the carry flags (not a big deal, this path is rare).
// sides of the branch. PUSHF();
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~(XER_SO_MASK | XER_OV_MASK)));
FixupBranch jno = J_CC(CC_NO); FixupBranch jno = J_CC(CC_NO);
JitSetCAIf(inv ? CC_NC : CC_C);
//XER[OV/SO] = 1 //XER[OV/SO] = 1
OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
FixupBranch exit = J();
SetJumpTarget(jno); SetJumpTarget(jno);
JitSetCAIf(inv ? CC_NC : CC_C); POPF();
SetJumpTarget(exit);
}
else
{
// Do carry
JitSetCAIf(inv ? CC_NC : CC_C);
} }
// Do carry
FinalizeCarry(inv ? CC_NC : CC_C);
} }
void Jit64::ComputeRC(const Gen::OpArg & arg) void Jit64::ComputeRC(const Gen::OpArg & arg)
@ -129,10 +177,10 @@ static u32 Xor(u32 a, u32 b)
void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc, bool carry) void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc, bool carry)
{ {
gpr.Lock(d, a); gpr.Lock(d, a);
if (a || binary || carry) // yeh nasty special case addic // Be careful; addic treats r0 as r0, but addi treats r0 as zero.
if (a || binary || carry)
{ {
if (carry) carry &= js.op->wantsCA;
JitClearCAOV(false);
if (gpr.R(a).IsImm() && !carry) if (gpr.R(a).IsImm() && !carry)
{ {
gpr.SetImmediate32(d, doop((u32)gpr.R(a).offset, value)); gpr.SetImmediate32(d, doop((u32)gpr.R(a).offset, value));
@ -156,7 +204,7 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
} }
} }
if (carry) if (carry)
JitSetCAIf(CC_C); FinalizeCarry(CC_C);
if (Rc) if (Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
} }
@ -239,6 +287,9 @@ void Jit64::reg_imm(UGeckoInstruction inst)
bool Jit64::CheckMergedBranch(int crf) bool Jit64::CheckMergedBranch(int crf)
{ {
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE))
return false;
const UGeckoInstruction& next = js.next_inst; const UGeckoInstruction& next = js.next_inst;
return (((next.OPCD == 16 /* bcx */) || return (((next.OPCD == 16 /* bcx */) ||
((next.OPCD == 19) && (next.SUBOP10 == 528) /* bcctrx */) || ((next.OPCD == 19) && (next.SUBOP10 == 528) /* bcctrx */) ||
@ -721,148 +772,36 @@ void Jit64::subfic(UGeckoInstruction inst)
{ {
if (imm == 0) if (imm == 0)
{ {
JitClearCAOV(false);
// Flags act exactly like subtracting from 0 // Flags act exactly like subtracting from 0
NEG(32, gpr.R(d)); NEG(32, gpr.R(d));
// Output carry is inverted // Output carry is inverted
JitSetCAIf(CC_NC); FinalizeCarry(CC_NC);
} }
else if (imm == -1) else if (imm == -1)
{ {
// CA is always set in this case
JitSetCA();
NOT(32, gpr.R(d)); NOT(32, gpr.R(d));
// CA is always set in this case
FinalizeCarry(true);
} }
else else
{ {
JitClearCAOV(false);
NOT(32, gpr.R(d)); NOT(32, gpr.R(d));
ADD(32, gpr.R(d), Imm32(imm+1)); ADD(32, gpr.R(d), Imm32(imm+1));
// Output carry is normal // Output carry is normal
JitSetCAIf(CC_C); FinalizeCarry(CC_C);
} }
} }
else else
{ {
JitClearCAOV(false);
MOV(32, gpr.R(d), Imm32(imm)); MOV(32, gpr.R(d), Imm32(imm));
SUB(32, gpr.R(d), gpr.R(a)); SUB(32, gpr.R(d), gpr.R(a));
// Output carry is inverted // Output carry is inverted
JitSetCAIf(CC_NC); FinalizeCarry(CC_NC);
} }
gpr.UnlockAll(); gpr.UnlockAll();
// This instruction has no RC flag // This instruction has no RC flag
} }
// Emits host code for the carrying subtract: d = b - a, handling the three
// register-aliasing cases (d == b, d == a, d distinct) separately.
// The carry is finalized with inv = true because the host SUB leaves a borrow
// in the carry flag, i.e. the inverse of the carry that is recorded.
void Jit64::subfcx(UGeckoInstruction inst)
{
INSTRUCTION_START;
JITDISABLE(bJITIntegerOff);
int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.Lock(a, b, d);
// Only load d's current value when it is also one of the inputs.
gpr.BindToRegister(d, (d == a || d == b), true);
JitClearCAOV(inst.OE);
if (d == b)
{
// d already holds b: subtract a in place.
SUB(32, gpr.R(d), gpr.R(a));
}
else if (d == a)
{
// d aliases a: save a in the scratch register before d is overwritten with b.
MOV(32, R(RSCRATCH), gpr.R(a));
MOV(32, gpr.R(d), gpr.R(b));
SUB(32, gpr.R(d), R(RSCRATCH));
}
else
{
MOV(32, gpr.R(d), gpr.R(b));
SUB(32, gpr.R(d), gpr.R(a));
}
// NOTE(review): ComputeRC is emitted before the carry is finalized here, which
// relies on ComputeRC leaving the host flags from SUB intact -- confirm.
if (inst.Rc)
ComputeRC(gpr.R(d));
FinalizeCarryOverflow(inst.OE, true);
gpr.UnlockAll();
}
// Emits host code for the extended subtract: d = ~a + b + carry-in.
// When d aliases b it is emitted as a subtract-with-borrow instead, which
// leaves the resulting carry inverted; invertedCarry tracks that so the carry
// can be finalized with the right polarity.
void Jit64::subfex(UGeckoInstruction inst)
{
INSTRUCTION_START;
JITDISABLE(bJITIntegerOff);
int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.Lock(a, b, d);
// Only load d's current value when it is also one of the inputs.
gpr.BindToRegister(d, (d == a || d == b), true);
// Presumably loads the guest carry into the host carry flag -- the ADC/SBB
// below consume it; confirm against JitGetAndClearCAOV.
JitGetAndClearCAOV(inst.OE);
bool invertedCarry = false;
if (d == b)
{
// Convert carry to borrow
CMC();
SBB(32, gpr.R(d), gpr.R(a));
invertedCarry = true;
}
else if (d == a)
{
// d already holds a: complement in place, then add b plus carry.
NOT(32, gpr.R(d));
ADC(32, gpr.R(d), gpr.R(b));
}
else
{
MOV(32, gpr.R(d), gpr.R(a));
NOT(32, gpr.R(d));
ADC(32, gpr.R(d), gpr.R(b));
}
FinalizeCarryOverflow(inst.OE, invertedCarry);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
// Emits host code for the "minus one" extended subtract:
// d = ~a + 0xFFFFFFFF + carry-in.
void Jit64::subfmex(UGeckoInstruction inst)
{
// USES_XER
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
int a = inst.RA, d = inst.RD;
gpr.Lock(a, d);
// Only load d's current value when it is also the input.
gpr.BindToRegister(d, d == a);
// Presumably loads the guest carry into the host carry flag for the ADC
// below; confirm against JitGetAndClearCAOV.
JitGetAndClearCAOV(inst.OE);
if (d != a)
MOV(32, gpr.R(d), gpr.R(a));
NOT(32, gpr.R(d));
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
FinalizeCarryOverflow(inst.OE);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
// Emits host code for the "zero" extended subtract: d = ~a + 0 + carry-in.
void Jit64::subfzex(UGeckoInstruction inst)
{
// USES_XER
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
int a = inst.RA, d = inst.RD;
gpr.Lock(a, d);
// Only load d's current value when it is also the input.
gpr.BindToRegister(d, d == a);
// Presumably loads the guest carry into the host carry flag for the ADC
// below; confirm against JitGetAndClearCAOV.
JitGetAndClearCAOV(inst.OE);
if (d != a)
MOV(32, gpr.R(d), gpr.R(a));
NOT(32, gpr.R(d));
ADC(32, gpr.R(d), Imm8(0));
FinalizeCarryOverflow(inst.OE);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
void Jit64::subfx(UGeckoInstruction inst) void Jit64::subfx(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
@ -1329,96 +1268,93 @@ void Jit64::addx(UGeckoInstruction inst)
} }
} }
void Jit64::addex(UGeckoInstruction inst) void Jit64::arithXex(UGeckoInstruction inst)
{ {
// USES_XER
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITIntegerOff); JITDISABLE(bJITIntegerOff);
int a = inst.RA, b = inst.RB, d = inst.RD; bool regsource = !(inst.SUBOP10 & 64); // addex or subfex
bool mex = !!(inst.SUBOP10 & 32); // addmex/subfmex or addzex/subfzex
bool add = !!(inst.SUBOP10 & 2); // add or sub
int a = inst.RA;
int b = regsource ? inst.RB : a;
int d = inst.RD;
bool same_input_sub = !add && regsource && a == b;
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.BindToRegister(d, (d == a) || (d == b)); gpr.BindToRegister(d, !same_input_sub && (d == a || d == b));
JitGetAndClearCAOV(inst.OE); if (!js.carryFlagSet)
if ((d == a) || (d == b)) JitGetAndClearCAOV(inst.OE);
bool invertedCarry = false;
// Special case: subfe A, B, B is a common compiler idiom
if (same_input_sub)
{ {
ADC(32, gpr.R(d), gpr.R((d == a) ? b : a)); // Convert carry to borrow
if (!js.carryFlagInverted)
CMC();
SBB(32, gpr.R(d), gpr.R(d));
invertedCarry = true;
}
else if (!add && regsource && d == b)
{
if (!js.carryFlagInverted)
CMC();
if (d != b)
MOV(32, gpr.R(d), gpr.R(b));
SBB(32, gpr.R(d), gpr.R(a));
invertedCarry = true;
} }
else else
{ {
MOV(32, gpr.R(d), gpr.R(a)); OpArg source = regsource ? gpr.R(d == b ? a : b) : Imm32(mex ? 0xFFFFFFFF : 0);
ADC(32, gpr.R(d), gpr.R(b)); if (js.carryFlagInverted)
CMC();
if (d != a && d != b)
MOV(32, gpr.R(d), gpr.R(a));
if (!add)
NOT(32, gpr.R(d));
ADC(32, gpr.R(d), source);
} }
FinalizeCarryOverflow(inst.OE); FinalizeCarryOverflow(inst.OE, invertedCarry);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
} }
void Jit64::addcx(UGeckoInstruction inst) void Jit64::arithcx(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITIntegerOff); JITDISABLE(bJITIntegerOff);
bool add = !!(inst.SUBOP10 & 2); // add or sub
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.Lock(a, b, d);
gpr.BindToRegister(d, d == a || d == b, true);
if ((d == a) || (d == b)) if (d == a && d != b)
{ {
int operand = ((d == a) ? b : a); if (add)
gpr.Lock(a, b, d); {
gpr.BindToRegister(d, true); ADD(32, gpr.R(d), gpr.R(b));
JitClearCAOV(inst.OE); }
ADD(32, gpr.R(d), gpr.R(operand)); else
FinalizeCarryOverflow(inst.OE); {
if (inst.Rc) // special case, because sub isn't reversible
ComputeRC(gpr.R(d)); MOV(32, R(RSCRATCH), gpr.R(a));
gpr.UnlockAll(); MOV(32, gpr.R(d), gpr.R(b));
SUB(32, gpr.R(d), R(RSCRATCH));
}
} }
else else
{ {
gpr.Lock(a, b, d); if (d != b)
gpr.BindToRegister(d, false); MOV(32, gpr.R(d), gpr.R(b));
JitClearCAOV(inst.OE); if (add)
MOV(32, gpr.R(d), gpr.R(a)); ADD(32, gpr.R(d), gpr.R(a));
ADD(32, gpr.R(d), gpr.R(b)); else
FinalizeCarryOverflow(inst.OE); SUB(32, gpr.R(d), gpr.R(a));
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
} }
}
void Jit64::addmex(UGeckoInstruction inst) FinalizeCarryOverflow(inst.OE, !add);
{
// USES_XER
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
int a = inst.RA, d = inst.RD;
gpr.Lock(d);
gpr.BindToRegister(d, d == a);
JitGetAndClearCAOV(inst.OE);
if (d != a)
MOV(32, gpr.R(d), gpr.R(a));
ADC(32, gpr.R(d), Imm32(0xFFFFFFFF));
FinalizeCarryOverflow(inst.OE);
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
}
void Jit64::addzex(UGeckoInstruction inst)
{
// USES_XER
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
int a = inst.RA, d = inst.RD;
gpr.Lock(d);
gpr.BindToRegister(d, d == a);
JitGetAndClearCAOV(inst.OE);
if (d != a)
MOV(32, gpr.R(d), gpr.R(a));
ADC(32, gpr.R(d), Imm8(0));
FinalizeCarryOverflow(inst.OE);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
@ -1811,16 +1747,22 @@ void Jit64::srawx(UGeckoInstruction inst)
gpr.FlushLockX(ECX); gpr.FlushLockX(ECX);
gpr.Lock(a, s, b); gpr.Lock(a, s, b);
gpr.BindToRegister(a, (a == s || a == b), true); gpr.BindToRegister(a, (a == s || a == b), true);
JitClearCAOV(false);
MOV(32, R(ECX), gpr.R(b)); MOV(32, R(ECX), gpr.R(b));
if (a != s) if (a != s)
MOV(32, gpr.R(a), gpr.R(s)); MOV(32, gpr.R(a), gpr.R(s));
SHL(64, gpr.R(a), Imm8(32)); SHL(64, gpr.R(a), Imm8(32));
SAR(64, gpr.R(a), R(ECX)); SAR(64, gpr.R(a), R(ECX));
MOV(32, R(RSCRATCH), gpr.R(a)); if (js.op->wantsCA)
SHR(64, gpr.R(a), Imm8(32)); {
TEST(32, gpr.R(a), R(RSCRATCH)); MOV(32, R(RSCRATCH), gpr.R(a));
JitSetCAIf(CC_NZ); SHR(64, gpr.R(a), Imm8(32));
TEST(32, gpr.R(a), R(RSCRATCH));
}
else
{
SHR(64, gpr.R(a), Imm8(32));
}
FinalizeCarry(CC_NZ);
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
if (inst.Rc) if (inst.Rc)
@ -1838,41 +1780,50 @@ void Jit64::srawix(UGeckoInstruction inst)
{ {
gpr.Lock(a, s); gpr.Lock(a, s);
gpr.BindToRegister(a, a == s, true); gpr.BindToRegister(a, a == s, true);
MOV(32, R(RSCRATCH), gpr.R(s)); if (!js.op->wantsCA)
if (a != s)
MOV(32, gpr.R(a), R(RSCRATCH));
// some optimized common cases that can be done in slightly fewer ops
if (amount == 31)
{ {
JitSetCA(); if (a != s)
SAR(32, gpr.R(a), Imm8(31)); MOV(32, gpr.R(a), gpr.R(s));
NEG(32, R(RSCRATCH)); // RSCRATCH = input == INT_MIN ? INT_MIN : -input; SAR(32, gpr.R(a), Imm8(amount));
AND(32, R(RSCRATCH), Imm32(0x80000000)); // RSCRATCH = input < 0 && input != INT_MIN ? 0 : 0x80000000
SHR(32, R(RSCRATCH), Imm8(31 - XER_CA_SHIFT));
XOR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); // XER.CA = (input < 0 && input != INT_MIN)
}
else if (amount == 1)
{
JitClearCAOV(false);
SHR(32, R(RSCRATCH), Imm8(31)); // sign
AND(32, R(RSCRATCH), gpr.R(a)); // (sign && carry)
SAR(32, gpr.R(a), Imm8(1));
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); // XER.CA = sign && carry, aka (input&0x80000001) == 0x80000001
} }
else else
{ {
JitClearCAOV(false); MOV(32, R(RSCRATCH), gpr.R(s));
SAR(32, gpr.R(a), Imm8(amount)); if (a != s)
SHL(32, R(RSCRATCH), Imm8(32 - amount)); MOV(32, gpr.R(a), R(RSCRATCH));
TEST(32, R(RSCRATCH), gpr.R(a)); // some optimized common cases that can be done in slightly fewer ops
JitSetCAIf(CC_NZ); if (amount == 31)
{
JitSetCA();
SAR(32, gpr.R(a), Imm8(31));
NEG(32, R(RSCRATCH)); // RSCRATCH = input == INT_MIN ? INT_MIN : -input;
AND(32, R(RSCRATCH), Imm32(0x80000000)); // RSCRATCH = input < 0 && input != INT_MIN ? 0 : 0x80000000
SHR(32, R(RSCRATCH), Imm8(31 - XER_CA_SHIFT));
XOR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); // XER.CA = (input < 0 && input != INT_MIN)
}
else if (amount == 1)
{
JitClearCAOV(true, false);
SHR(32, R(RSCRATCH), Imm8(31)); // sign
AND(32, R(RSCRATCH), gpr.R(a)); // (sign && carry)
SAR(32, gpr.R(a), Imm8(1));
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); // XER.CA = sign && carry, aka (input&0x80000001) == 0x80000001
}
else
{
JitClearCAOV(true, false);
SAR(32, gpr.R(a), Imm8(amount));
SHL(32, R(RSCRATCH), Imm8(32 - amount));
TEST(32, R(RSCRATCH), gpr.R(a));
FinalizeCarry(CC_NZ);
}
} }
} }
else else
{ {
gpr.Lock(a, s); gpr.Lock(a, s);
JitClearCAOV(false); FinalizeCarry(false);
gpr.BindToRegister(a, a == s, true); gpr.BindToRegister(a, a == s, true);
if (a != s) if (a != s)

View File

@ -1106,7 +1106,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
Jit->JitSetCA(); Jit->JitSetCA();
FixupBranch cont = Jit->J(); FixupBranch cont = Jit->J();
Jit->SetJumpTarget(nocarry); Jit->SetJumpTarget(nocarry);
Jit->JitClearCAOV(false); Jit->JitClearCAOV(true, false);
Jit->SetJumpTarget(cont); Jit->SetJumpTarget(cont);
regNormalRegClear(RI, I); regNormalRegClear(RI, I);
break; break;

View File

@ -81,13 +81,16 @@ protected:
bool isLastInstruction; bool isLastInstruction;
bool memcheck; bool memcheck;
bool skipnext; bool skipnext;
bool carryFlagSet;
bool carryFlagInverted;
int fifoBytesThisBlock; int fifoBytesThisBlock;
PPCAnalyst::BlockStats st; PPCAnalyst::BlockStats st;
PPCAnalyst::BlockRegStats gpa; PPCAnalyst::BlockRegStats gpa;
PPCAnalyst::BlockRegStats fpa; PPCAnalyst::BlockRegStats fpa;
PPCAnalyst::CodeOp *op; PPCAnalyst::CodeOp* op;
PPCAnalyst::CodeOp* next_op;
u8* rewriteStart; u8* rewriteStart;
JitBlock *curBlock; JitBlock *curBlock;

View File

@ -845,13 +845,14 @@ void EmuCodeBlock::JitSetCAIf(CCFlags conditionCode)
SETcc(conditionCode, R(RSCRATCH)); SETcc(conditionCode, R(RSCRATCH));
MOVZX(32, 8, RSCRATCH, R(RSCRATCH)); MOVZX(32, 8, RSCRATCH, R(RSCRATCH));
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT)); SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK));
OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); //XER.CA = 1 OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); //XER.CA = 1
} }
void EmuCodeBlock::JitClearCAOV(bool oe) void EmuCodeBlock::JitClearCAOV(bool ca, bool oe)
{ {
if (oe) u32 mask = (ca ? ~XER_CA_MASK : 0xFFFFFFFF) & (oe ? ~XER_OV_MASK : 0xFFFFFFFF);
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK & ~XER_OV_MASK)); //XER.CA, XER.OV = 0 if (mask == 0xFFFFFFFF)
else return;
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0 AND(32, PPCSTATE(spr[SPR_XER]), Imm32(mask));
} }

View File

@ -111,7 +111,7 @@ public:
void JitGetAndClearCAOV(bool oe); void JitGetAndClearCAOV(bool oe);
void JitSetCA(); void JitSetCA();
void JitSetCAIf(Gen::CCFlags conditionCode); void JitSetCAIf(Gen::CCFlags conditionCode);
void JitClearCAOV(bool oe); void JitClearCAOV(bool ca, bool oe);
void ForceSinglePrecisionS(Gen::X64Reg xmm); void ForceSinglePrecisionS(Gen::X64Reg xmm);
void ForceSinglePrecisionP(Gen::X64Reg xmm); void ForceSinglePrecisionP(Gen::X64Reg xmm);

View File

@ -213,14 +213,17 @@ static void AnalyzeFunction2(Symbol *func)
func->flags = flags; func->flags = flags;
} }
// IMPORTANT - CURRENTLY ASSUMES THAT A IS A COMPARE
static bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b) static bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
{ {
const GekkoOPInfo *a_info = a.opinfo;
const GekkoOPInfo *b_info = b.opinfo; const GekkoOPInfo *b_info = b.opinfo;
int a_flags = a_info->flags;
int b_flags = b_info->flags; int b_flags = b_info->flags;
if (b_flags & (FL_SET_CRx | FL_ENDBLOCK | FL_TIMER | FL_EVIL)) if (b_flags & (FL_SET_CRx | FL_ENDBLOCK | FL_TIMER | FL_EVIL | FL_SET_OE))
return false; return false;
if ((b_flags & (FL_RC_BIT | FL_RC_BIT_F)) && (b.inst.hex & 1)) if ((b_flags & (FL_RC_BIT | FL_RC_BIT_F)) && (b.inst.Rc))
return false;
if ((a_flags & (FL_SET_CA | FL_READ_CA)) && (b_flags & (FL_SET_CA | FL_READ_CA)))
return false; return false;
switch (b.inst.OPCD) switch (b.inst.OPCD)
@ -250,20 +253,16 @@ static bool CanSwapAdjacentOps(const CodeOp &a, const CodeOp &b)
{ {
int regInA = a.regsIn[j]; int regInA = a.regsIn[j];
int regInB = b.regsIn[j]; int regInB = b.regsIn[j];
if (regInA >= 0 && // register collision: b outputs to one of a's inputs
(b.regsOut[0] == regInA || if (regInA >= 0 && (b.regsOut[0] == regInA || b.regsOut[1] == regInA))
b.regsOut[1] == regInA))
{
// reg collision! don't swap
return false; return false;
} // register collision: a outputs to one of b's inputs
if (regInB >= 0 && if (regInB >= 0 && (a.regsOut[0] == regInB || a.regsOut[1] == regInB))
(a.regsOut[0] == regInB ||
a.regsOut[1] == regInB))
{
// reg collision! don't swap
return false; return false;
} // register collision: b outputs to one of a's outputs (overwriting it)
for (int k = 0; k < 2; k++)
if (b.regsOut[k] >= 0 && (b.regsOut[k] == a.regsOut[0] || b.regsOut[k] == a.regsOut[1]))
return false;
} }
return true; return true;
@ -403,34 +402,84 @@ void FindFunctions(u32 startAddr, u32 endAddr, PPCSymbolDB *func_db)
leafSize, niceSize, unniceSize); leafSize, niceSize, unniceSize);
} }
void PPCAnalyzer::ReorderInstructions(u32 instructions, CodeOp *code) static bool isCmp(const CodeOp& a)
{ {
// Instruction Reordering Pass return (a.inst.OPCD == 10 || a.inst.OPCD == 11) || (a.inst.OPCD == 31 && (a.inst.SUBOP10 == 0 || a.inst.SUBOP10 == 32));
// Bubble down compares towards branches, so that they can be merged. }
// -2: -1 for the pair, -1 for not swapping with the final instruction which is probably the branch.
for (u32 i = 0; i < (instructions - 2); ++i) static bool isRlwinm_rc(const CodeOp& a)
{
return a.inst.OPCD == 21 && a.inst.Rc;
}
// Tells the carry reordering pass whether an op is a candidate for bubbling:
// it must be flagged FL_SET_CA, must not be flagged FL_SET_OE, and must be an
// integer-type op.
static bool isCarryOp(const CodeOp& a)
{
	const int flags = a.opinfo->flags;

	// Ops that never write the carry flag are of no interest to the carry pass.
	if (!(flags & FL_SET_CA))
		return false;

	// Ops carrying FL_SET_OE are left where they are.
	if (flags & FL_SET_OE)
		return false;

	return a.opinfo->type == OPTYPE_INTEGER;
}
void PPCAnalyzer::ReorderInstructionsCore(u32 instructions, CodeOp* code, bool reverse, ReorderType type)
{
// Bubbling an instruction sometimes reveals another opportunity to bubble an instruction, so do
// multiple passes.
while (true)
{ {
CodeOp &a = code[i]; // Instruction Reordering Pass
CodeOp &b = code[i + 1]; // Carry pass: bubble carry-using instructions as close to each other as possible, so we can avoid
// All integer compares can be reordered. // storing the carry flag.
if ((a.inst.OPCD == 10 || a.inst.OPCD == 11) || // Compare pass: bubble compare instructions next to branches, so they can be merged.
(a.inst.OPCD == 31 && (a.inst.SUBOP10 == 0 || a.inst.SUBOP10 == 32))) bool swapped = false;
int increment = reverse ? -1 : 1;
int start = reverse ? instructions - 1 : 0;
int end = reverse ? 0 : instructions - 1;
for (int i = start; i != end; i += increment)
{ {
// Got a compare instruction. CodeOp &a = code[i];
if (CanSwapAdjacentOps(a, b)) CodeOp &b = code[i + increment];
// Reorder integer compares, rlwinm., and carry-affecting ops
// (if we add more merged branch instructions, add them here!)
if ((type == REORDER_CARRY && isCarryOp(a)) || (type == REORDER_CMP && (isCmp(a) || isRlwinm_rc(a))))
{ {
// Alright, let's bubble it down! // once we're next to a carry instruction, don't move away!
std::swap(a, b); if (type == REORDER_CARRY && i != start)
{
// if we read the CA flag, and the previous instruction sets it, don't move away.
if (!reverse && (a.opinfo->flags & FL_READ_CA) && (code[i - increment].opinfo->flags & FL_SET_CA))
continue;
// if we set the CA flag, and the next instruction reads it, don't move away.
if (reverse && (a.opinfo->flags & FL_SET_CA) && (code[i - increment].opinfo->flags & FL_READ_CA))
continue;
}
if (CanSwapAdjacentOps(a, b))
{
// Alright, let's bubble it!
std::swap(a, b);
swapped = true;
}
} }
} }
if (!swapped)
return;
} }
} }
// Runs the enabled instruction reordering passes over a block, in place.
//   instructions: number of entries in `code`
//   code:         the block's ops; entries may be swapped with their neighbours
// The carry pass runs only with OPTION_CARRY_MERGE, the compare pass only with
// OPTION_BRANCH_MERGE.
void PPCAnalyzer::ReorderInstructions(u32 instructions, CodeOp *code)
{
	// With fewer than two ops there is nothing to swap. Bail out here: the core pass
	// derives its loop bounds from `instructions - 1` on an unsigned count, which wraps
	// for an empty block and would make it index outside `code`.
	if (instructions < 2)
		return;

	if (HasOption(OPTION_CARRY_MERGE))
	{
		// For carry, bubble instructions *towards* each other; one direction often isn't
		// enough to get pairs like addc/adde next to each other. Backward pass first,
		// then forward.
		ReorderInstructionsCore(instructions, code, true, REORDER_CARRY);
		ReorderInstructionsCore(instructions, code, false, REORDER_CARRY);
	}

	// Compare pass: forward only.
	if (HasOption(OPTION_BRANCH_MERGE))
		ReorderInstructionsCore(instructions, code, false, REORDER_CMP);
}
void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInfo *opinfo, u32 index) void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInfo *opinfo, u32 index)
{ {
code->wantsCR0 = false; code->wantsCR0 = false;
code->wantsCR1 = false; code->wantsCR1 = false;
code->wantsPS1 = false;
if (opinfo->flags & FL_USE_FPU) if (opinfo->flags & FL_USE_FPU)
block->m_fpa->any = true; block->m_fpa->any = true;
@ -458,6 +507,24 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf
code->outputFPRF = (opinfo->flags & FL_SET_FPRF) ? true : false; code->outputFPRF = (opinfo->flags & FL_SET_FPRF) ? true : false;
code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) ? true : false; code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) ? true : false;
code->wantsCA = (opinfo->flags & FL_READ_CA) ? true : false;
code->outputCA = (opinfo->flags & FL_SET_CA) ? true : false;
// We're going to try to avoid storing carry in XER if we can avoid it -- keep it in the x86 carry flag!
// If the instruction reads CA but doesn't write it, we still need to store CA in XER; we can't
// leave it in flags.
if (HasOption(OPTION_CARRY_MERGE))
code->wantsCAInFlags = code->wantsCA && code->outputCA && opinfo->type == OPTYPE_INTEGER;
else
code->wantsCAInFlags = false;
// mfspr/mtspr can affect/use XER, so be super careful here
// we need to note specifically that mfspr needs CA in XER, not in the x86 carry flag
if (code->inst.OPCD == 31 && code->inst.SUBOP10 == 339) // mfspr
code->wantsCA = ((code->inst.SPRU << 5) | (code->inst.SPRL & 0x1F)) == SPR_XER;
if (code->inst.OPCD == 31 && code->inst.SUBOP10 == 467) // mtspr
code->outputCA = ((code->inst.SPRU << 5) | (code->inst.SPRL & 0x1F)) == SPR_XER;
int numOut = 0; int numOut = 0;
int numIn = 0; int numIn = 0;
if (opinfo->flags & FL_OUT_A) if (opinfo->flags & FL_OUT_A)
@ -715,26 +782,30 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
block->m_broken = true; block->m_broken = true;
} }
// Scan for CR0 dependency // Scan for flag dependencies; assume the next block (or any branch that can leave the block)
// assume next block wants flags to be safe // wants flags, to be safe.
bool wantsCR0 = true; bool wantsCR0 = true;
bool wantsCR1 = true; bool wantsCR1 = true;
bool wantsPS1 = true;
bool wantsFPRF = true; bool wantsFPRF = true;
bool wantsCA = true;
for (int i = block->m_num_instructions - 1; i >= 0; i--) for (int i = block->m_num_instructions - 1; i >= 0; i--)
{ {
wantsCR0 |= code[i].wantsCR0 || code[i].canEndBlock; bool opWantsCR0 = code[i].wantsCR0;
wantsCR1 |= code[i].wantsCR1 || code[i].canEndBlock; bool opWantsCR1 = code[i].wantsCR1;
wantsPS1 |= code[i].wantsPS1 || code[i].canEndBlock; bool opWantsFPRF = code[i].wantsFPRF;
wantsFPRF |= code[i].wantsFPRF || code[i].canEndBlock; bool opWantsCA = code[i].wantsCA;
code[i].wantsCR0 = wantsCR0; code[i].wantsCR0 = wantsCR0 || code[i].canEndBlock;
code[i].wantsCR1 = wantsCR1; code[i].wantsCR1 = wantsCR1 || code[i].canEndBlock;
code[i].wantsPS1 = wantsPS1; code[i].wantsFPRF = wantsFPRF || code[i].canEndBlock;
code[i].wantsFPRF = wantsFPRF; code[i].wantsCA = wantsCA || code[i].canEndBlock;
wantsCR0 &= !code[i].outputCR0; wantsCR0 |= opWantsCR0 || code[i].canEndBlock;
wantsCR1 &= !code[i].outputCR1; wantsCR1 |= opWantsCR1 || code[i].canEndBlock;
wantsPS1 &= !code[i].outputPS1; wantsFPRF |= opWantsFPRF || code[i].canEndBlock;
wantsFPRF &= !code[i].outputFPRF; wantsCA |= opWantsCA || code[i].canEndBlock;
wantsCR0 &= !code[i].outputCR0 || opWantsCR0;
wantsCR1 &= !code[i].outputCR1 || opWantsCR1;
wantsFPRF &= !code[i].outputFPRF || opWantsFPRF;
wantsCA &= !code[i].outputCA || opWantsCA;
} }
return address; return address;
} }

View File

@ -33,12 +33,13 @@ struct CodeOp //16B
bool isBranchTarget; bool isBranchTarget;
bool wantsCR0; bool wantsCR0;
bool wantsCR1; bool wantsCR1;
bool wantsPS1;
bool wantsFPRF; bool wantsFPRF;
bool wantsCA;
bool wantsCAInFlags;
bool outputCR0; bool outputCR0;
bool outputCR1; bool outputCR1;
bool outputPS1;
bool outputFPRF; bool outputFPRF;
bool outputCA;
bool canEndBlock; bool canEndBlock;
bool skip; // followed BL-s for example bool skip; // followed BL-s for example
}; };
@ -143,6 +144,13 @@ class PPCAnalyzer
{ {
private: private:
// Selects which family of instructions ReorderInstructionsCore is allowed to bubble.
enum ReorderType
{
// Carry pass: moves ops accepted by isCarryOp().
REORDER_CARRY,
// Compare pass: moves ops accepted by isCmp() or isRlwinm_rc().
REORDER_CMP
};
void ReorderInstructionsCore(u32 instructions, CodeOp* code, bool reverse, ReorderType type);
void ReorderInstructions(u32 instructions, CodeOp *code); void ReorderInstructions(u32 instructions, CodeOp *code);
void SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInfo *opinfo, u32 index); void SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInfo *opinfo, u32 index);
@ -175,6 +183,14 @@ public:
// Requires JIT support to work. // Requires JIT support to work.
// XXX: NOT COMPLETE // XXX: NOT COMPLETE
OPTION_FORWARD_JUMP = (1 << 3), OPTION_FORWARD_JUMP = (1 << 3),
// Reorder compare/Rc instructions next to their associated branches and
// merge in the JIT (for common cases, anyway).
OPTION_BRANCH_MERGE = (1 << 4),
// Reorder carry-using instructions next to each other and pass the carry
// flag in the x86 flags between them, instead of in XER.
OPTION_CARRY_MERGE = (1 << 5),
}; };

View File

@ -38,6 +38,7 @@ enum
FL_LOADSTORE = (1<<19), FL_LOADSTORE = (1<<19),
FL_SET_FPRF = (1<<20), FL_SET_FPRF = (1<<20),
FL_READ_FPRF = (1<<21), FL_READ_FPRF = (1<<21),
FL_SET_OE = (1<<22),
}; };
enum enum