From 5b4d12c1f3dd2c6fd8f9eba3b0c9e0832a54835d Mon Sep 17 00:00:00 2001 From: Shawn Hoffman Date: Thu, 29 Oct 2009 04:40:26 +0000 Subject: [PATCH] small code cleanup in JIT: use JITIL's nice JITDISABLE macro git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4477 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Core/Src/PowerPC/Jit64/Jit.h | 5 +- .../Src/PowerPC/Jit64/Jit_FloatingPoint.cpp | 24 +- .../Core/Src/PowerPC/Jit64/Jit_Integer.cpp | 1688 ++++++++--------- .../Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp | 729 ++++--- .../PowerPC/Jit64/Jit_LoadStoreFloating.cpp | 31 +- .../Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp | 10 +- .../Core/Src/PowerPC/Jit64/Jit_Paired.cpp | 687 ++++--- .../Src/PowerPC/Jit64/Jit_SystemRegisters.cpp | 279 ++- .../PowerPC/Jit64IL/Jit_SystemRegisters.cpp | 202 +- 9 files changed, 1788 insertions(+), 1867 deletions(-) diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index 017332014f..f429cc4e6d 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -77,7 +77,10 @@ void Jit(u32 em_address); // #define INSTRUCTION_START PPCTables::CountInstruction(inst); #define INSTRUCTION_START - +#define JITDISABLE(type) \ + if (Core::g_CoreStartupParameter.bJITOff || \ + Core::g_CoreStartupParameter.bJIT##type##Off) \ + {Default(inst); return;} class TrampolineCache : public Gen::XCodeBlock { diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp index 71dc2a8948..95b06e85b9 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_FloatingPoint.cpp @@ -73,9 +73,8 @@ void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEm void Jit64::fp_arith_s(UGeckoInstruction inst) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; + INSTRUCTION_START + JITDISABLE(FloatingPoint) if (inst.Rc) { Default(inst); return; } @@ -104,9 +103,8 @@ void Jit64::fp_arith_s(UGeckoInstruction inst) void Jit64::fmaddXX(UGeckoInstruction inst) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; + INSTRUCTION_START + JITDISABLE(FloatingPoint) if (inst.Rc) { Default(inst); return; } @@ -162,9 +160,8 @@ void Jit64::fmaddXX(UGeckoInstruction inst) void Jit64::fsign(UGeckoInstruction inst) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; + INSTRUCTION_START + JITDISABLE(FloatingPoint) if (inst.Rc) { Default(inst); return; } @@ -195,8 +192,7 @@ void Jit64::fsign(UGeckoInstruction inst) void Jit64::fmrx(UGeckoInstruction inst) { INSTRUCTION_START - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff) - {Default(inst); return;} // turn off from debugger + JITDISABLE(FloatingPoint) if (inst.Rc) { Default(inst); return; } @@ -213,9 +209,9 @@ void Jit64::fcmpx(UGeckoInstruction inst) { // TODO : This still causes crashes in Nights, and broken graphics // in Paper Mario, Super Paper Mario as well as SoulCalibur 2 prolly others too.. 
:( - INSTRUCTION_START; - if(Core::g_CoreStartupParameter.bJITOff || jo.fpAccurateFcmp - || Core::g_CoreStartupParameter.bJITFloatingPointOff) { + INSTRUCTION_START + JITDISABLE(FloatingPoint) + if (jo.fpAccurateFcmp) { Default(inst); return; // turn off from debugger } diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp index 8b4a6c6a1c..e4c1a003e6 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp @@ -24,949 +24,903 @@ #include "JitRegCache.h" #include "JitAsm.h" - // Assumes that the flags were just set through an addition. - void Jit64::GenerateCarry(Gen::X64Reg temp_reg) { - // USES_XER - SETcc(CC_C, R(temp_reg)); - AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(1 << 29))); - SHL(32, R(temp_reg), Imm8(29)); - OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(temp_reg)); - } +// Assumes that the flags were just set through an addition. +void Jit64::GenerateCarry(Gen::X64Reg temp_reg) { + // USES_XER + SETcc(CC_C, R(temp_reg)); + AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(1 << 29))); + SHL(32, R(temp_reg), Imm8(29)); + OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(temp_reg)); +} - u32 Add(u32 a, u32 b) {return a + b;} - u32 Or (u32 a, u32 b) {return a | b;} - u32 And(u32 a, u32 b) {return a & b;} - u32 Xor(u32 a, u32 b) {return a ^ b;} +u32 Add(u32 a, u32 b) {return a + b;} +u32 Or (u32 a, u32 b) {return a | b;} +u32 And(u32 a, u32 b) {return a & b;} +u32 Xor(u32 a, u32 b) {return a ^ b;} - void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc, bool carry) +void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc, bool carry) +{ + gpr.Lock(d, a); + if (a || binary || carry) // yeh nasty special case addic { - gpr.Lock(d, a); - if (a || binary || carry) // yeh nasty special case addic + if (a == d) { - if (a == d) + if (gpr.R(d).IsImm() && !carry) { - if (gpr.R(d).IsImm() && !carry) - { - gpr.SetImmediate32(d, doop((u32)gpr.R(d).offset, value)); - } - else - { - if (gpr.R(d).IsImm()) - gpr.LoadToX64(d, false); - (this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; - if (carry) - GenerateCarry(EAX); - } + gpr.SetImmediate32(d, doop((u32)gpr.R(d).offset, value)); } else { - gpr.LoadToX64(d, false); - MOV(32, gpr.R(d), gpr.R(a)); + if (gpr.R(d).IsImm()) + gpr.LoadToX64(d, false); (this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; if (carry) GenerateCarry(EAX); } } - else if (doop == Add) - { - // a == 0, which for these instructions imply value = 0 - gpr.SetImmediate32(d, value); - } else { - _assert_msg_(DYNA_REC, 0, "WTF regimmop"); + gpr.LoadToX64(d, false); + MOV(32, gpr.R(d), gpr.R(a)); + (this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; + if (carry) + GenerateCarry(EAX); } - if (Rc) - { - // Todo - special case immediates. 
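		// (asm_routines.computeRc takes the result in EAX, does a signed compare
		// against zero, and writes the LT/GT/EQ bits of CR0 to ppcState.cr_fast[0].)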
- MOV(32, R(EAX), gpr.R(d)); - CALL((u8*)asm_routines.computeRc); - } - gpr.UnlockAll(); } - - void Jit64::reg_imm(UGeckoInstruction inst) + else if (doop == Add) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - int d = inst.RD, a = inst.RA, s = inst.RS; - switch (inst.OPCD) - { - case 14: // addi - // occasionally used as MOV - emulate, with immediate propagation - if (gpr.R(a).IsImm() && d != a && a != 0) { - gpr.SetImmediate32(d, (u32)gpr.R(a).offset + (u32)(s32)(s16)inst.SIMM_16); - } else if (inst.SIMM_16 == 0 && d != a && a != 0) { - gpr.Lock(a, d); - gpr.LoadToX64(d, false, true); - MOV(32, gpr.R(d), gpr.R(a)); - gpr.UnlockAll(); - } else { - regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD); //addi - } - break; - case 15: regimmop(d, a, false, (u32)inst.SIMM_16 << 16, Add, &XEmitter::ADD); break; //addis - case 24: - if (a == 0 && s == 0 && inst.UIMM == 0 && !inst.Rc) //check for nop - {NOP(); return;} //make the nop visible in the generated code. not much use but interesting if we see one. - regimmop(a, s, true, inst.UIMM, Or, &XEmitter::OR); - break; //ori - case 25: regimmop(a, s, true, inst.UIMM << 16, Or, &XEmitter::OR, false); break;//oris - case 28: regimmop(a, s, true, inst.UIMM, And, &XEmitter::AND, true); break; - case 29: regimmop(a, s, true, inst.UIMM << 16, And, &XEmitter::AND, true); break; - case 26: regimmop(a, s, true, inst.UIMM, Xor, &XEmitter::XOR, false); break; //xori - case 27: regimmop(a, s, true, inst.UIMM << 16, Xor, &XEmitter::XOR, false); break; //xoris - case 12: //regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, XEmitter::ADD, false, true); //addic - case 13: //regimmop(d, a, true, (u32)(s32)inst.SIMM_16, Add, XEmitter::ADD, true, true); //addic_rc - default: - Default(inst); - break; - } + // a == 0, which for these instructions imply value = 0 + gpr.SetImmediate32(d, value); } + else + { + _assert_msg_(DYNA_REC, 0, "WTF regimmop"); + } + if (Rc) + { + // Todo - special case immediates. + MOV(32, R(EAX), gpr.R(d)); + CALL((u8*)asm_routines.computeRc); + } + gpr.UnlockAll(); +} - void Jit64::cmpXX(UGeckoInstruction inst) - { - // USES_CR - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger +void Jit64::reg_imm(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int d = inst.RD, a = inst.RA, s = inst.RS; + switch (inst.OPCD) + { + case 14: // addi + // occasionally used as MOV - emulate, with immediate propagation + if (gpr.R(a).IsImm() && d != a && a != 0) { + gpr.SetImmediate32(d, (u32)gpr.R(a).offset + (u32)(s32)(s16)inst.SIMM_16); + } else if (inst.SIMM_16 == 0 && d != a && a != 0) { + gpr.Lock(a, d); + gpr.LoadToX64(d, false, true); + MOV(32, gpr.R(d), gpr.R(a)); + gpr.UnlockAll(); + } else { + regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD); //addi + } + break; + case 15: regimmop(d, a, false, (u32)inst.SIMM_16 << 16, Add, &XEmitter::ADD); break; //addis + case 24: + if (a == 0 && s == 0 && inst.UIMM == 0 && !inst.Rc) //check for nop + {NOP(); return;} //make the nop visible in the generated code. not much use but interesting if we see one. 
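			// (ori with rA = rS = UIMM = 0, i.e. "ori 0,0,0", is the canonical
			// PowerPC nop encoding, hence the check above.)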
+ regimmop(a, s, true, inst.UIMM, Or, &XEmitter::OR); + break; //ori + case 25: regimmop(a, s, true, inst.UIMM << 16, Or, &XEmitter::OR, false); break;//oris + case 28: regimmop(a, s, true, inst.UIMM, And, &XEmitter::AND, true); break; + case 29: regimmop(a, s, true, inst.UIMM << 16, And, &XEmitter::AND, true); break; + case 26: regimmop(a, s, true, inst.UIMM, Xor, &XEmitter::XOR, false); break; //xori + case 27: regimmop(a, s, true, inst.UIMM << 16, Xor, &XEmitter::XOR, false); break; //xoris + case 12: //regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, XEmitter::ADD, false, true); //addic + case 13: //regimmop(d, a, true, (u32)(s32)inst.SIMM_16, Add, XEmitter::ADD, true, true); //addic_rc + default: + Default(inst); + break; + } +} - INSTRUCTION_START; - int a = inst.RA; - int b = inst.RB; - int crf = inst.CRFD; - - bool merge_branch = false; - int test_crf = js.next_inst.BI >> 2; - // Check if the next intruction is a branch - if it is, merge the two. - if (js.next_inst.OPCD == 16 && (js.next_inst.BO & BO_DONT_DECREMENT_FLAG) && - !(js.next_inst.BO & 16) && (js.next_inst.BO & 4) && !js.next_inst.LK) { +void Jit64::cmpXX(UGeckoInstruction inst) +{ + // USES_CR + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA; + int b = inst.RB; + int crf = inst.CRFD; + + bool merge_branch = false; + int test_crf = js.next_inst.BI >> 2; + // Check if the next intruction is a branch - if it is, merge the two. + if (js.next_inst.OPCD == 16 && (js.next_inst.BO & BO_DONT_DECREMENT_FLAG) && + !(js.next_inst.BO & 16) && (js.next_inst.BO & 4) && !js.next_inst.LK) { // Looks like a decent conditional branch that we can merge with. // It only test CR, not CTR. if (test_crf == crf) { merge_branch = true; } - } + } - Gen::CCFlags less_than, greater_than; - OpArg comparand; - if (inst.OPCD == 31) { - gpr.Lock(a, b); - gpr.LoadToX64(a, true, false); - comparand = gpr.R(b); - if (inst.SUBOP10 == 32) { - //cmpl - less_than = CC_B; - greater_than = CC_A; - } else { - //cmp - less_than = CC_L; - greater_than = CC_G; - } - } - else { - gpr.KillImmediate(a); // todo, optimize instead, but unlikely to make a difference - if (inst.OPCD == 10) { - //cmpli - less_than = CC_B; - greater_than = CC_A; - comparand = Imm32(inst.UIMM); - } else if (inst.OPCD == 11) { - //cmpi - less_than = CC_L; - greater_than = CC_G; - comparand = Imm32((s32)(s16)inst.UIMM); - } - } - - if (!merge_branch) { - // Keep the normal code separate for clarity. - CMP(32, gpr.R(a), comparand); - - FixupBranch pLesser = J_CC(less_than); - FixupBranch pGreater = J_CC(greater_than); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // _x86Reg == 0 - FixupBranch continue1 = J(); - SetJumpTarget(pGreater); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // _x86Reg > 0 - FixupBranch continue2 = J(); - SetJumpTarget(pLesser); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // _x86Reg < 0 - SetJumpTarget(continue1); - SetJumpTarget(continue2); - // TODO: If we ever care about SO, borrow a trick from - // http://maws.mameworld.info/maws/mamesrc/src/emu/cpu/powerpc/drc_ops.c : bt, adc + Gen::CCFlags less_than, greater_than; + OpArg comparand; + if (inst.OPCD == 31) { + gpr.Lock(a, b); + gpr.LoadToX64(a, true, false); + comparand = gpr.R(b); + if (inst.SUBOP10 == 32) { + //cmpl + less_than = CC_B; + greater_than = CC_A; } else { - int test_bit = 8 >> (js.next_inst.BI & 3); - bool condition = (js.next_inst.BO & 8) ? 
false : true; - CMP(32, gpr.R(a), comparand); - gpr.UnlockAll(); - - u32 destination1; - if (js.next_inst.AA) - destination1 = SignExt16(js.next_inst.BD << 2); - else - destination1 = js.next_compilerPC + SignExt16(js.next_inst.BD << 2); - u32 destination2 = js.next_compilerPC + 4; - - // Test swapping (in the future, will be used to inline across branches the right way) - // if (rand() & 1) - // std::swap(destination1, destination2), condition = !condition; - - gpr.Flush(FLUSH_ALL); - fpr.Flush(FLUSH_ALL); - FixupBranch pLesser = J_CC(less_than); - FixupBranch pGreater = J_CC(greater_than); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // == 0 - FixupBranch continue1 = J(); - - SetJumpTarget(pGreater); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // > 0 - FixupBranch continue2 = J(); - - SetJumpTarget(pLesser); - MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // < 0 - FixupBranch continue3; - if (!!(8 & test_bit) == condition) continue3 = J(); - if (!!(4 & test_bit) != condition) SetJumpTarget(continue2); - if (!!(2 & test_bit) != condition) SetJumpTarget(continue1); - - WriteExit(destination1, 0); - - if (!!(8 & test_bit) == condition) SetJumpTarget(continue3); - if (!!(4 & test_bit) == condition) SetJumpTarget(continue2); - if (!!(2 & test_bit) == condition) SetJumpTarget(continue1); - - WriteExit(destination2, 1); - - js.cancel = true; + //cmp + less_than = CC_L; + greater_than = CC_G; } + } + else { + gpr.KillImmediate(a); // todo, optimize instead, but unlikely to make a difference + if (inst.OPCD == 10) { + //cmpli + less_than = CC_B; + greater_than = CC_A; + comparand = Imm32(inst.UIMM); + } else if (inst.OPCD == 11) { + //cmpi + less_than = CC_L; + greater_than = CC_G; + comparand = Imm32((s32)(s16)inst.UIMM); + } + } + if (!merge_branch) { + // Keep the normal code separate for clarity. + CMP(32, gpr.R(a), comparand); + + FixupBranch pLesser = J_CC(less_than); + FixupBranch pGreater = J_CC(greater_than); + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // _x86Reg == 0 + FixupBranch continue1 = J(); + SetJumpTarget(pGreater); + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // _x86Reg > 0 + FixupBranch continue2 = J(); + SetJumpTarget(pLesser); + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // _x86Reg < 0 + SetJumpTarget(continue1); + SetJumpTarget(continue2); + // TODO: If we ever care about SO, borrow a trick from + // http://maws.mameworld.info/maws/mamesrc/src/emu/cpu/powerpc/drc_ops.c : bt, adc + } else { + int test_bit = 8 >> (js.next_inst.BI & 3); + bool condition = (js.next_inst.BO & 8) ? 
false : true; + CMP(32, gpr.R(a), comparand); + gpr.UnlockAll(); + + u32 destination1; + if (js.next_inst.AA) + destination1 = SignExt16(js.next_inst.BD << 2); + else + destination1 = js.next_compilerPC + SignExt16(js.next_inst.BD << 2); + u32 destination2 = js.next_compilerPC + 4; + + // Test swapping (in the future, will be used to inline across branches the right way) + // if (rand() & 1) + // std::swap(destination1, destination2), condition = !condition; + + gpr.Flush(FLUSH_ALL); + fpr.Flush(FLUSH_ALL); + FixupBranch pLesser = J_CC(less_than); + FixupBranch pGreater = J_CC(greater_than); + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2)); // == 0 + FixupBranch continue1 = J(); + + SetJumpTarget(pGreater); + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4)); // > 0 + FixupBranch continue2 = J(); + + SetJumpTarget(pLesser); + MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8)); // < 0 + FixupBranch continue3; + if (!!(8 & test_bit) == condition) continue3 = J(); + if (!!(4 & test_bit) != condition) SetJumpTarget(continue2); + if (!!(2 & test_bit) != condition) SetJumpTarget(continue1); + + WriteExit(destination1, 0); + + if (!!(8 & test_bit) == condition) SetJumpTarget(continue3); + if (!!(4 & test_bit) == condition) SetJumpTarget(continue2); + if (!!(2 & test_bit) == condition) SetJumpTarget(continue1); + + WriteExit(destination2, 1); + + js.cancel = true; + } + + gpr.UnlockAll(); +} + +void Jit64::orx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA; + int s = inst.RS; + int b = inst.RB; + + if (s == b && s != a) + { + gpr.Lock(a,s); + gpr.LoadToX64(a, false); + MOV(32, gpr.R(a), gpr.R(s)); + gpr.UnlockAll(); + } + else + { + gpr.Lock(a, s, b); + gpr.LoadToX64(a, (a == s || a == b), true); + if (a == s) + OR(32, gpr.R(a), gpr.R(b)); + else if (a == b) + OR(32, gpr.R(a), gpr.R(s)); + else { + MOV(32, gpr.R(a), gpr.R(b)); + OR(32, gpr.R(a), gpr.R(s)); + } gpr.UnlockAll(); } - void Jit64::orx(UGeckoInstruction inst) + if (inst.Rc) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - int a = inst.RA; - int s = inst.RS; - int b = inst.RB; - - if (s == b && s != a) - { - gpr.Lock(a,s); - gpr.LoadToX64(a, false); - MOV(32, gpr.R(a), gpr.R(s)); - gpr.UnlockAll(); - } - else - { - gpr.Lock(a, s, b); - gpr.LoadToX64(a, (a == s || a == b), true); - if (a == s) - OR(32, gpr.R(a), gpr.R(b)); - else if (a == b) - OR(32, gpr.R(a), gpr.R(s)); - else { - MOV(32, gpr.R(a), gpr.R(b)); - OR(32, gpr.R(a), gpr.R(s)); - } - gpr.UnlockAll(); - } - - if (inst.Rc) - { - MOV(32, R(EAX), gpr.R(a)); - CALL((u8*)asm_routines.computeRc); - } + MOV(32, R(EAX), gpr.R(a)); + CALL((u8*)asm_routines.computeRc); } +} - - // m_GPR[_inst.RA] = m_GPR[_inst.RS] ^ m_GPR[_inst.RB]; - void Jit64::xorx(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - int a = inst.RA; - int s = inst.RS; - int b = inst.RB; +// m_GPR[_inst.RA] = m_GPR[_inst.RS] ^ m_GPR[_inst.RB]; +void Jit64::xorx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA; + int s = inst.RS; + int b = inst.RB; - if (s == b) { - gpr.SetImmediate32(a, 0); - } - else - { - gpr.LoadToX64(a, a == s || a == b, true); - gpr.Lock(a, s, b); - MOV(32, R(EAX), gpr.R(s)); - XOR(32, R(EAX), gpr.R(b)); - MOV(32, gpr.R(a), R(EAX)); - 
gpr.UnlockAll(); - } - - if (inst.Rc) - { - MOV(32, R(EAX), gpr.R(a)); - CALL((u8*)asm_routines.computeRc); - } + if (s == b) { + gpr.SetImmediate32(a, 0); } - - void Jit64::andx(UGeckoInstruction inst) + else { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - int a = inst.RA, s = inst.RS, b = inst.RB; - if (a != s && a != b) { - gpr.LoadToX64(a, false, true); - } else { - gpr.LoadToX64(a, true, true); - } + gpr.LoadToX64(a, a == s || a == b, true); gpr.Lock(a, s, b); MOV(32, R(EAX), gpr.R(s)); - AND(32, R(EAX), gpr.R(b)); + XOR(32, R(EAX), gpr.R(b)); MOV(32, gpr.R(a), R(EAX)); gpr.UnlockAll(); - - if (inst.Rc) { - // result is already in eax - CALL((u8*)asm_routines.computeRc); - } } - void Jit64::extsbx(UGeckoInstruction inst) + if (inst.Rc) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - int a = inst.RA, - s = inst.RS; - gpr.LoadToX64(a, a == s, true); - // Always force moving to EAX because it isn't possible - // to refer to the lowest byte of some registers, at least in - // 32-bit mode. - MOV(32, R(EAX), gpr.R(s)); - MOVSX(32, 8, gpr.RX(a), R(AL)); // watch out for ah and friends - if (inst.Rc) { - MOV(32, R(EAX), gpr.R(a)); - CALL((u8*)asm_routines.computeRc); - } - } - - void Jit64::extshx(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - int a = inst.RA, s = inst.RS; - gpr.KillImmediate(s); - gpr.LoadToX64(a, a == s, true); - // This looks a little dangerous, but it's safe because - // every 32-bit register has a 16-bit half at the same index - // as the 32-bit register. 
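	// (AX aliases the low 16 bits of EAX, CX of ECX, and so on, so the 16-bit
	// source operand can be read in place without a scratch register.)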
- MOVSX(32, 16, gpr.RX(a), gpr.R(s)); - if (inst.Rc) { - MOV(32, R(EAX), gpr.R(a)); - CALL((u8*)asm_routines.computeRc); - } - } - - void Jit64::subfic(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - int a = inst.RA, d = inst.RD; - gpr.FlushLockX(ECX); - gpr.Lock(a, d); - gpr.LoadToX64(d, a == d, true); - int imm = inst.SIMM_16; MOV(32, R(EAX), gpr.R(a)); - NOT(32, R(EAX)); - ADD(32, R(EAX), Imm32(imm + 1)); - MOV(32, gpr.R(d), R(EAX)); - GenerateCarry(ECX); - gpr.UnlockAll(); - gpr.UnlockAllX(); - // This instruction has no RC flag + CALL((u8*)asm_routines.computeRc); } +} - void Jit64::subfcx(UGeckoInstruction inst) - { - INSTRUCTION_START; - Default(inst); - return; - /* - u32 a = m_GPR[_inst.RA]; - u32 b = m_GPR[_inst.RB]; - m_GPR[_inst.RD] = b - a; - SetCarry(a == 0 || Helper_Carry(b, 0-a)); - - if (_inst.OE) PanicAlert("OE: subfcx"); - if (_inst.Rc) Helper_UpdateCR0(m_GPR[_inst.RD]); - */ +void Jit64::andx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA, s = inst.RS, b = inst.RB; + if (a != s && a != b) { + gpr.LoadToX64(a, false, true); + } else { + gpr.LoadToX64(a, true, true); } + gpr.Lock(a, s, b); + MOV(32, R(EAX), gpr.R(s)); + AND(32, R(EAX), gpr.R(b)); + MOV(32, gpr.R(a), R(EAX)); + gpr.UnlockAll(); - void Jit64::subfex(UGeckoInstruction inst) - { - INSTRUCTION_START; - Default(inst); - return; - /* - u32 a = m_GPR[_inst.RA]; - u32 b = m_GPR[_inst.RB]; - int carry = GetCarry(); - m_GPR[_inst.RD] = (~a) + b + carry; - SetCarry(Helper_Carry(~a, b) || Helper_Carry((~a) + b, carry)); - - if (_inst.OE) PanicAlert("OE: subfcx"); - if (_inst.Rc) Helper_UpdateCR0(m_GPR[_inst.RD]); - */ + if (inst.Rc) { + // result is already in eax + CALL((u8*)asm_routines.computeRc); } +} - void Jit64::subfx(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.Lock(a, b, d); - if (d != a && d != b) { - gpr.LoadToX64(d, false, true); - } else { - gpr.LoadToX64(d, true, true); - } - MOV(32, R(EAX), gpr.R(b)); - SUB(32, R(EAX), gpr.R(a)); - MOV(32, gpr.R(d), R(EAX)); - gpr.UnlockAll(); - if (inst.OE) PanicAlert("OE: subfx"); - if (inst.Rc) { - // result is already in eax - CALL((u8*)asm_routines.computeRc); - } - } - - void Jit64::mulli(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - int a = inst.RA, d = inst.RD; - gpr.Lock(a, d); - gpr.LoadToX64(d, (d == a), true); - gpr.KillImmediate(a); - IMUL(32, gpr.RX(d), gpr.R(a), Imm32((u32)(s32)inst.SIMM_16)); - gpr.UnlockAll(); - } - - void Jit64::mullwx(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.Lock(a, b, d); - gpr.LoadToX64(d, (d == a || d == b), true); - if (d == a) { - IMUL(32, gpr.RX(d), gpr.R(b)); - } else if (d == b) { - IMUL(32, gpr.RX(d), gpr.R(a)); - } else { - MOV(32, gpr.R(d), gpr.R(b)); - IMUL(32, gpr.RX(d), gpr.R(a)); - } - gpr.UnlockAll(); - if (inst.Rc) { - MOV(32, R(EAX), gpr.R(d)); - CALL((u8*)asm_routines.computeRc); - } - } 
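	// mulhwux keeps the high 32 bits of an unsigned 32x32-bit multiply. x86's
	// one-operand MUL leaves the 64-bit product in EDX:EAX, which is why EDX is
	// flush-locked and the result is taken from EDX.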
- - void Jit64::mulhwux(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.FlushLockX(EDX); - gpr.Lock(a, b, d); - if (d != a && d != b) { - gpr.LoadToX64(d, false, true); - } else { - gpr.LoadToX64(d, true, true); - } - if (gpr.RX(d) == EDX) - PanicAlert("mulhwux : WTF"); +void Jit64::extsbx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA, + s = inst.RS; + gpr.LoadToX64(a, a == s, true); + // Always force moving to EAX because it isn't possible + // to refer to the lowest byte of some registers, at least in + // 32-bit mode. + MOV(32, R(EAX), gpr.R(s)); + MOVSX(32, 8, gpr.RX(a), R(AL)); // watch out for ah and friends + if (inst.Rc) { MOV(32, R(EAX), gpr.R(a)); - gpr.KillImmediate(b); - MUL(32, gpr.R(b)); - gpr.UnlockAll(); - gpr.UnlockAllX(); - if (inst.Rc) { - MOV(32, R(EAX), R(EDX)); - MOV(32, gpr.R(d), R(EDX)); - // result is already in eax - CALL((u8*)asm_routines.computeRc); - } else { - MOV(32, gpr.R(d), R(EDX)); - } + CALL((u8*)asm_routines.computeRc); } +} - // skipped some of the special handling in here - if we get crashes, let the interpreter handle this op - void Jit64::divwux(UGeckoInstruction inst) { - Default(inst); return; - - int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.FlushLockX(EDX); - gpr.Lock(a, b, d); - if (d != a && d != b) { - gpr.LoadToX64(d, false, true); - } else { - gpr.LoadToX64(d, true, true); - } +void Jit64::extshx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA, s = inst.RS; + gpr.KillImmediate(s); + gpr.LoadToX64(a, a == s, true); + // This looks a little dangerous, but it's safe because + // every 32-bit register has a 16-bit half at the same index + // as the 32-bit register. + MOVSX(32, 16, gpr.RX(a), gpr.R(s)); + if (inst.Rc) { MOV(32, R(EAX), gpr.R(a)); - XOR(32, R(EDX), R(EDX)); - gpr.KillImmediate(b); - DIV(32, gpr.R(b)); - MOV(32, gpr.R(d), R(EAX)); - gpr.UnlockAll(); - gpr.UnlockAllX(); - if (inst.Rc) { - CALL((u8*)asm_routines.computeRc); - } + CALL((u8*)asm_routines.computeRc); } +} - u32 Helper_Mask(u8 mb, u8 me) - { - return (((mb > me) ? - ~(((u32)-1 >> mb) ^ ((me >= 31) ? 0 : (u32) -1 >> (me + 1))) - : - (((u32)-1 >> mb) ^ ((me >= 31) ? 
0 : (u32) -1 >> (me + 1)))) - ); +void Jit64::subfic(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA, d = inst.RD; + gpr.FlushLockX(ECX); + gpr.Lock(a, d); + gpr.LoadToX64(d, a == d, true); + int imm = inst.SIMM_16; + MOV(32, R(EAX), gpr.R(a)); + NOT(32, R(EAX)); + ADD(32, R(EAX), Imm32(imm + 1)); + MOV(32, gpr.R(d), R(EAX)); + GenerateCarry(ECX); + gpr.UnlockAll(); + gpr.UnlockAllX(); + // This instruction has no RC flag +} + +void Jit64::subfcx(UGeckoInstruction inst) +{ + INSTRUCTION_START; + Default(inst); + return; + /* + u32 a = m_GPR[_inst.RA]; + u32 b = m_GPR[_inst.RB]; + m_GPR[_inst.RD] = b - a; + SetCarry(a == 0 || Helper_Carry(b, 0-a)); + + if (_inst.OE) PanicAlert("OE: subfcx"); + if (_inst.Rc) Helper_UpdateCR0(m_GPR[_inst.RD]); + */ +} + +void Jit64::subfex(UGeckoInstruction inst) +{ + INSTRUCTION_START; + Default(inst); + return; + /* + u32 a = m_GPR[_inst.RA]; + u32 b = m_GPR[_inst.RB]; + int carry = GetCarry(); + m_GPR[_inst.RD] = (~a) + b + carry; + SetCarry(Helper_Carry(~a, b) || Helper_Carry((~a) + b, carry)); + + if (_inst.OE) PanicAlert("OE: subfcx"); + if (_inst.Rc) Helper_UpdateCR0(m_GPR[_inst.RD]); + */ +} + +void Jit64::subfx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA, b = inst.RB, d = inst.RD; + gpr.Lock(a, b, d); + if (d != a && d != b) { + gpr.LoadToX64(d, false, true); + } else { + gpr.LoadToX64(d, true, true); } - - void Jit64::addx(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - int a = inst.RA, b = inst.RB, d = inst.RD; - _assert_msg_(DYNA_REC, !inst.OE, "Add - OE enabled :("); - - if (a != d && b != d && a != b) - { - gpr.Lock(a, b, d); - gpr.LoadToX64(d, false); - if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) { - LEA(32, gpr.RX(d), MComplex(gpr.RX(a), gpr.RX(b), 1, 0)); - } else { - MOV(32, gpr.R(d), gpr.R(a)); - ADD(32, gpr.R(d), gpr.R(b)); - } - if (inst.Rc) - { - MOV(32, R(EAX), gpr.R(d)); - CALL((u8*)asm_routines.computeRc); - } - gpr.UnlockAll(); - } - else if (d == a && d != b) - { - gpr.Lock(b, d); - gpr.LoadToX64(d, true); - ADD(32, gpr.R(d), gpr.R(b)); - if (inst.Rc) - { - MOV(32, R(EAX), gpr.R(d)); - CALL((u8*)asm_routines.computeRc); - } - gpr.UnlockAll(); - } - else if (d == b && d != a) - { - gpr.Lock(a, d); - gpr.LoadToX64(d, true); - ADD(32, gpr.R(d), gpr.R(a)); - if (inst.Rc) - { - MOV(32, R(EAX), gpr.R(d)); - CALL((u8*)asm_routines.computeRc); - } - gpr.UnlockAll(); - } - else - { - Default(inst); return; - } + MOV(32, R(EAX), gpr.R(b)); + SUB(32, R(EAX), gpr.R(a)); + MOV(32, gpr.R(d), R(EAX)); + gpr.UnlockAll(); + if (inst.OE) PanicAlert("OE: subfx"); + if (inst.Rc) { + // result is already in eax + CALL((u8*)asm_routines.computeRc); } +} - // This can be optimized - void Jit64::addex(UGeckoInstruction inst) +void Jit64::mulli(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA, d = inst.RD; + gpr.Lock(a, d); + gpr.LoadToX64(d, (d == a), true); + gpr.KillImmediate(a); + IMUL(32, gpr.RX(d), gpr.R(a), Imm32((u32)(s32)inst.SIMM_16)); + gpr.UnlockAll(); +} + +void Jit64::mullwx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA, b = inst.RB, d = inst.RD; + gpr.Lock(a, b, d); + gpr.LoadToX64(d, (d == a || d == b), true); + if (d == a) { + IMUL(32, gpr.RX(d), gpr.R(b)); + } else if (d == b) { + IMUL(32, gpr.RX(d), gpr.R(a)); + } else { + 
MOV(32, gpr.R(d), gpr.R(b)); + IMUL(32, gpr.RX(d), gpr.R(a)); + } + gpr.UnlockAll(); + if (inst.Rc) { + MOV(32, R(EAX), gpr.R(d)); + CALL((u8*)asm_routines.computeRc); + } +} + +void Jit64::mulhwux(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA, b = inst.RB, d = inst.RD; + gpr.FlushLockX(EDX); + gpr.Lock(a, b, d); + if (d != a && d != b) { + gpr.LoadToX64(d, false, true); + } else { + gpr.LoadToX64(d, true, true); + } + if (gpr.RX(d) == EDX) + PanicAlert("mulhwux : WTF"); + MOV(32, R(EAX), gpr.R(a)); + gpr.KillImmediate(b); + MUL(32, gpr.R(b)); + gpr.UnlockAll(); + gpr.UnlockAllX(); + if (inst.Rc) { + MOV(32, R(EAX), R(EDX)); + MOV(32, gpr.R(d), R(EDX)); + // result is already in eax + CALL((u8*)asm_routines.computeRc); + } else { + MOV(32, gpr.R(d), R(EDX)); + } +} + +// skipped some of the special handling in here - if we get crashes, let the interpreter handle this op +void Jit64::divwux(UGeckoInstruction inst) { + Default(inst); return; + + int a = inst.RA, b = inst.RB, d = inst.RD; + gpr.FlushLockX(EDX); + gpr.Lock(a, b, d); + if (d != a && d != b) { + gpr.LoadToX64(d, false, true); + } else { + gpr.LoadToX64(d, true, true); + } + MOV(32, R(EAX), gpr.R(a)); + XOR(32, R(EDX), R(EDX)); + gpr.KillImmediate(b); + DIV(32, gpr.R(b)); + MOV(32, gpr.R(d), R(EAX)); + gpr.UnlockAll(); + gpr.UnlockAllX(); + if (inst.Rc) { + CALL((u8*)asm_routines.computeRc); + } +} + +u32 Helper_Mask(u8 mb, u8 me) +{ + return (((mb > me) ? + ~(((u32)-1 >> mb) ^ ((me >= 31) ? 0 : (u32) -1 >> (me + 1))) + : + (((u32)-1 >> mb) ^ ((me >= 31) ? 0 : (u32) -1 >> (me + 1)))) + ); +} + +void Jit64::addx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA, b = inst.RB, d = inst.RD; + _assert_msg_(DYNA_REC, !inst.OE, "Add - OE enabled :("); + + if (a != d && b != d && a != b) { - // USES_XER - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.FlushLockX(ECX); gpr.Lock(a, b, d); - if (d != a && d != b) - gpr.LoadToX64(d, false); - else - gpr.LoadToX64(d, true); - MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); - SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag - MOV(32, R(EAX), gpr.R(a)); - ADC(32, R(EAX), gpr.R(b)); - MOV(32, gpr.R(d), R(EAX)); - GenerateCarry(ECX); - gpr.UnlockAll(); - gpr.UnlockAllX(); - if (inst.Rc) - { - CALL((u8*)asm_routines.computeRc); - } - } - - void Jit64::rlwinmx(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - int a = inst.RA; - int s = inst.RS; - if (gpr.R(s).IsImm() && !inst.Rc) - { - unsigned result = (int)gpr.R(s).offset; - if (inst.SH != 0) - result = _rotl(result, inst.SH); - result &= Helper_Mask(inst.MB, inst.ME); - gpr.SetImmediate32(a, result); - return; - } - - gpr.Lock(a, s); - gpr.LoadToX64(a, a == s); - if (a != s) - { - MOV(32, gpr.R(a), gpr.R(s)); - } - - if (inst.MB == 0 && inst.ME==31-inst.SH) - { - SHL(32, gpr.R(a), Imm8(inst.SH)); - } - else if (inst.ME == 31 && inst.MB == 32 - inst.SH) - { - SHR(32, gpr.R(a), Imm8(inst.MB)); - } - else - { - bool written = false; - if (inst.SH != 0) - { - ROL(32, gpr.R(a), Imm8(inst.SH)); - written = true; - } - if (!(inst.MB==0 && inst.ME==31)) - { - written = true; - AND(32, gpr.R(a), Imm32(Helper_Mask(inst.MB, inst.ME))); - 
} - _assert_msg_(DYNA_REC, written, "W T F!!!"); - } - gpr.UnlockAll(); - - if (inst.Rc) - { - MOV(32, R(EAX), gpr.R(a)); - CALL((u8*)asm_routines.computeRc); - } - } - - - void Jit64::rlwimix(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - int a = inst.RA; - int s = inst.RS; - if (gpr.R(a).IsImm() || gpr.R(s).IsImm()) - { - Default(inst); - return; - } - - if (a != s) - { - gpr.Lock(a, s); - gpr.LoadToX64(a, true); - } - - u32 mask = Helper_Mask(inst.MB, inst.ME); - MOV(32, R(EAX), gpr.R(s)); - AND(32, gpr.R(a), Imm32(~mask)); - if (inst.SH) - ROL(32, R(EAX), Imm8(inst.SH)); - AND(32, R(EAX), Imm32(mask)); - OR(32, gpr.R(a), R(EAX)); - gpr.UnlockAll(); - if (inst.Rc) - { - MOV(32, R(EAX), gpr.R(a)); - CALL((u8*)asm_routines.computeRc); - } - } - - void Jit64::rlwnmx(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - int a = inst.RA, b = inst.RB, s = inst.RS; - if (gpr.R(a).IsImm()) - { - Default(inst); - return; - } - - u32 mask = Helper_Mask(inst.MB, inst.ME); - gpr.FlushLockX(ECX); - gpr.Lock(a, b, s); - MOV(32, R(EAX), gpr.R(s)); - MOV(32, R(ECX), gpr.R(b)); - AND(32, R(ECX), Imm32(0x1f)); - ROL(32, R(EAX), R(ECX)); - AND(32, R(EAX), Imm32(mask)); - MOV(32, gpr.R(a), R(EAX)); - gpr.UnlockAll(); - gpr.UnlockAllX(); - if (inst.Rc) - { - MOV(32, R(EAX), gpr.R(a)); - CALL((u8*)asm_routines.computeRc); - } - } - - void Jit64::negx(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - int a = inst.RA; - int d = inst.RD; - gpr.Lock(a, d); - gpr.LoadToX64(d, a == d, true); - if (a != d) + gpr.LoadToX64(d, false); + if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg()) { + LEA(32, gpr.RX(d), MComplex(gpr.RX(a), gpr.RX(b), 1, 0)); + } else { MOV(32, gpr.R(d), gpr.R(a)); - NEG(32, gpr.R(d)); - gpr.UnlockAll(); + ADD(32, gpr.R(d), gpr.R(b)); + } if (inst.Rc) { MOV(32, R(EAX), gpr.R(d)); CALL((u8*)asm_routines.computeRc); } - } - - void Jit64::srwx(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - int a = inst.RA; - int b = inst.RB; - int s = inst.RS; - gpr.FlushLockX(ECX); - gpr.Lock(a, b, s); - gpr.LoadToX64(a, a == s || a == b || s == b, true); - MOV(32, R(ECX), gpr.R(b)); - XOR(32, R(EAX), R(EAX)); - TEST(32, R(ECX), Imm32(32)); - FixupBranch branch = J_CC(CC_NZ); - MOV(32, R(EAX), gpr.R(s)); - SHR(32, R(EAX), R(ECX)); - SetJumpTarget(branch); - MOV(32, gpr.R(a), R(EAX)); gpr.UnlockAll(); - gpr.UnlockAllX(); - if (inst.Rc) - { - MOV(32, R(EAX), gpr.R(a)); - CALL((u8*)asm_routines.computeRc); - } } - - void Jit64::slwx(UGeckoInstruction inst) + else if (d == a && d != b) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - int a = inst.RA; - int b = inst.RB; - int s = inst.RS; - gpr.FlushLockX(ECX); - gpr.Lock(a, b, s); - gpr.LoadToX64(a, a == s || a == b || s == b, true); - MOV(32, R(ECX), gpr.R(b)); - XOR(32, R(EAX), R(EAX)); - TEST(32, R(ECX), Imm32(32)); - FixupBranch branch = J_CC(CC_NZ); - MOV(32, R(EAX), 
gpr.R(s)); - SHL(32, R(EAX), R(ECX)); - SetJumpTarget(branch); - MOV(32, gpr.R(a), R(EAX)); - gpr.UnlockAll(); - gpr.UnlockAllX(); - if (inst.Rc) - { - MOV(32, R(EAX), gpr.R(a)); - CALL((u8*)asm_routines.computeRc); - } - } - - void Jit64::srawx(UGeckoInstruction inst) - { - // USES_XER - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - int a = inst.RA; - int b = inst.RB; - int s = inst.RS; - gpr.Lock(a, s); - gpr.FlushLockX(ECX); - gpr.LoadToX64(a, a == s || a == b, true); - MOV(32, R(ECX), gpr.R(b)); - TEST(32, R(ECX), Imm32(32)); - FixupBranch topBitSet = J_CC(CC_NZ); - if (a != s) - MOV(32, gpr.R(a), gpr.R(s)); - MOV(32, R(EAX), Imm32(1)); - SHL(32, R(EAX), R(ECX)); - ADD(32, R(EAX), Imm32(0x7FFFFFFF)); - AND(32, R(EAX), gpr.R(a)); - ADD(32, R(EAX), Imm32(-1)); - CMP(32, R(EAX), Imm32(-1)); - SETcc(CC_L, R(EAX)); - SAR(32, gpr.R(a), R(ECX)); - AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(1 << 29))); - SHL(32, R(EAX), Imm8(29)); - OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX)); - FixupBranch end = J(); - SetJumpTarget(topBitSet); - MOV(32, R(EAX), gpr.R(s)); - SAR(32, R(EAX), Imm8(31)); - MOV(32, gpr.R(a), R(EAX)); - AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(1 << 29))); - AND(32, R(EAX), Imm32(1<<29)); - OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX)); - SetJumpTarget(end); - gpr.UnlockAll(); - gpr.UnlockAllX(); - - if (inst.Rc) { - MOV(32, R(EAX), gpr.R(a)); - CALL((u8*)asm_routines.computeRc); - } - } - - void Jit64::srawix(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - int a = inst.RA; - int s = inst.RS; - int amount = inst.SH; - if (amount != 0) - { - gpr.Lock(a, s); - gpr.LoadToX64(a, a == s, true); - MOV(32, R(EAX), gpr.R(s)); - MOV(32, gpr.R(a), R(EAX)); - SAR(32, gpr.R(a), Imm8(amount)); - CMP(32, R(EAX), Imm8(0)); - FixupBranch nocarry1 = J_CC(CC_GE); - TEST(32, R(EAX), Imm32((u32)0xFFFFFFFF >> (32 - amount))); // were any 1s shifted out? 
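		// (sraw/srawi set XER[CA] only when the source is negative and at least
		// one 1 bit is shifted out; for a non-negative source CA is cleared.)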
- FixupBranch nocarry2 = J_CC(CC_Z); - JitSetCA(); - FixupBranch carry = J(false); - SetJumpTarget(nocarry1); - SetJumpTarget(nocarry2); - JitClearCA(); - SetJumpTarget(carry); - gpr.UnlockAll(); - } - else - { - Default(inst); return; - gpr.Lock(a, s); - JitClearCA(); - gpr.LoadToX64(a, a == s, true); - if (a != s) - MOV(32, gpr.R(a), gpr.R(s)); - gpr.UnlockAll(); - } - - if (inst.Rc) { - MOV(32, R(EAX), gpr.R(a)); - CALL((u8*)asm_routines.computeRc); - } - } - - // count leading zeroes - void Jit64::cntlzwx(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; - int a = inst.RA; - int s = inst.RS; - if (gpr.R(a).IsImm() || gpr.R(s).IsImm() || s == a) - { - Default(inst); - return; - } - gpr.Lock(a, s); - gpr.LoadToX64(a, false); - BSR(32, gpr.R(a).GetSimpleReg(), gpr.R(s)); - FixupBranch gotone = J_CC(CC_NZ); - MOV(32, gpr.R(a), Imm32(63)); - SetJumpTarget(gotone); - XOR(32, gpr.R(a), Imm8(0x1f)); // flip order - gpr.UnlockAll(); - + gpr.Lock(b, d); + gpr.LoadToX64(d, true); + ADD(32, gpr.R(d), gpr.R(b)); if (inst.Rc) { - MOV(32, R(EAX), gpr.R(a)); + MOV(32, R(EAX), gpr.R(d)); CALL((u8*)asm_routines.computeRc); - // TODO: Check PPC manual too } + gpr.UnlockAll(); } + else if (d == b && d != a) + { + gpr.Lock(a, d); + gpr.LoadToX64(d, true); + ADD(32, gpr.R(d), gpr.R(a)); + if (inst.Rc) + { + MOV(32, R(EAX), gpr.R(d)); + CALL((u8*)asm_routines.computeRc); + } + gpr.UnlockAll(); + } + else + { + Default(inst); return; + } +} + +// This can be optimized +void Jit64::addex(UGeckoInstruction inst) +{ + // USES_XER + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA, b = inst.RB, d = inst.RD; + gpr.FlushLockX(ECX); + gpr.Lock(a, b, d); + if (d != a && d != b) + gpr.LoadToX64(d, false); + else + gpr.LoadToX64(d, true); + MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); + SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag + MOV(32, R(EAX), gpr.R(a)); + ADC(32, R(EAX), gpr.R(b)); + MOV(32, gpr.R(d), R(EAX)); + GenerateCarry(ECX); + gpr.UnlockAll(); + gpr.UnlockAllX(); + if (inst.Rc) + { + CALL((u8*)asm_routines.computeRc); + } +} + +void Jit64::rlwinmx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA; + int s = inst.RS; + if (gpr.R(s).IsImm() && !inst.Rc) + { + unsigned result = (int)gpr.R(s).offset; + if (inst.SH != 0) + result = _rotl(result, inst.SH); + result &= Helper_Mask(inst.MB, inst.ME); + gpr.SetImmediate32(a, result); + return; + } + + gpr.Lock(a, s); + gpr.LoadToX64(a, a == s); + if (a != s) + { + MOV(32, gpr.R(a), gpr.R(s)); + } + + if (inst.MB == 0 && inst.ME==31-inst.SH) + { + SHL(32, gpr.R(a), Imm8(inst.SH)); + } + else if (inst.ME == 31 && inst.MB == 32 - inst.SH) + { + SHR(32, gpr.R(a), Imm8(inst.MB)); + } + else + { + bool written = false; + if (inst.SH != 0) + { + ROL(32, gpr.R(a), Imm8(inst.SH)); + written = true; + } + if (!(inst.MB==0 && inst.ME==31)) + { + written = true; + AND(32, gpr.R(a), Imm32(Helper_Mask(inst.MB, inst.ME))); + } + _assert_msg_(DYNA_REC, written, "W T F!!!"); + } + gpr.UnlockAll(); + + if (inst.Rc) + { + MOV(32, R(EAX), gpr.R(a)); + CALL((u8*)asm_routines.computeRc); + } +} + + +void Jit64::rlwimix(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA; + int s = inst.RS; + if (gpr.R(a).IsImm() || gpr.R(s).IsImm()) + { + Default(inst); + return; + } + + if (a != s) + { + gpr.Lock(a, s); + gpr.LoadToX64(a, 
true); + } + + u32 mask = Helper_Mask(inst.MB, inst.ME); + MOV(32, R(EAX), gpr.R(s)); + AND(32, gpr.R(a), Imm32(~mask)); + if (inst.SH) + ROL(32, R(EAX), Imm8(inst.SH)); + AND(32, R(EAX), Imm32(mask)); + OR(32, gpr.R(a), R(EAX)); + gpr.UnlockAll(); + if (inst.Rc) + { + MOV(32, R(EAX), gpr.R(a)); + CALL((u8*)asm_routines.computeRc); + } +} + +void Jit64::rlwnmx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA, b = inst.RB, s = inst.RS; + if (gpr.R(a).IsImm()) + { + Default(inst); + return; + } + + u32 mask = Helper_Mask(inst.MB, inst.ME); + gpr.FlushLockX(ECX); + gpr.Lock(a, b, s); + MOV(32, R(EAX), gpr.R(s)); + MOV(32, R(ECX), gpr.R(b)); + AND(32, R(ECX), Imm32(0x1f)); + ROL(32, R(EAX), R(ECX)); + AND(32, R(EAX), Imm32(mask)); + MOV(32, gpr.R(a), R(EAX)); + gpr.UnlockAll(); + gpr.UnlockAllX(); + if (inst.Rc) + { + MOV(32, R(EAX), gpr.R(a)); + CALL((u8*)asm_routines.computeRc); + } +} + +void Jit64::negx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA; + int d = inst.RD; + gpr.Lock(a, d); + gpr.LoadToX64(d, a == d, true); + if (a != d) + MOV(32, gpr.R(d), gpr.R(a)); + NEG(32, gpr.R(d)); + gpr.UnlockAll(); + if (inst.Rc) + { + MOV(32, R(EAX), gpr.R(d)); + CALL((u8*)asm_routines.computeRc); + } +} + +void Jit64::srwx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA; + int b = inst.RB; + int s = inst.RS; + gpr.FlushLockX(ECX); + gpr.Lock(a, b, s); + gpr.LoadToX64(a, a == s || a == b || s == b, true); + MOV(32, R(ECX), gpr.R(b)); + XOR(32, R(EAX), R(EAX)); + TEST(32, R(ECX), Imm32(32)); + FixupBranch branch = J_CC(CC_NZ); + MOV(32, R(EAX), gpr.R(s)); + SHR(32, R(EAX), R(ECX)); + SetJumpTarget(branch); + MOV(32, gpr.R(a), R(EAX)); + gpr.UnlockAll(); + gpr.UnlockAllX(); + if (inst.Rc) + { + MOV(32, R(EAX), gpr.R(a)); + CALL((u8*)asm_routines.computeRc); + } +} + +void Jit64::slwx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA; + int b = inst.RB; + int s = inst.RS; + gpr.FlushLockX(ECX); + gpr.Lock(a, b, s); + gpr.LoadToX64(a, a == s || a == b || s == b, true); + MOV(32, R(ECX), gpr.R(b)); + XOR(32, R(EAX), R(EAX)); + TEST(32, R(ECX), Imm32(32)); + FixupBranch branch = J_CC(CC_NZ); + MOV(32, R(EAX), gpr.R(s)); + SHL(32, R(EAX), R(ECX)); + SetJumpTarget(branch); + MOV(32, gpr.R(a), R(EAX)); + gpr.UnlockAll(); + gpr.UnlockAllX(); + if (inst.Rc) + { + MOV(32, R(EAX), gpr.R(a)); + CALL((u8*)asm_routines.computeRc); + } +} + +void Jit64::srawx(UGeckoInstruction inst) +{ + // USES_XER + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA; + int b = inst.RB; + int s = inst.RS; + gpr.Lock(a, s); + gpr.FlushLockX(ECX); + gpr.LoadToX64(a, a == s || a == b, true); + MOV(32, R(ECX), gpr.R(b)); + TEST(32, R(ECX), Imm32(32)); + FixupBranch topBitSet = J_CC(CC_NZ); + if (a != s) + MOV(32, gpr.R(a), gpr.R(s)); + MOV(32, R(EAX), Imm32(1)); + SHL(32, R(EAX), R(ECX)); + ADD(32, R(EAX), Imm32(0x7FFFFFFF)); + AND(32, R(EAX), gpr.R(a)); + ADD(32, R(EAX), Imm32(-1)); + CMP(32, R(EAX), Imm32(-1)); + SETcc(CC_L, R(EAX)); + SAR(32, gpr.R(a), R(ECX)); + AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(1 << 29))); + SHL(32, R(EAX), Imm8(29)); + OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX)); + FixupBranch end = J(); + SetJumpTarget(topBitSet); + MOV(32, R(EAX), gpr.R(s)); + SAR(32, R(EAX), Imm8(31)); + MOV(32, gpr.R(a), R(EAX)); + AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~(1 << 29))); + AND(32, R(EAX), Imm32(1<<29)); + OR(32, 
M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX)); + SetJumpTarget(end); + gpr.UnlockAll(); + gpr.UnlockAllX(); + + if (inst.Rc) { + MOV(32, R(EAX), gpr.R(a)); + CALL((u8*)asm_routines.computeRc); + } +} + +void Jit64::srawix(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA; + int s = inst.RS; + int amount = inst.SH; + if (amount != 0) + { + gpr.Lock(a, s); + gpr.LoadToX64(a, a == s, true); + MOV(32, R(EAX), gpr.R(s)); + MOV(32, gpr.R(a), R(EAX)); + SAR(32, gpr.R(a), Imm8(amount)); + CMP(32, R(EAX), Imm8(0)); + FixupBranch nocarry1 = J_CC(CC_GE); + TEST(32, R(EAX), Imm32((u32)0xFFFFFFFF >> (32 - amount))); // were any 1s shifted out? + FixupBranch nocarry2 = J_CC(CC_Z); + JitSetCA(); + FixupBranch carry = J(false); + SetJumpTarget(nocarry1); + SetJumpTarget(nocarry2); + JitClearCA(); + SetJumpTarget(carry); + gpr.UnlockAll(); + } + else + { + Default(inst); return; + gpr.Lock(a, s); + JitClearCA(); + gpr.LoadToX64(a, a == s, true); + if (a != s) + MOV(32, gpr.R(a), gpr.R(s)); + gpr.UnlockAll(); + } + + if (inst.Rc) { + MOV(32, R(EAX), gpr.R(a)); + CALL((u8*)asm_routines.computeRc); + } +} + +// count leading zeroes +void Jit64::cntlzwx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Integer) + int a = inst.RA; + int s = inst.RS; + if (gpr.R(a).IsImm() || gpr.R(s).IsImm() || s == a) + { + Default(inst); + return; + } + gpr.Lock(a, s); + gpr.LoadToX64(a, false); + BSR(32, gpr.R(a).GetSimpleReg(), gpr.R(s)); + FixupBranch gotone = J_CC(CC_NZ); + MOV(32, gpr.R(a), Imm32(63)); + SetJumpTarget(gotone); + XOR(32, gpr.R(a), Imm8(0x1f)); // flip order + gpr.UnlockAll(); + + if (inst.Rc) + { + MOV(32, R(EAX), gpr.R(a)); + CALL((u8*)asm_routines.computeRc); + // TODO: Check PPC manual too + } +} diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp index a76dc74eb3..b94b84b815 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp @@ -33,441 +33,434 @@ #include "JitAsm.h" #include "JitRegCache.h" - void Jit64::lbzx(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff - || Core::g_CoreStartupParameter.bJITLoadStorelbzxOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; +void Jit64::lbzx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(LoadStore) + if (Core::g_CoreStartupParameter.bJITLoadStorelbzxOff) + Default(inst); return; - int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.Lock(a, b, d); - gpr.FlushLockX(ABI_PARAM1); - if (b == d || a == d) - gpr.LoadToX64(d, true, true); - else - gpr.LoadToX64(d, false, true); - MOV(32, R(ABI_PARAM1), gpr.R(b)); - if (a) - ADD(32, R(ABI_PARAM1), gpr.R(a)); + int a = inst.RA, b = inst.RB, d = inst.RD; + gpr.Lock(a, b, d); + gpr.FlushLockX(ABI_PARAM1); + if (b == d || a == d) + gpr.LoadToX64(d, true, true); + else + gpr.LoadToX64(d, false, true); + MOV(32, R(ABI_PARAM1), gpr.R(b)); + if (a) + ADD(32, R(ABI_PARAM1), gpr.R(a)); #if 0 - SafeLoadRegToEAX(ABI_PARAM1, 8, 0); - MOV(32, gpr.R(d), R(EAX)); + SafeLoadRegToEAX(ABI_PARAM1, 8, 0); + MOV(32, gpr.R(d), R(EAX)); #else - UnsafeLoadRegToReg(ABI_PARAM1, gpr.RX(d), 8, 0, false); + UnsafeLoadRegToReg(ABI_PARAM1, gpr.RX(d), 8, 0, false); #endif - gpr.UnlockAll(); - gpr.UnlockAllX(); - } + gpr.UnlockAll(); + gpr.UnlockAllX(); +} - void Jit64::lwzx(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || 
Core::g_CoreStartupParameter.bJITLoadStoreOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; +void Jit64::lwzx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(LoadStore) - int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.Lock(a, b, d); - gpr.FlushLockX(ABI_PARAM1); - if (b == d || a == d) - gpr.LoadToX64(d, true, true); - else - gpr.LoadToX64(d, false, true); - MOV(32, R(ABI_PARAM1), gpr.R(b)); - if (a) - ADD(32, R(ABI_PARAM1), gpr.R(a)); + int a = inst.RA, b = inst.RB, d = inst.RD; + gpr.Lock(a, b, d); + gpr.FlushLockX(ABI_PARAM1); + if (b == d || a == d) + gpr.LoadToX64(d, true, true); + else + gpr.LoadToX64(d, false, true); + MOV(32, R(ABI_PARAM1), gpr.R(b)); + if (a) + ADD(32, R(ABI_PARAM1), gpr.R(a)); #if 1 - SafeLoadRegToEAX(ABI_PARAM1, 32, 0); - MOV(32, gpr.R(d), R(EAX)); + SafeLoadRegToEAX(ABI_PARAM1, 32, 0); + MOV(32, gpr.R(d), R(EAX)); #else - UnsafeLoadRegToReg(ABI_PARAM1, gpr.RX(d), 32, 0, false); + UnsafeLoadRegToReg(ABI_PARAM1, gpr.RX(d), 32, 0, false); #endif - gpr.UnlockAll(); - gpr.UnlockAllX(); - } + gpr.UnlockAll(); + gpr.UnlockAllX(); +} - void Jit64::lhax(UGeckoInstruction inst) +void Jit64::lhax(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(LoadStore) + + int a = inst.RA, b = inst.RB, d = inst.RD; + gpr.Lock(a, b, d); + gpr.FlushLockX(ABI_PARAM1); + if (b == d || a == d) + gpr.LoadToX64(d, true, true); + else + gpr.LoadToX64(d, false, true); + MOV(32, R(ABI_PARAM1), gpr.R(b)); + if (a) + ADD(32, R(ABI_PARAM1), gpr.R(a)); + + // Some homebrew actually loads from a hw reg with this instruction + SafeLoadRegToEAX(ABI_PARAM1, 16, 0, true); + MOV(32, gpr.R(d), R(EAX)); + + gpr.UnlockAll(); + gpr.UnlockAllX(); +} + +void Jit64::lXz(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(LoadStore) + if (Core::g_CoreStartupParameter.bJITLoadStorelXzOff) + Default(inst); return; + + int d = inst.RD; + int a = inst.RA; + + // TODO(ector): Make it dynamically enable/disable idle skipping where appropriate + // Will give nice boost to dual core mode + // (mb2): I agree, + // IMHO those Idles should always be skipped and replaced by a more controllable "native" Idle methode + // ... maybe the throttle one already do that :p + // if (CommandProcessor::AllowIdleSkipping() && PixelEngine::AllowIdleSkipping()) + if (Core::GetStartupParameter().bSkipIdle && + inst.OPCD == 32 && + (inst.hex & 0xFFFF0000) == 0x800D0000 && + (Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 || + (Core::GetStartupParameter().bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) && + Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; + // TODO(LinesPrower): + // - Rewrite this! 
+ // It seems to be ugly and unefficient, but I don't know JIT stuff enough to make it right + // It only demonstrates the idea - int a = inst.RA, b = inst.RB, d = inst.RD; - gpr.Lock(a, b, d); + // do our job at first + s32 offset = (s32)(s16)inst.SIMM_16; gpr.FlushLockX(ABI_PARAM1); - if (b == d || a == d) - gpr.LoadToX64(d, true, true); - else - gpr.LoadToX64(d, false, true); - MOV(32, R(ABI_PARAM1), gpr.R(b)); - if (a) - ADD(32, R(ABI_PARAM1), gpr.R(a)); - - // Some homebrew actually loads from a hw reg with this instruction - SafeLoadRegToEAX(ABI_PARAM1, 16, 0, true); + gpr.Lock(d, a); + MOV(32, R(ABI_PARAM1), gpr.R(a)); + SafeLoadRegToEAX(ABI_PARAM1, 32, offset); + gpr.LoadToX64(d, false, true); MOV(32, gpr.R(d), R(EAX)); - gpr.UnlockAll(); gpr.UnlockAllX(); + + gpr.Flush(FLUSH_ALL); + + // if it's still 0, we can wait until the next event + CMP(32, R(RAX), Imm32(0)); + FixupBranch noIdle = J_CC(CC_NE); + + gpr.Flush(FLUSH_ALL); + fpr.Flush(FLUSH_ALL); + ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16); + + // ! we must continue executing of the loop after exception handling, maybe there is still 0 in r0 + //MOV(32, M(&PowerPC::ppcState.pc), Imm32(js.compilerPC)); + JMP(asm_routines.testExceptions, true); + + SetJumpTarget(noIdle); + + //js.compilerPC += 8; + return; } - void Jit64::lXz(UGeckoInstruction inst) + // R2 always points to the small read-only data area. We could bake R2-relative loads into immediates. + // R13 always points to the small read/write data area. Not so exciting but at least could drop checks in 32-bit safe mode. + + s32 offset = (s32)(s16)inst.SIMM_16; + if (!a) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff - || Core::g_CoreStartupParameter.bJITLoadStorelXzOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - - int d = inst.RD; - int a = inst.RA; + Default(inst); + return; + } + int accessSize; + switch (inst.OPCD) + { + case 32: + accessSize = 32; + if (Core::g_CoreStartupParameter.bJITLoadStorelwzOff) {Default(inst); return;} + break; //lwz + case 40: accessSize = 16; break; //lhz + case 34: accessSize = 8; break; //lbz + default: + //_assert_msg_(DYNA_REC, 0, "lXz: invalid access size"); + PanicAlert("lXz: invalid access size"); + return; + } - // TODO(ector): Make it dynamically enable/disable idle skipping where appropriate - // Will give nice boost to dual core mode - // (mb2): I agree, - // IMHO those Idles should always be skipped and replaced by a more controllable "native" Idle methode - // ... maybe the throttle one already do that :p - // if (CommandProcessor::AllowIdleSkipping() && PixelEngine::AllowIdleSkipping()) - if (Core::GetStartupParameter().bSkipIdle && - inst.OPCD == 32 && - (inst.hex & 0xFFFF0000) == 0x800D0000 && - (Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 || - (Core::GetStartupParameter().bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) && - Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8) - { - // TODO(LinesPrower): - // - Rewrite this! 
- // It seems to be ugly and unefficient, but I don't know JIT stuff enough to make it right - // It only demonstrates the idea - - // do our job at first - s32 offset = (s32)(s16)inst.SIMM_16; - gpr.FlushLockX(ABI_PARAM1); - gpr.Lock(d, a); - MOV(32, R(ABI_PARAM1), gpr.R(a)); - SafeLoadRegToEAX(ABI_PARAM1, 32, offset); - gpr.LoadToX64(d, false, true); - MOV(32, gpr.R(d), R(EAX)); - gpr.UnlockAll(); - gpr.UnlockAllX(); - - gpr.Flush(FLUSH_ALL); - - // if it's still 0, we can wait until the next event - CMP(32, R(RAX), Imm32(0)); - FixupBranch noIdle = J_CC(CC_NE); - - gpr.Flush(FLUSH_ALL); - fpr.Flush(FLUSH_ALL); - ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16); - - // ! we must continue executing of the loop after exception handling, maybe there is still 0 in r0 - //MOV(32, M(&PowerPC::ppcState.pc), Imm32(js.compilerPC)); - JMP(asm_routines.testExceptions, true); - - SetJumpTarget(noIdle); - - //js.compilerPC += 8; - return; - } - - // R2 always points to the small read-only data area. We could bake R2-relative loads into immediates. - // R13 always points to the small read/write data area. Not so exciting but at least could drop checks in 32-bit safe mode. - - s32 offset = (s32)(s16)inst.SIMM_16; - if (!a) - { - Default(inst); - return; - } - int accessSize; - switch (inst.OPCD) - { - case 32: - accessSize = 32; - if (Core::g_CoreStartupParameter.bJITLoadStorelwzOff) {Default(inst); return;} - break; //lwz - case 40: accessSize = 16; break; //lhz - case 34: accessSize = 8; break; //lbz - default: - //_assert_msg_(DYNA_REC, 0, "lXz: invalid access size"); - PanicAlert("lXz: invalid access size"); - return; - } - - //Still here? Do regular path. + //Still here? Do regular path. #if defined(_M_X64) - if (accessSize == 8 || accessSize == 16 || !jo.enableFastMem) { + if (accessSize == 8 || accessSize == 16 || !jo.enableFastMem) { #else - if (true) { + if (true) { #endif - // Safe and boring - gpr.FlushLockX(ABI_PARAM1); - gpr.Lock(d, a); - MOV(32, R(ABI_PARAM1), gpr.R(a)); - SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset); - gpr.LoadToX64(d, false, true); - MOV(32, gpr.R(d), R(EAX)); - gpr.UnlockAll(); - gpr.UnlockAllX(); - return; - } - - // Fast and daring - gpr.Lock(a, d); - gpr.LoadToX64(a, true, false); - gpr.LoadToX64(d, a == d, true); - MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset)); - switch (accessSize) { - case 32: - BSWAP(32, gpr.R(d).GetSimpleReg()); - break; -// Careful in the backpatch - need to properly nop over first -// case 16: -// BSWAP(32, gpr.R(d).GetSimpleReg()); -// SHR(32, gpr.R(d), Imm8(16)); -// break; - } - gpr.UnlockAll(); - } - - void Jit64::lha(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - - int d = inst.RD; - int a = inst.RA; - s32 offset = (s32)(s16)inst.SIMM_16; // Safe and boring gpr.FlushLockX(ABI_PARAM1); gpr.Lock(d, a); MOV(32, R(ABI_PARAM1), gpr.R(a)); - SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true); - gpr.LoadToX64(d, d == a, true); + SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset); + gpr.LoadToX64(d, false, true); MOV(32, gpr.R(d), R(EAX)); gpr.UnlockAll(); gpr.UnlockAllX(); return; } - void Jit64::lwzux(UGeckoInstruction inst) + // Fast and daring + gpr.Lock(a, d); + gpr.LoadToX64(a, true, false); + gpr.LoadToX64(d, a == d, true); + MOV(accessSize, gpr.R(d), MComplex(RBX, gpr.R(a).GetSimpleReg(), SCALE_1, offset)); + 
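	// Fastmem: on x64, RBX holds the base of the emulated address space, so the
	// load can go straight through MComplex(RBX, ra, 1, offset). Guest memory is
	// big-endian, hence the BSWAP on the 32-bit path below.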
switch (accessSize) { + case 32: + BSWAP(32, gpr.R(d).GetSimpleReg()); + break; + // Careful in the backpatch - need to properly nop over first + // case 16: + // BSWAP(32, gpr.R(d).GetSimpleReg()); + // SHR(32, gpr.R(d), Imm8(16)); + // break; + } + gpr.UnlockAll(); +} + +void Jit64::lha(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(LoadStore) + + int d = inst.RD; + int a = inst.RA; + s32 offset = (s32)(s16)inst.SIMM_16; + // Safe and boring + gpr.FlushLockX(ABI_PARAM1); + gpr.Lock(d, a); + MOV(32, R(ABI_PARAM1), gpr.R(a)); + SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true); + gpr.LoadToX64(d, d == a, true); + MOV(32, gpr.R(d), R(EAX)); + gpr.UnlockAll(); + gpr.UnlockAllX(); + return; +} + +void Jit64::lwzux(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(LoadStore) + + int a = inst.RA, b = inst.RB, d = inst.RD; + if (!a || a == d || a == b) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - - int a = inst.RA, b = inst.RB, d = inst.RD; - if (!a || a == d || a == b) - { - Default(inst); - return; - } - gpr.Lock(a, b, d); - - gpr.LoadToX64(d, b == d, true); - gpr.LoadToX64(a, true, true); - ADD(32, gpr.R(a), gpr.R(b)); - MOV(32, R(EAX), gpr.R(a)); - SafeLoadRegToEAX(EAX, 32, 0, false); - MOV(32, gpr.R(d), R(EAX)); - - gpr.UnlockAll(); + Default(inst); return; } + gpr.Lock(a, b, d); - // Zero cache line. - void Jit64::dcbz(UGeckoInstruction inst) - { - Default(inst); return; + gpr.LoadToX64(d, b == d, true); + gpr.LoadToX64(a, true, true); + ADD(32, gpr.R(a), gpr.R(b)); + MOV(32, R(EAX), gpr.R(a)); + SafeLoadRegToEAX(EAX, 32, 0, false); + MOV(32, gpr.R(d), R(EAX)); - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; + gpr.UnlockAll(); + return; +} - MOV(32, R(EAX), gpr.R(inst.RB)); - if (inst.RA) - ADD(32, R(EAX), gpr.R(inst.RA)); - AND(32, R(EAX), Imm32(~31)); - XORPD(XMM0, R(XMM0)); +// Zero cache line. 
+void Jit64::dcbz(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(LoadStore) + + Default(inst); return; + + MOV(32, R(EAX), gpr.R(inst.RB)); + if (inst.RA) + ADD(32, R(EAX), gpr.R(inst.RA)); + AND(32, R(EAX), Imm32(~31)); + XORPD(XMM0, R(XMM0)); #ifdef _M_X64 - MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0); - MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0); + MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0); + MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0); #else - AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); - MOVAPS(MDisp(EAX, (u32)Memory::base), XMM0); - MOVAPS(MDisp(EAX, (u32)Memory::base + 16), XMM0); + AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); + MOVAPS(MDisp(EAX, (u32)Memory::base), XMM0); + MOVAPS(MDisp(EAX, (u32)Memory::base + 16), XMM0); #endif - } +} - void Jit64::stX(UGeckoInstruction inst) +void Jit64::stX(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(LoadStore) + + int s = inst.RS; + int a = inst.RA; + + bool update = inst.OPCD & 1; + + s32 offset = (s32)(s16)inst.SIMM_16; + if (a || update) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - - int s = inst.RS; - int a = inst.RA; - - bool update = inst.OPCD & 1; - - s32 offset = (s32)(s16)inst.SIMM_16; - if (a || update) + int accessSize; + switch (inst.OPCD & ~1) { - int accessSize; - switch (inst.OPCD & ~1) - { - case 36: accessSize = 32; break; //stw - case 44: accessSize = 16; break; //sth - case 38: accessSize = 8; break; //stb - default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return; - } + case 36: accessSize = 32; break; //stw + case 44: accessSize = 16; break; //sth + case 38: accessSize = 8; break; //stb + default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return; + } - if (gpr.R(a).IsImm()) + if (gpr.R(a).IsImm()) + { + // If we already know the address through constant folding, we can do some + // fun tricks... + u32 addr = (u32)gpr.R(a).offset; + addr += offset; + if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe) { - // If we already know the address through constant folding, we can do some - // fun tricks... - u32 addr = (u32)gpr.R(a).offset; - addr += offset; - if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe) - { - if (offset && update) - gpr.SetImmediate32(a, addr); - gpr.FlushLockX(ABI_PARAM1); - MOV(32, R(ABI_PARAM1), gpr.R(s)); - switch (accessSize) - { + if (offset && update) + gpr.SetImmediate32(a, addr); + gpr.FlushLockX(ABI_PARAM1); + MOV(32, R(ABI_PARAM1), gpr.R(s)); + switch (accessSize) + { // No need to protect these, they don't touch any state // question - should we inline them instead? Pro: Lose a CALL Con: Code bloat - case 8: CALL((void *)asm_routines.fifoDirectWrite8); break; - case 16: CALL((void *)asm_routines.fifoDirectWrite16); break; - case 32: CALL((void *)asm_routines.fifoDirectWrite32); break; - } - js.fifoBytesThisBlock += accessSize >> 3; - gpr.UnlockAllX(); - return; + case 8: CALL((void *)asm_routines.fifoDirectWrite8); break; + case 16: CALL((void *)asm_routines.fifoDirectWrite16); break; + case 32: CALL((void *)asm_routines.fifoDirectWrite32); break; } - else if (Memory::IsRAMAddress(addr) && accessSize == 32) - { - if (offset && update) - gpr.SetImmediate32(a, addr); - MOV(accessSize, R(EAX), gpr.R(s)); - BSWAP(accessSize, EAX); - WriteToConstRamAddress(accessSize, R(EAX), addr); - return; - } - // Other IO not worth the trouble. - } - - // Optimized stack access? 
- if (accessSize == 32 && !gpr.R(a).IsImm() && a == 1 && js.st.isFirstBlockOfFunction && jo.optimizeStack) - { - gpr.FlushLockX(ABI_PARAM1); - MOV(32, R(ABI_PARAM1), gpr.R(a)); - MOV(32, R(EAX), gpr.R(s)); - BSWAP(32, EAX); -#ifdef _M_X64 - MOV(accessSize, MComplex(RBX, ABI_PARAM1, SCALE_1, (u32)offset), R(EAX)); -#elif _M_IX86 - AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK)); - MOV(accessSize, MDisp(ABI_PARAM1, (u32)Memory::base + (u32)offset), R(EAX)); -#endif - if (update) - ADD(32, gpr.R(a), Imm32(offset)); + js.fifoBytesThisBlock += accessSize >> 3; gpr.UnlockAllX(); return; } - - /* // TODO - figure out why Beyond Good and Evil hates this -#ifdef _M_X64 - if (accessSize == 32 && !update && jo.enableFastMem) + else if (Memory::IsRAMAddress(addr) && accessSize == 32) { - // Fast and daring - requires 64-bit - MOV(32, R(EAX), gpr.R(s)); - gpr.LoadToX64(a, true, false); - BSWAP(32, EAX); - MOV(accessSize, MComplex(RBX, gpr.RX(a), SCALE_1, (u32)offset), R(EAX)); + if (offset && update) + gpr.SetImmediate32(a, addr); + MOV(accessSize, R(EAX), gpr.R(s)); + BSWAP(accessSize, EAX); + WriteToConstRamAddress(accessSize, R(EAX), addr); return; } -#endif*/ + // Other IO not worth the trouble. + } - //Still here? Do regular path. - gpr.Lock(s, a); - gpr.FlushLockX(ECX, EDX); - MOV(32, R(EDX), gpr.R(a)); - MOV(32, R(ECX), gpr.R(s)); - if (offset) - ADD(32, R(EDX), Imm32((u32)offset)); - if (update && offset) - { - gpr.LoadToX64(a, true, true); - MOV(32, gpr.R(a), R(EDX)); - } - TEST(32, R(EDX), Imm32(0x0C000000)); - FixupBranch unsafe_addr = J_CC(CC_NZ); - BSWAP(accessSize, ECX); -#ifdef _M_X64 - MOV(accessSize, MComplex(RBX, EDX, SCALE_1, 0), R(ECX)); -#else - AND(32, R(EDX), Imm32(Memory::MEMVIEW32_MASK)); - MOV(accessSize, MDisp(EDX, (u32)Memory::base), R(ECX)); + // Optimized stack access? 
+ if (accessSize == 32 && !gpr.R(a).IsImm() && a == 1 && js.st.isFirstBlockOfFunction && jo.optimizeStack) + { + gpr.FlushLockX(ABI_PARAM1); + MOV(32, R(ABI_PARAM1), gpr.R(a)); + MOV(32, R(EAX), gpr.R(s)); + BSWAP(32, EAX); +#ifdef _M_X64 + MOV(accessSize, MComplex(RBX, ABI_PARAM1, SCALE_1, (u32)offset), R(EAX)); +#elif _M_IX86 + AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK)); + MOV(accessSize, MDisp(ABI_PARAM1, (u32)Memory::base + (u32)offset), R(EAX)); #endif - FixupBranch skip_call = J(); - SetJumpTarget(unsafe_addr); - switch (accessSize) - { - case 32: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ECX, EDX); break; - case 16: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U16, 2), ECX, EDX); break; - case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), ECX, EDX); break; - } - SetJumpTarget(skip_call); - gpr.UnlockAll(); + if (update) + ADD(32, gpr.R(a), Imm32(offset)); gpr.UnlockAllX(); - } - else - { - Default(inst); - } - } - - void Jit64::stXx(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - - int a = inst.RA, b = inst.RB, s = inst.RS; - if (!a || a == s || a == b) - { - Default(inst); return; } - gpr.Lock(a, b, s); - gpr.FlushLockX(ECX, EDX); - if (inst.SUBOP10 & 32) { - gpr.LoadToX64(a, true, true); - ADD(32, gpr.R(a), gpr.R(b)); - MOV(32, R(EDX), gpr.R(a)); - } else { - MOV(32, R(EDX), gpr.R(a)); - ADD(32, R(EDX), gpr.R(b)); + /* // TODO - figure out why Beyond Good and Evil hates this + #ifdef _M_X64 + if (accessSize == 32 && !update && jo.enableFastMem) + { + // Fast and daring - requires 64-bit + MOV(32, R(EAX), gpr.R(s)); + gpr.LoadToX64(a, true, false); + BSWAP(32, EAX); + MOV(accessSize, MComplex(RBX, gpr.RX(a), SCALE_1, (u32)offset), R(EAX)); + return; } - unsigned accessSize; - switch (inst.SUBOP10 & ~32) { + #endif*/ + + //Still here? Do regular path. 
+ gpr.Lock(s, a); + gpr.FlushLockX(ECX, EDX); + MOV(32, R(EDX), gpr.R(a)); + MOV(32, R(ECX), gpr.R(s)); + if (offset) + ADD(32, R(EDX), Imm32((u32)offset)); + if (update && offset) + { + gpr.LoadToX64(a, true, true); + MOV(32, gpr.R(a), R(EDX)); + } + TEST(32, R(EDX), Imm32(0x0C000000)); + FixupBranch unsafe_addr = J_CC(CC_NZ); + BSWAP(accessSize, ECX); +#ifdef _M_X64 + MOV(accessSize, MComplex(RBX, EDX, SCALE_1, 0), R(ECX)); +#else + AND(32, R(EDX), Imm32(Memory::MEMVIEW32_MASK)); + MOV(accessSize, MDisp(EDX, (u32)Memory::base), R(ECX)); +#endif + FixupBranch skip_call = J(); + SetJumpTarget(unsafe_addr); + switch (accessSize) + { + case 32: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ECX, EDX); break; + case 16: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U16, 2), ECX, EDX); break; + case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), ECX, EDX); break; + } + SetJumpTarget(skip_call); + gpr.UnlockAll(); + gpr.UnlockAllX(); + } + else + { + Default(inst); + } +} + +void Jit64::stXx(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(LoadStore) + + int a = inst.RA, b = inst.RB, s = inst.RS; + if (!a || a == s || a == b) + { + Default(inst); + return; + } + gpr.Lock(a, b, s); + gpr.FlushLockX(ECX, EDX); + + if (inst.SUBOP10 & 32) { + gpr.LoadToX64(a, true, true); + ADD(32, gpr.R(a), gpr.R(b)); + MOV(32, R(EDX), gpr.R(a)); + } else { + MOV(32, R(EDX), gpr.R(a)); + ADD(32, R(EDX), gpr.R(b)); + } + unsigned accessSize; + switch (inst.SUBOP10 & ~32) { case 151: accessSize = 32; break; case 407: accessSize = 16; break; case 215: accessSize = 8; break; - } - - MOV(32, R(ECX), gpr.R(s)); - SafeWriteRegToReg(ECX, EDX, accessSize, 0); - - gpr.UnlockAll(); - gpr.UnlockAllX(); - return; } + MOV(32, R(ECX), gpr.R(s)); + SafeWriteRegToReg(ECX, EDX, accessSize, 0); + + gpr.UnlockAll(); + gpr.UnlockAllX(); + return; +} + // A few games use these heavily in video codecs. 
void Jit64::lmw(UGeckoInstruction inst) { @@ -512,4 +505,4 @@ void Jit64::icbi(UGeckoInstruction inst) { Default(inst); WriteExit(js.compilerPC + 4, 0); -} \ No newline at end of file +} diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index dc1a691c00..ae3d091720 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -51,9 +51,8 @@ u32 GC_ALIGNED16(temp32); void Jit64::lfs(UGeckoInstruction inst) { - if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; + INSTRUCTION_START + JITDISABLE(LoadStoreFloating) int d = inst.RD; int a = inst.RA; @@ -88,9 +87,8 @@ void Jit64::lfs(UGeckoInstruction inst) void Jit64::lfd(UGeckoInstruction inst) { - if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; + INSTRUCTION_START + JITDISABLE(LoadStoreFloating) int d = inst.RD; int a = inst.RA; @@ -155,10 +153,8 @@ void Jit64::lfd(UGeckoInstruction inst) void Jit64::stfd(UGeckoInstruction inst) { - if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff) - {Default(inst); return;} // turn off from debugger - - INSTRUCTION_START; + INSTRUCTION_START + JITDISABLE(LoadStoreFloating) int s = inst.RS; int a = inst.RA; @@ -234,9 +230,8 @@ void Jit64::stfd(UGeckoInstruction inst) void Jit64::stfs(UGeckoInstruction inst) { - if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; + INSTRUCTION_START + JITDISABLE(LoadStoreFloating) bool update = inst.OPCD & 1; int s = inst.RS; @@ -291,9 +286,8 @@ void Jit64::stfs(UGeckoInstruction inst) void Jit64::stfsx(UGeckoInstruction inst) { - if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; + INSTRUCTION_START + JITDISABLE(LoadStoreFloating) // We can take a shortcut here - it's not likely that a hardware access would use this instruction. gpr.FlushLockX(ABI_PARAM1); @@ -311,9 +305,8 @@ void Jit64::stfsx(UGeckoInstruction inst) void Jit64::lfsx(UGeckoInstruction inst) { - if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; + INSTRUCTION_START + JITDISABLE(LoadStoreFloating) fpr.Lock(inst.RS); fpr.LoadToX64(inst.RS, false, true); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 098831d286..758e547537 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -91,9 +91,8 @@ const double GC_ALIGNED16(m_dequantizeTableD[]) = // We will have to break block after quantizers are written to. 
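The comment above hints at the bookkeeping this relies on: each compiled block records which quantizer registers it baked in, so a later GQR write can invalidate exactly the affected blocks. A sketch of that dependency test in plain C++ (the flag layout shown here is assumed, not taken from the source):

#include <cstdint>

// Assumed layout: one bit per GQR, starting at bit 0 of the block's flags.
static const uint32_t kBlockUsesGQR0 = 1u << 0;

// True if the block compiled in the value of GQRn (n in [0, 7]) and would
// have to be destroyed when that register changes.
static bool BlockDependsOnGQR(uint32_t block_flags, int n)
{
	return (block_flags & (kBlockUsesGQR0 << n)) != 0;
}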
void Jit64::psq_st(UGeckoInstruction inst) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStorePairedOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; + INSTRUCTION_START + JITDISABLE(LoadStorePaired) js.block_flags |= BLOCK_USE_GQR0 << inst.I; if (js.blockSetsQuantizers || !Core::GetStartupParameter().bOptimizeQuantizers) @@ -296,9 +295,8 @@ void Jit64::psq_st(UGeckoInstruction inst) void Jit64::psq_l(UGeckoInstruction inst) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStorePairedOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; + INSTRUCTION_START + JITDISABLE(LoadStorePaired) js.block_flags |= BLOCK_USE_GQR0 << inst.I; diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Paired.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Paired.cpp index 0c2f6fceee..b350010c13 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Paired.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Paired.cpp @@ -34,370 +34,361 @@ // cmppd, andpd, andnpd, or // lfsx, ps_merge01 etc - const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL}; - const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL}; - const double GC_ALIGNED16(psOneOne[2]) = {1.0, 1.0}; - const double GC_ALIGNED16(psZeroZero[2]) = {0.0, 0.0}; +const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL}; +const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL}; +const double GC_ALIGNED16(psOneOne[2]) = {1.0, 1.0}; +const double GC_ALIGNED16(psZeroZero[2]) = {0.0, 0.0}; - void Jit64::ps_mr(UGeckoInstruction inst) +void Jit64::ps_mr(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Paired) + if (inst.Rc) { + Default(inst); return; + } + int d = inst.FD; + int b = inst.FB; + if (d == b) + return; + fpr.LoadToX64(d, false); + MOVAPD(fpr.RX(d), fpr.R(b)); +} + +void Jit64::ps_sel(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Paired) + + Default(inst); return; + + if (inst.Rc) { + Default(inst); return; + } + // GRR can't get this to work 100%. Getting artifacts in D.O.N. intro. + int d = inst.FD; + int a = inst.FA; + int b = inst.FB; + int c = inst.FC; + fpr.FlushLockX(XMM7); + fpr.FlushLockX(XMM6); + fpr.Lock(a, b, c, d); + fpr.LoadToX64(a, true, false); + fpr.LoadToX64(d, false, true); + // BLENDPD would have been nice... 
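Per lane, the CMPPD/ANDPD/ANDNPD/ORPD sequence that follows builds a branch-free select from a compare mask. The same computation in scalar C++, as a sketch with doubles viewed through 64-bit masks:

#include <cstdint>
#include <cstring>

// mask = all ones where a < 0 (CMPPD ..., 1), else all zeros;
// result = (mask & x) | (~mask & y), i.e. a < 0 ? x : y, with no branch.
static double SelectByMask(double a, double x, double y)
{
	const uint64_t mask = (a < 0.0) ? ~0ull : 0ull;
	uint64_t xb, yb, r;
	std::memcpy(&xb, &x, sizeof xb);
	std::memcpy(&yb, &y, sizeof yb);
	r = (mask & xb) | (~mask & yb);   // ANDPD / ANDNPD / ORPD
	double out;
	std::memcpy(&out, &r, sizeof out);
	return out;
}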
+ MOVAPD(XMM7, fpr.R(a)); + CMPPD(XMM7, M((void*)psZeroZero), 1); //less-than = 111111 + MOVAPD(XMM6, R(XMM7)); + ANDPD(XMM7, fpr.R(d)); + ANDNPD(XMM6, fpr.R(c)); + MOVAPD(fpr.RX(d), R(XMM7)); + ORPD(fpr.RX(d), R(XMM6)); + fpr.UnlockAll(); + fpr.UnlockAllX(); +} + +void Jit64::ps_sign(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Paired) + if (inst.Rc) { + Default(inst); return; + } + int d = inst.FD; + int b = inst.FB; + + fpr.Lock(d, b); + if (d != b) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - if (inst.Rc) { - Default(inst); return; - } - int d = inst.FD; - int b = inst.FB; - if (d == b) - return; fpr.LoadToX64(d, false); MOVAPD(fpr.RX(d), fpr.R(b)); } - - void Jit64::ps_sel(UGeckoInstruction inst) + else { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; + fpr.LoadToX64(d, true); + } + + switch (inst.SUBOP10) + { + case 40: //neg + XORPD(fpr.RX(d), M((void*)&psSignBits)); + break; + case 136: //nabs + ORPD(fpr.RX(d), M((void*)&psSignBits)); + break; + case 264: //abs + ANDPD(fpr.RX(d), M((void*)&psAbsMask)); + break; + } + + fpr.UnlockAll(); +} + +void Jit64::ps_rsqrte(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Paired) + if (inst.Rc) { + Default(inst); return; + } + int d = inst.FD; + int b = inst.FB; + fpr.Lock(d, b); + SQRTPD(XMM0, fpr.R(b)); + MOVAPD(XMM1, M((void*)&psOneOne)); + DIVPD(XMM1, R(XMM0)); + MOVAPD(fpr.R(d), XMM1); + fpr.UnlockAll(); +} + +//add a, b, c + +//mov a, b +//add a, c +//we need: +/* +psq_l +psq_stu +*/ + +/* +add a,b,a +*/ + +//There's still a little bit more optimization that can be squeezed out of this +void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X64Reg, OpArg)) +{ + fpr.Lock(d, a, b); + + if (d == a) + { + fpr.LoadToX64(d, true); + (this->*op)(fpr.RX(d), fpr.R(b)); + } + else if (d == b && reversible) + { + fpr.LoadToX64(d, true); + (this->*op)(fpr.RX(d), fpr.R(a)); + } + else if (a != d && b != d) + { + //sources different from d, can use rather quick solution + fpr.LoadToX64(d, false); + MOVAPD(fpr.RX(d), fpr.R(a)); + (this->*op)(fpr.RX(d), fpr.R(b)); + } + else if (b != d) + { + fpr.LoadToX64(d, false); + MOVAPD(XMM0, fpr.R(b)); + MOVAPD(fpr.RX(d), fpr.R(a)); + (this->*op)(fpr.RX(d), Gen::R(XMM0)); + } + else //Other combo, must use two temps :( + { + MOVAPD(XMM0, fpr.R(a)); + MOVAPD(XMM1, fpr.R(b)); + fpr.LoadToX64(d, false); + (this->*op)(XMM0, Gen::R(XMM1)); + MOVAPD(fpr.RX(d), Gen::R(XMM0)); + } + ForceSinglePrecisionP(fpr.RX(d)); + fpr.UnlockAll(); +} + +void Jit64::ps_arith(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Paired) + if (inst.Rc) { + Default(inst); return; + } + switch (inst.SUBOP5) + { + case 18: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD); break; //div + case 20: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD); break; //sub + case 21: tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD); break; //add + case 23://sel Default(inst); - return; - - if (inst.Rc) { - Default(inst); return; - } - // GRR can't get this to work 100%. Getting artifacts in D.O.N. intro. 
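The ps_sign cases above never touch the FPU; neg, nabs and abs are plain bit operations against psSignBits and psAbsMask. The same three operations in scalar C++ (a sketch; the helper names are illustrative):

#include <cstdint>
#include <cstring>

static uint64_t Bits(double d)       { uint64_t u; std::memcpy(&u, &d, 8); return u; }
static double   FromBits(uint64_t u) { double d;   std::memcpy(&d, &u, 8); return d; }

static const uint64_t kSignBit = 0x8000000000000000ULL; // psSignBits, per lane
static const uint64_t kAbsMask = 0x7FFFFFFFFFFFFFFFULL; // psAbsMask, per lane

static double PsNeg(double x)  { return FromBits(Bits(x) ^ kSignBit); } // XORPD
static double PsNabs(double x) { return FromBits(Bits(x) | kSignBit); } // ORPD
static double PsAbs(double x)  { return FromBits(Bits(x) & kAbsMask); } // ANDPD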
- int d = inst.FD; - int a = inst.FA; - int b = inst.FB; - int c = inst.FC; - fpr.FlushLockX(XMM7); - fpr.FlushLockX(XMM6); - fpr.Lock(a, b, c, d); - fpr.LoadToX64(a, true, false); - fpr.LoadToX64(d, false, true); - // BLENDPD would have been nice... - MOVAPD(XMM7, fpr.R(a)); - CMPPD(XMM7, M((void*)psZeroZero), 1); //less-than = 111111 - MOVAPD(XMM6, R(XMM7)); - ANDPD(XMM7, fpr.R(d)); - ANDNPD(XMM6, fpr.R(c)); - MOVAPD(fpr.RX(d), R(XMM7)); - ORPD(fpr.RX(d), R(XMM6)); - fpr.UnlockAll(); - fpr.UnlockAllX(); + break; + case 24://res + Default(inst); + break; + case 25: tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD); break; //mul + default: + _assert_msg_(DYNA_REC, 0, "ps_arith WTF!!!"); } +} - void Jit64::ps_sign(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - if (inst.Rc) { - Default(inst); return; - } - int d = inst.FD; - int b = inst.FB; - - fpr.Lock(d, b); - if (d != b) - { - fpr.LoadToX64(d, false); - MOVAPD(fpr.RX(d), fpr.R(b)); - } - else - { - fpr.LoadToX64(d, true); - } - - switch (inst.SUBOP10) - { - case 40: //neg - XORPD(fpr.RX(d), M((void*)&psSignBits)); - break; - case 136: //nabs - ORPD(fpr.RX(d), M((void*)&psSignBits)); - break; - case 264: //abs - ANDPD(fpr.RX(d), M((void*)&psAbsMask)); - break; - } - - fpr.UnlockAll(); +void Jit64::ps_sum(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Paired) + if (inst.Rc) { + Default(inst); return; } - - void Jit64::ps_rsqrte(UGeckoInstruction inst) + int d = inst.FD; + int a = inst.FA; + int b = inst.FB; + int c = inst.FC; + fpr.Lock(a,b,c,d); + fpr.LoadToX64(d, d == a || d == b || d == c, true); + switch (inst.SUBOP5) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - if (inst.Rc) { - Default(inst); return; - } - int d = inst.FD; - int b = inst.FB; - fpr.Lock(d, b); - SQRTPD(XMM0, fpr.R(b)); - MOVAPD(XMM1, M((void*)&psOneOne)); - DIVPD(XMM1, R(XMM0)); + case 10: + // Do the sum in upper subregisters, merge uppers + MOVDDUP(XMM0, fpr.R(a)); + MOVAPD(XMM1, fpr.R(b)); + ADDPD(XMM0, R(XMM1)); + UNPCKHPD(XMM0, fpr.R(c)); //merge + MOVAPD(fpr.R(d), XMM0); + break; + case 11: + // Do the sum in lower subregisters, merge lowers + MOVAPD(XMM0, fpr.R(a)); + MOVAPD(XMM1, fpr.R(b)); + SHUFPD(XMM1, R(XMM1), 5); // copy higher to lower + ADDPD(XMM0, R(XMM1)); // sum lowers + MOVAPD(XMM1, fpr.R(c)); + UNPCKLPD(XMM1, R(XMM0)); // merge MOVAPD(fpr.R(d), XMM1); - fpr.UnlockAll(); + break; + default: + PanicAlert("ps_sum WTF!!!"); } + ForceSinglePrecisionP(fpr.RX(d)); + fpr.UnlockAll(); +} - //add a, b, c - - //mov a, b - //add a, c - //we need: - /* - psq_l - psq_stu - */ - - /* - add a,b,a - */ - //There's still a little bit more optimization that can be squeezed out of this - void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X64Reg, OpArg)) +void Jit64::ps_muls(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Paired) + if (inst.Rc) { + Default(inst); return; + } + int d = inst.FD; + int a = inst.FA; + int c = inst.FC; + fpr.Lock(a, c, d); + fpr.LoadToX64(d, d == a || d == c, true); + switch (inst.SUBOP5) { - fpr.Lock(d, a, b); - - if (d == a) - { - fpr.LoadToX64(d, true); - (this->*op)(fpr.RX(d), fpr.R(b)); - } - else if (d == b && reversible) - { - fpr.LoadToX64(d, true); - (this->*op)(fpr.RX(d), fpr.R(a)); - } - else if (a != d && b 
!= d) - { - //sources different from d, can use rather quick solution - fpr.LoadToX64(d, false); - MOVAPD(fpr.RX(d), fpr.R(a)); - (this->*op)(fpr.RX(d), fpr.R(b)); - } - else if (b != d) - { - fpr.LoadToX64(d, false); - MOVAPD(XMM0, fpr.R(b)); - MOVAPD(fpr.RX(d), fpr.R(a)); - (this->*op)(fpr.RX(d), Gen::R(XMM0)); - } - else //Other combo, must use two temps :( - { - MOVAPD(XMM0, fpr.R(a)); - MOVAPD(XMM1, fpr.R(b)); - fpr.LoadToX64(d, false); - (this->*op)(XMM0, Gen::R(XMM1)); - MOVAPD(fpr.RX(d), Gen::R(XMM0)); - } - ForceSinglePrecisionP(fpr.RX(d)); - fpr.UnlockAll(); - } - - void Jit64::ps_arith(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - if (inst.Rc) { - Default(inst); return; - } - switch (inst.SUBOP5) - { - case 18: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD); break; //div - case 20: tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD); break; //sub - case 21: tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD); break; //add - case 23://sel - Default(inst); - break; - case 24://res - Default(inst); - break; - case 25: tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD); break; //mul - default: - _assert_msg_(DYNA_REC, 0, "ps_arith WTF!!!"); - } - } - - void Jit64::ps_sum(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - if (inst.Rc) { - Default(inst); return; - } - int d = inst.FD; - int a = inst.FA; - int b = inst.FB; - int c = inst.FC; - fpr.Lock(a,b,c,d); - fpr.LoadToX64(d, d == a || d == b || d == c, true); - switch (inst.SUBOP5) - { - case 10: - // Do the sum in upper subregisters, merge uppers - MOVDDUP(XMM0, fpr.R(a)); - MOVAPD(XMM1, fpr.R(b)); - ADDPD(XMM0, R(XMM1)); - UNPCKHPD(XMM0, fpr.R(c)); //merge - MOVAPD(fpr.R(d), XMM0); - break; - case 11: - // Do the sum in lower subregisters, merge lowers - MOVAPD(XMM0, fpr.R(a)); - MOVAPD(XMM1, fpr.R(b)); - SHUFPD(XMM1, R(XMM1), 5); // copy higher to lower - ADDPD(XMM0, R(XMM1)); // sum lowers - MOVAPD(XMM1, fpr.R(c)); - UNPCKLPD(XMM1, R(XMM0)); // merge - MOVAPD(fpr.R(d), XMM1); - break; - default: - PanicAlert("ps_sum WTF!!!"); - } - ForceSinglePrecisionP(fpr.RX(d)); - fpr.UnlockAll(); - } - - - void Jit64::ps_muls(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - if (inst.Rc) { - Default(inst); return; - } - int d = inst.FD; - int a = inst.FA; - int c = inst.FC; - fpr.Lock(a, c, d); - fpr.LoadToX64(d, d == a || d == c, true); - switch (inst.SUBOP5) - { - case 12: - // Single multiply scalar high - // TODO - faster version for when regs are different - MOVAPD(XMM0, fpr.R(a)); - MOVDDUP(XMM1, fpr.R(c)); - MULPD(XMM0, R(XMM1)); - MOVAPD(fpr.R(d), XMM0); - break; - case 13: - // TODO - faster version for when regs are different - MOVAPD(XMM0, fpr.R(a)); - MOVAPD(XMM1, fpr.R(c)); - SHUFPD(XMM1, R(XMM1), 3); // copy higher to lower - MULPD(XMM0, R(XMM1)); - MOVAPD(fpr.R(d), XMM0); - break; - default: - PanicAlert("ps_muls WTF!!!"); - } - ForceSinglePrecisionP(fpr.RX(d)); - fpr.UnlockAll(); - } - - - //TODO: find easy cases and optimize them, do a breakout like ps_arith - void Jit64::ps_mergeXX(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || 
Core::g_CoreStartupParameter.bJITPairedOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - if (inst.Rc) { - Default(inst); return; - } - int d = inst.FD; - int a = inst.FA; - int b = inst.FB; - fpr.Lock(a,b,d); - + case 12: + // Single multiply scalar high + // TODO - faster version for when regs are different MOVAPD(XMM0, fpr.R(a)); - switch (inst.SUBOP10) - { - case 528: - UNPCKLPD(XMM0, fpr.R(b)); //unpck is faster than shuf - break; //00 - case 560: - SHUFPD(XMM0, fpr.R(b), 2); //must use shuf here - break; //01 - case 592: - SHUFPD(XMM0, fpr.R(b), 1); - break; //10 - case 624: - UNPCKHPD(XMM0, fpr.R(b)); - break; //11 - default: - _assert_msg_(DYNA_REC, 0, "ps_merge - invalid op"); - } - fpr.LoadToX64(d, false); - MOVAPD(fpr.RX(d), Gen::R(XMM0)); - fpr.UnlockAll(); - } - - - //TODO: add optimized cases - void Jit64::ps_maddXX(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - if (inst.Rc) { - Default(inst); return; - } - int a = inst.FA; - int b = inst.FB; - int c = inst.FC; - int d = inst.FD; - fpr.Lock(a,b,c,d); - + MOVDDUP(XMM1, fpr.R(c)); + MULPD(XMM0, R(XMM1)); + MOVAPD(fpr.R(d), XMM0); + break; + case 13: + // TODO - faster version for when regs are different MOVAPD(XMM0, fpr.R(a)); - switch (inst.SUBOP5) - { - case 14: //madds0 - MOVDDUP(XMM1, fpr.R(c)); - MULPD(XMM0, R(XMM1)); - ADDPD(XMM0, fpr.R(b)); - break; - case 15: //madds1 - MOVAPD(XMM1, fpr.R(c)); - SHUFPD(XMM1, R(XMM1), 3); // copy higher to lower - MULPD(XMM0, R(XMM1)); - ADDPD(XMM0, fpr.R(b)); - break; - case 28: //msub - MULPD(XMM0, fpr.R(c)); - SUBPD(XMM0, fpr.R(b)); - break; - case 29: //madd - MULPD(XMM0, fpr.R(c)); - ADDPD(XMM0, fpr.R(b)); - break; - case 30: //nmsub - MULPD(XMM0, fpr.R(c)); - SUBPD(XMM0, fpr.R(b)); - XORPD(XMM0, M((void*)&psSignBits)); - break; - case 31: //nmadd - MULPD(XMM0, fpr.R(c)); - ADDPD(XMM0, fpr.R(b)); - XORPD(XMM0, M((void*)&psSignBits)); - break; - default: - _assert_msg_(DYNA_REC, 0, "ps_maddXX WTF!!!"); - //Default(inst); - //fpr.UnlockAll(); - return; - } - fpr.LoadToX64(d, false); - MOVAPD(fpr.RX(d), Gen::R(XMM0)); - ForceSinglePrecisionP(fpr.RX(d)); - fpr.UnlockAll(); + MOVAPD(XMM1, fpr.R(c)); + SHUFPD(XMM1, R(XMM1), 3); // copy higher to lower + MULPD(XMM0, R(XMM1)); + MOVAPD(fpr.R(d), XMM0); + break; + default: + PanicAlert("ps_muls WTF!!!"); } + ForceSinglePrecisionP(fpr.RX(d)); + fpr.UnlockAll(); +} + + +//TODO: find easy cases and optimize them, do a breakout like ps_arith +void Jit64::ps_mergeXX(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Paired) + if (inst.Rc) { + Default(inst); return; + } + int d = inst.FD; + int a = inst.FA; + int b = inst.FB; + fpr.Lock(a,b,d); + + MOVAPD(XMM0, fpr.R(a)); + switch (inst.SUBOP10) + { + case 528: + UNPCKLPD(XMM0, fpr.R(b)); //unpck is faster than shuf + break; //00 + case 560: + SHUFPD(XMM0, fpr.R(b), 2); //must use shuf here + break; //01 + case 592: + SHUFPD(XMM0, fpr.R(b), 1); + break; //10 + case 624: + UNPCKHPD(XMM0, fpr.R(b)); + break; //11 + default: + _assert_msg_(DYNA_REC, 0, "ps_merge - invalid op"); + } + fpr.LoadToX64(d, false); + MOVAPD(fpr.RX(d), Gen::R(XMM0)); + fpr.UnlockAll(); +} + + +//TODO: add optimized cases +void Jit64::ps_maddXX(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(Paired) + if (inst.Rc) { + Default(inst); return; + } + int a = inst.FA; + int b = inst.FB; + int c = inst.FC; + int d = inst.FD; + 
fpr.Lock(a,b,c,d); + + MOVAPD(XMM0, fpr.R(a)); + switch (inst.SUBOP5) + { + case 14: //madds0 + MOVDDUP(XMM1, fpr.R(c)); + MULPD(XMM0, R(XMM1)); + ADDPD(XMM0, fpr.R(b)); + break; + case 15: //madds1 + MOVAPD(XMM1, fpr.R(c)); + SHUFPD(XMM1, R(XMM1), 3); // copy higher to lower + MULPD(XMM0, R(XMM1)); + ADDPD(XMM0, fpr.R(b)); + break; + case 28: //msub + MULPD(XMM0, fpr.R(c)); + SUBPD(XMM0, fpr.R(b)); + break; + case 29: //madd + MULPD(XMM0, fpr.R(c)); + ADDPD(XMM0, fpr.R(b)); + break; + case 30: //nmsub + MULPD(XMM0, fpr.R(c)); + SUBPD(XMM0, fpr.R(b)); + XORPD(XMM0, M((void*)&psSignBits)); + break; + case 31: //nmadd + MULPD(XMM0, fpr.R(c)); + ADDPD(XMM0, fpr.R(b)); + XORPD(XMM0, M((void*)&psSignBits)); + break; + default: + _assert_msg_(DYNA_REC, 0, "ps_maddXX WTF!!!"); + //Default(inst); + //fpr.UnlockAll(); + return; + } + fpr.LoadToX64(d, false); + MOVAPD(fpr.RX(d), Gen::R(XMM0)); + ForceSinglePrecisionP(fpr.RX(d)); + fpr.UnlockAll(); +} diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp index 87d599d955..8db22dff9e 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -29,172 +29,165 @@ #include "Jit.h" #include "JitRegCache.h" - void Jit64::mtspr(UGeckoInstruction inst) +void Jit64::mtspr(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(SystemRegisters) + u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); + int d = inst.RD; + + switch (iIndex) { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); - int d = inst.RD; + case SPR_LR: + case SPR_CTR: + case SPR_XER: + // These are safe to do the easy way, see the bottom of this function. + break; - switch (iIndex) + case SPR_GQR0: + case SPR_GQR0 + 1: + case SPR_GQR0 + 2: + case SPR_GQR0 + 3: + case SPR_GQR0 + 4: + case SPR_GQR0 + 5: + case SPR_GQR0 + 6: + case SPR_GQR0 + 7: + js.blockSetsQuantizers = true; + // Prevent recompiler from compiling in old quantizer values. + // If the value changed, destroy all blocks using this quantizer + // This will create a little bit of block churn, but hopefully not too bad. { - case SPR_LR: - case SPR_CTR: - case SPR_XER: - // These are safe to do the easy way, see the bottom of this function. - break; - - case SPR_GQR0: - case SPR_GQR0 + 1: - case SPR_GQR0 + 2: - case SPR_GQR0 + 3: - case SPR_GQR0 + 4: - case SPR_GQR0 + 5: - case SPR_GQR0 + 6: - case SPR_GQR0 + 7: - js.blockSetsQuantizers = true; - // Prevent recompiler from compiling in old quantizer values. - // If the value changed, destroy all blocks using this quantizer - // This will create a little bit of block churn, but hopefully not too bad. - { - /* + /* MOV(32, R(EAX), M(&PowerPC::ppcState.spr[iIndex])); // Load old value CMP(32, R(EAX), gpr.R(inst.RD)); FixupBranch skip_destroy = J_CC(CC_E, false); int gqr = iIndex - SPR_GQR0; ABI_CallFunctionC(ProtectFunction(&Jit64::DestroyBlocksWithFlag, 1), (u32)BLOCK_USE_GQR0 << gqr); SetJumpTarget(skip_destroy);*/ - } - break; + } + break; // TODO - break block if quantizers are written to. - default: - Default(inst); - return; - } + default: + Default(inst); + return; + } - // OK, this is easy. + // OK, this is easy. 
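As an aside on the iIndex computation above: mtspr and mfspr encode the 10-bit SPR number with its two 5-bit halves swapped, which is what (SPRU << 5) | (SPRL & 0x1F) undoes. In scalar form (a sketch; the helper is illustrative):

#include <cstdint>

// SPR 8 (LR): SPRL = 8, SPRU = 0; SPR 9 (CTR): SPRL = 9, SPRU = 0.
static uint32_t DecodeSprIndex(uint32_t spru, uint32_t sprl)
{
	return (spru << 5) | (sprl & 0x1F);
}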
+ gpr.Lock(d); + gpr.LoadToX64(d, true); + MOV(32, M(&PowerPC::ppcState.spr[iIndex]), gpr.R(d)); + gpr.UnlockAll(); +} + +void Jit64::mfspr(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(SystemRegisters) + u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); + int d = inst.RD; + switch (iIndex) + { + case SPR_WPAR: + Default(inst); + return; + // case SPR_DEC: + //MessageBox(NULL, "Read from DEC", "????", MB_OK); + //break; + case SPR_TL: + case SPR_TU: + //CALL((void Jit64::*)&CoreTiming::Advance); + // fall through + default: gpr.Lock(d); - gpr.LoadToX64(d, true); - MOV(32, M(&PowerPC::ppcState.spr[iIndex]), gpr.R(d)); + gpr.LoadToX64(d, false); + MOV(32, gpr.R(d), M(&PowerPC::ppcState.spr[iIndex])); gpr.UnlockAll(); + break; } - - void Jit64::mfspr(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); - int d = inst.RD; - switch (iIndex) - { - case SPR_WPAR: - Default(inst); - return; -// case SPR_DEC: - //MessageBox(NULL, "Read from DEC", "????", MB_OK); - //break; - case SPR_TL: - case SPR_TU: - //CALL((void Jit64::*)&CoreTiming::Advance); - // fall through - default: - gpr.Lock(d); - gpr.LoadToX64(d, false); - MOV(32, gpr.R(d), M(&PowerPC::ppcState.spr[iIndex])); - gpr.UnlockAll(); - break; - } - } +} - // ======================================================================================= - // Don't interpret this, if we do we get thrown out - // -------------- - void Jit64::mtmsr(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - gpr.LoadToX64(inst.RS, true, false); - MOV(32, M(&MSR), gpr.R(inst.RS)); - gpr.Flush(FLUSH_ALL); - fpr.Flush(FLUSH_ALL); - WriteExit(js.compilerPC + 4, 0); - } - // ============== +// ======================================================================================= +// Don't interpret this, if we do we get thrown out +// -------------- +void Jit64::mtmsr(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(SystemRegisters) + gpr.LoadToX64(inst.RS, true, false); + MOV(32, M(&MSR), gpr.R(inst.RS)); + gpr.Flush(FLUSH_ALL); + fpr.Flush(FLUSH_ALL); + WriteExit(js.compilerPC + 4, 0); +} +// ============== - void Jit64::mfmsr(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - //Privileged? - gpr.LoadToX64(inst.RD, false); - MOV(32, gpr.R(inst.RD), M(&MSR)); - } +void Jit64::mfmsr(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(SystemRegisters) + //Privileged? 
+ gpr.LoadToX64(inst.RD, false); + MOV(32, gpr.R(inst.RD), M(&MSR)); +} - void Jit64::mftb(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - mfspr(inst); - } +void Jit64::mftb(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(SystemRegisters) + mfspr(inst); +} - void Jit64::mfcr(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - // USES_CR - int d = inst.RD; - gpr.LoadToX64(d, false, true); - MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0])); +void Jit64::mfcr(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(SystemRegisters) + // USES_CR + int d = inst.RD; + gpr.LoadToX64(d, false, true); + MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0])); + SHL(32, R(EAX), Imm8(4)); + for (int i = 1; i < 7; i++) { + OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i])); SHL(32, R(EAX), Imm8(4)); - for (int i = 1; i < 7; i++) { - OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i])); - SHL(32, R(EAX), Imm8(4)); - } - OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[7])); - MOV(32, gpr.R(d), R(EAX)); } + OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[7])); + MOV(32, gpr.R(d), R(EAX)); +} - void Jit64::mtcrf(UGeckoInstruction inst) - { - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; +void Jit64::mtcrf(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(SystemRegisters) - // USES_CR - u32 mask = 0; - u32 crm = inst.CRM; - if (crm == 0xFF) { - gpr.FlushLockX(ECX); - MOV(32, R(EAX), gpr.R(inst.RS)); - for (int i = 0; i < 8; i++) { - MOV(32, R(ECX), R(EAX)); - SHR(32, R(ECX), Imm8(28 - (i * 4))); - AND(32, R(ECX), Imm32(0xF)); - MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(ECX)); - } - gpr.UnlockAllX(); - } else { - Default(inst); - return; - - // TODO: translate this to work in new CR model. - for (int i = 0; i < 8; i++) { - if (crm & (1 << i)) - mask |= 0xF << (i*4); - } - MOV(32, R(EAX), gpr.R(inst.RS)); - MOV(32, R(ECX), M(&PowerPC::ppcState.cr)); - AND(32, R(EAX), Imm32(mask)); - AND(32, R(ECX), Imm32(~mask)); - OR(32, R(EAX), R(ECX)); - MOV(32, M(&PowerPC::ppcState.cr), R(EAX)); + // USES_CR + u32 mask = 0; + u32 crm = inst.CRM; + if (crm == 0xFF) { + gpr.FlushLockX(ECX); + MOV(32, R(EAX), gpr.R(inst.RS)); + for (int i = 0; i < 8; i++) { + MOV(32, R(ECX), R(EAX)); + SHR(32, R(ECX), Imm8(28 - (i * 4))); + AND(32, R(ECX), Imm32(0xF)); + MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(ECX)); } + gpr.UnlockAllX(); + } else { + Default(inst); + return; + + // TODO: translate this to work in new CR model. 
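In scalar terms, the unreachable old-model path below expands CRM into a nibble mask and merges the selected fields of RS into the packed CR word. A sketch of that computation:

#include <cstdint>

static uint32_t MergeCRFields(uint32_t cr, uint32_t rs, uint32_t crm)
{
	uint32_t mask = 0;
	for (int i = 0; i < 8; i++)
		if (crm & (1u << i))
			mask |= 0xFu << (i * 4);       // select field i
	return (rs & mask) | (cr & ~mask);     // AND / AND / OR merge
}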
+ for (int i = 0; i < 8; i++) { + if (crm & (1 << i)) + mask |= 0xF << (i*4); + } + MOV(32, R(EAX), gpr.R(inst.RS)); + MOV(32, R(ECX), M(&PowerPC::ppcState.cr)); + AND(32, R(EAX), Imm32(mask)); + AND(32, R(ECX), Imm32(~mask)); + OR(32, R(EAX), R(ECX)); + MOV(32, M(&PowerPC::ppcState.cr), R(EAX)); } +} diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_SystemRegisters.cpp index 8d6a85bf90..95d4c20e7e 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit_SystemRegisters.cpp @@ -31,12 +31,12 @@ //#define INSTRUCTION_START Default(inst); return; #define INSTRUCTION_START - void Jit64::mtspr(UGeckoInstruction inst) - { - INSTRUCTION_START +void Jit64::mtspr(UGeckoInstruction inst) +{ + INSTRUCTION_START JITDISABLE(SystemRegisters) u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); - switch(iIndex) { + switch(iIndex) { case SPR_LR: ibuild.EmitStoreLink(ibuild.EmitLoadGReg(inst.RD)); return; @@ -60,121 +60,121 @@ default: Default(inst); return; - } } +} - void Jit64::mfspr(UGeckoInstruction inst) - { - INSTRUCTION_START +void Jit64::mfspr(UGeckoInstruction inst) +{ + INSTRUCTION_START JITDISABLE(SystemRegisters) u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); - switch (iIndex) - { - case SPR_LR: - ibuild.EmitStoreGReg(ibuild.EmitLoadLink(), inst.RD); - return; - case SPR_CTR: - ibuild.EmitStoreGReg(ibuild.EmitLoadCTR(), inst.RD); - return; - case SPR_GQR0: - case SPR_GQR0 + 1: - case SPR_GQR0 + 2: - case SPR_GQR0 + 3: - case SPR_GQR0 + 4: - case SPR_GQR0 + 5: - case SPR_GQR0 + 6: - case SPR_GQR0 + 7: - ibuild.EmitStoreGReg(ibuild.EmitLoadGQR(iIndex - SPR_GQR0), inst.RD); - return; - default: - Default(inst); - return; - } - } - - - // ======================================================================================= - // Don't interpret this, if we do we get thrown out - // -------------- - void Jit64::mtmsr(UGeckoInstruction inst) + switch (iIndex) { - ibuild.EmitStoreMSR(ibuild.EmitLoadGReg(inst.RS)); - ibuild.EmitBranchUncond(ibuild.EmitIntConst(js.compilerPC + 4)); + case SPR_LR: + ibuild.EmitStoreGReg(ibuild.EmitLoadLink(), inst.RD); + return; + case SPR_CTR: + ibuild.EmitStoreGReg(ibuild.EmitLoadCTR(), inst.RD); + return; + case SPR_GQR0: + case SPR_GQR0 + 1: + case SPR_GQR0 + 2: + case SPR_GQR0 + 3: + case SPR_GQR0 + 4: + case SPR_GQR0 + 5: + case SPR_GQR0 + 6: + case SPR_GQR0 + 7: + ibuild.EmitStoreGReg(ibuild.EmitLoadGQR(iIndex - SPR_GQR0), inst.RD); + return; + default: + Default(inst); + return; } - // ============== +} - void Jit64::mfmsr(UGeckoInstruction inst) - { - INSTRUCTION_START +// ======================================================================================= +// Don't interpret this, if we do we get thrown out +// -------------- +void Jit64::mtmsr(UGeckoInstruction inst) +{ + ibuild.EmitStoreMSR(ibuild.EmitLoadGReg(inst.RS)); + ibuild.EmitBranchUncond(ibuild.EmitIntConst(js.compilerPC + 4)); +} +// ============== + + +void Jit64::mfmsr(UGeckoInstruction inst) +{ + INSTRUCTION_START JITDISABLE(SystemRegisters) ibuild.EmitStoreGReg(ibuild.EmitLoadMSR(), inst.RD); - } +} - void Jit64::mftb(UGeckoInstruction inst) - { - INSTRUCTION_START; - JITDISABLE(SystemRegisters) +void Jit64::mftb(UGeckoInstruction inst) +{ + INSTRUCTION_START; + JITDISABLE(SystemRegisters) mfspr(inst); - } +} - void Jit64::mfcr(UGeckoInstruction inst) - { - Default(inst); return; +void Jit64::mfcr(UGeckoInstruction inst) +{ + Default(inst); return; #if 0 - 
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; - // USES_CR - int d = inst.RD; - gpr.LoadToX64(d, false, true); - MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0])); + if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff) + {Default(inst); return;} // turn off from debugger + INSTRUCTION_START; + // USES_CR + int d = inst.RD; + gpr.LoadToX64(d, false, true); + MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0])); + SHL(32, R(EAX), Imm8(4)); + for (int i = 1; i < 7; i++) { + OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i])); SHL(32, R(EAX), Imm8(4)); - for (int i = 1; i < 7; i++) { - OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i])); - SHL(32, R(EAX), Imm8(4)); - } - OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[7])); - MOV(32, gpr.R(d), R(EAX)); -#endif } + OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[7])); + MOV(32, gpr.R(d), R(EAX)); +#endif +} - void Jit64::mtcrf(UGeckoInstruction inst) - { - Default(inst); return; +void Jit64::mtcrf(UGeckoInstruction inst) +{ + Default(inst); return; #if 0 - if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff) - {Default(inst); return;} // turn off from debugger - INSTRUCTION_START; + if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff) + {Default(inst); return;} // turn off from debugger + INSTRUCTION_START; - // USES_CR - u32 mask = 0; - u32 crm = inst.CRM; - if (crm == 0xFF) { - gpr.FlushLockX(ECX); - MOV(32, R(EAX), gpr.R(inst.RS)); - for (int i = 0; i < 8; i++) { - MOV(32, R(ECX), R(EAX)); - SHR(32, R(ECX), Imm8(28 - (i * 4))); - AND(32, R(ECX), Imm32(0xF)); - MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(ECX)); - } - gpr.UnlockAllX(); - } else { - Default(inst); - return; - - // TODO: translate this to work in new CR model. - for (int i = 0; i < 8; i++) { - if (crm & (1 << i)) - mask |= 0xF << (i*4); - } - MOV(32, R(EAX), gpr.R(inst.RS)); - MOV(32, R(ECX), M(&PowerPC::ppcState.cr)); - AND(32, R(EAX), Imm32(mask)); - AND(32, R(ECX), Imm32(~mask)); - OR(32, R(EAX), R(ECX)); - MOV(32, M(&PowerPC::ppcState.cr), R(EAX)); + // USES_CR + u32 mask = 0; + u32 crm = inst.CRM; + if (crm == 0xFF) { + gpr.FlushLockX(ECX); + MOV(32, R(EAX), gpr.R(inst.RS)); + for (int i = 0; i < 8; i++) { + MOV(32, R(ECX), R(EAX)); + SHR(32, R(ECX), Imm8(28 - (i * 4))); + AND(32, R(ECX), Imm32(0xF)); + MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(ECX)); } -#endif + gpr.UnlockAllX(); + } else { + Default(inst); + return; + + // TODO: translate this to work in new CR model. + for (int i = 0; i < 8; i++) { + if (crm & (1 << i)) + mask |= 0xF << (i*4); + } + MOV(32, R(EAX), gpr.R(inst.RS)); + MOV(32, R(ECX), M(&PowerPC::ppcState.cr)); + AND(32, R(EAX), Imm32(mask)); + AND(32, R(ECX), Imm32(~mask)); + OR(32, R(EAX), R(ECX)); + MOV(32, M(&PowerPC::ppcState.cr), R(EAX)); } +#endif +}
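For reference, the disabled mfcr path above packs the eight 4-bit cr_fast fields into a single 32-bit CR image, most significant field first. The SHL/OR ladder in scalar form (a sketch, assuming each cr_fast entry holds a 4-bit value):

#include <cstdint>

static uint32_t PackCR(const uint8_t cr_fast[8])
{
	uint32_t cr = 0;
	for (int i = 0; i < 8; i++)
		cr = (cr << 4) | (cr_fast[i] & 0xF); // cr_fast[0] lands in bits 28..31
	return cr;
}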