diff --git a/Source/Core/Core/Src/DSP/DSPEmitter.cpp b/Source/Core/Core/Src/DSP/DSPEmitter.cpp index caf6f1ed74..8fc2a94923 100644 --- a/Source/Core/Core/Src/DSP/DSPEmitter.cpp +++ b/Source/Core/Core/Src/DSP/DSPEmitter.cpp @@ -87,10 +87,11 @@ void DSPEmitter::checkExceptions(u32 retval) MOV(16, M(&(g_dsp.pc)), Imm16(compilePC)); DSPJitRegCache c(gpr); - SaveDSPRegs(); + gpr.saveRegs(); ABI_CallFunction((void *)&DSPCore_CheckExceptions); MOV(32, R(EAX), Imm32(retval)); JMP(returnDispatcher, true); + gpr.loadRegs(false); gpr.flushRegs(c,false); SetJumpTarget(skipCheck); @@ -107,9 +108,9 @@ void DSPEmitter::Default(UDSPInstruction inst) } // Fall back to interpreter - SaveDSPRegs(); + gpr.pushRegs(); ABI_CallFunctionC16((void*)opTable[inst]->intFunc, inst); - LoadDSPRegs(); + gpr.popRegs(); } void DSPEmitter::EmitInstruction(UDSPInstruction inst) @@ -122,9 +123,9 @@ void DSPEmitter::EmitInstruction(UDSPInstruction inst) if ((inst >> 12) == 0x3) { if (! extOpTable[inst & 0x7F]->jitFunc) { // Fall back to interpreter - SaveDSPRegs(); + gpr.pushRegs(); ABI_CallFunctionC16((void*)extOpTable[inst & 0x7F]->intFunc, inst); - LoadDSPRegs(); + gpr.popRegs(); INFO_LOG(DSPLLE, "Instruction not JITed(ext part): %04x\n", inst); ext_is_jit = false; } else { @@ -134,9 +135,9 @@ void DSPEmitter::EmitInstruction(UDSPInstruction inst) } else { if (!extOpTable[inst & 0xFF]->jitFunc) { // Fall back to interpreter - SaveDSPRegs(); + gpr.pushRegs(); ABI_CallFunctionC16((void*)extOpTable[inst & 0xFF]->intFunc, inst); - LoadDSPRegs(); + gpr.popRegs(); INFO_LOG(DSPLLE, "Instruction not JITed(ext part): %04x\n", inst); ext_is_jit = false; } else { @@ -161,9 +162,9 @@ void DSPEmitter::EmitInstruction(UDSPInstruction inst) if (!ext_is_jit) { //need to call the online cleanup function because //the writeBackLog gets populated at runtime - SaveDSPRegs(); + gpr.pushRegs(); ABI_CallFunction((void*)::applyWriteBackLog); - LoadDSPRegs(); + gpr.popRegs(); } else { popExtValueToReg(); } @@ -189,10 
+190,10 @@ void DSPEmitter::Compile(u16 start_addr) return; if (g_dsp.exceptions == 0) - return; + return; */ - LoadDSPRegs(); + gpr.loadRegs(); blockLinkEntry = GetCodePtr(); @@ -240,7 +241,7 @@ void DSPEmitter::Compile(u16 start_addr) // end of each block and in this order DSPJitRegCache c(gpr); HandleLoop(); - SaveDSPRegs(); + gpr.saveRegs(); if (!DSPHost_OnThread() && DSPAnalyzer::code_flags[start_addr] & DSPAnalyzer::CODE_IDLE_SKIP) { MOV(16, R(EAX), Imm16(DSP_IDLE_SKIP_CYCLES)); @@ -250,6 +251,7 @@ void DSPEmitter::Compile(u16 start_addr) MOV(16, R(EAX), Imm16(blockSize[start_addr])); } JMP(returnDispatcher, true); + gpr.loadRegs(false); gpr.flushRegs(c,false); SetJumpTarget(rLoopAddressExit); @@ -273,7 +275,7 @@ void DSPEmitter::Compile(u16 start_addr) DSPJitRegCache c(gpr); //don't update g_dsp.pc -- the branch insn already did - SaveDSPRegs(); + gpr.saveRegs(); if (!DSPHost_OnThread() && DSPAnalyzer::code_flags[start_addr] & DSPAnalyzer::CODE_IDLE_SKIP) { MOV(16, R(EAX), Imm16(DSP_IDLE_SKIP_CYCLES)); @@ -283,6 +285,7 @@ void DSPEmitter::Compile(u16 start_addr) MOV(16, R(EAX), Imm16(blockSize[start_addr])); } JMP(returnDispatcher, true); + gpr.loadRegs(false); gpr.flushRegs(c,false); SetJumpTarget(rNoBranch); @@ -334,7 +337,7 @@ void DSPEmitter::Compile(u16 start_addr) blockSize[start_addr] = 1; } - SaveDSPRegs(); + gpr.saveRegs(); if (!DSPHost_OnThread() && DSPAnalyzer::code_flags[start_addr] & DSPAnalyzer::CODE_IDLE_SKIP) { MOV(16, R(EAX), Imm16(DSP_IDLE_SKIP_CYCLES)); @@ -342,7 +345,7 @@ void DSPEmitter::Compile(u16 start_addr) else { MOV(16, R(EAX), Imm16(blockSize[start_addr])); - } + } JMP(returnDispatcher, true); } diff --git a/Source/Core/Core/Src/DSP/DSPEmitter.h b/Source/Core/Core/Src/DSP/DSPEmitter.h index 874caef5fe..a9b357b0a4 100644 --- a/Source/Core/Core/Src/DSP/DSPEmitter.h +++ b/Source/Core/Core/Src/DSP/DSPEmitter.h @@ -52,8 +52,10 @@ public: // CC Util void Update_SR_Register64(Gen::X64Reg val = Gen::EAX); - void 
Update_SR_Register64_Carry(Gen::X64Reg val = Gen::EAX); - void Update_SR_Register64_Carry2(Gen::X64Reg val = Gen::EAX); + void Update_SR_Register64_Carry(Gen::X64Reg val, + Gen::X64Reg carry_ovfl); + void Update_SR_Register64_Carry2(Gen::X64Reg val, + Gen::X64Reg carry_ovfl); void Update_SR_Register16(Gen::X64Reg val = Gen::EAX); void Update_SR_Register16_OverS32(Gen::X64Reg val = Gen::EAX); @@ -65,13 +67,13 @@ public: // Memory helper functions void increment_addr_reg(int reg); void decrement_addr_reg(int reg); - void increase_addr_reg(int reg); + void increase_addr_reg(int reg, int ix_reg); void decrease_addr_reg(int reg); - void imem_read(); - void dmem_read(); + void imem_read(Gen::X64Reg address); + void dmem_read(Gen::X64Reg address); void dmem_read_imm(u16 addr); - void dmem_write(); - void dmem_write_imm(u16 addr); + void dmem_write(Gen::X64Reg value); + void dmem_write_imm(u16 addr, Gen::X64Reg value); // Ext command helpers void pushExtValueFromReg(u16 dreg, u16 sreg); @@ -250,6 +252,7 @@ public: // CALL this to start the dispatcher const u8 *enterDispatcher; + const u8 *reenterDispatcher; const u8 *stubEntryPoint; const u8 *returnDispatcher; u16 compilePC; @@ -259,10 +262,6 @@ public: std::list *unresolvedJumps; DSPJitRegCache gpr; - - void LoadDSPRegs(); - void SaveDSPRegs(); - private: DSPCompiledCode *blocks; Block blockLinkEntry; @@ -275,12 +274,8 @@ private: // Counts down. 
// int cycles; - void Update_SR_Register(Gen::X64Reg val = Gen::EAX); - void ToMask(Gen::X64Reg value_reg = Gen::EDI); - void dsp_increment_one(Gen::X64Reg ar = Gen::EAX, Gen::X64Reg wr = Gen::EDX, Gen::X64Reg wr_pow = Gen::EDI, Gen::X64Reg temp_reg = Gen::ESI); - void dsp_decrement_one(Gen::X64Reg ar = Gen::EAX, Gen::X64Reg wr = Gen::EDX, Gen::X64Reg wr_pow = Gen::EDI, Gen::X64Reg temp_reg = Gen::ESI); void get_long_prod(Gen::X64Reg long_prod = Gen::RAX); void get_long_prod_round_prodl(Gen::X64Reg long_prod = Gen::RAX); void set_long_prod(); diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitArithmetic.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitArithmetic.cpp index ff536eb177..c1716d0d41 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitArithmetic.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitArithmetic.cpp @@ -210,16 +210,19 @@ void DSPEmitter::cmp(const UDSPInstruction opc) #ifdef _M_X64 if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc0 = dsp_get_long_acc(0); - get_long_acc(0, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(0, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 acc1 = dsp_get_long_acc(1); get_long_acc(1, RDX); // s64 res = dsp_convert_long_acc(acc0 - acc1); SUB(64, R(RAX), R(RDX)); // Update_SR_Register64(res, isCarry2(acc0, res), isOverflow(acc0, -acc1, res)); // CF -> influence on ABS/0xa100 NEG(64, R(RDX)); - Update_SR_Register64_Carry2(); + Update_SR_Register64_Carry2(EAX, tmp1); + gpr.putXReg(tmp1); } #else Default(opc); @@ -227,7 +230,7 @@ void DSPEmitter::cmp(const UDSPInstruction opc) } // CMPAR $acS axR.h -// 1100 0001 xxxx xxxx +// 110r s001 xxxx xxxx // Compares accumulator $acS with accumulator axR.h. // Not described by Duddie's doc - at least not as a separate instruction. 
// @@ -240,9 +243,11 @@ void DSPEmitter::cmpar(const UDSPInstruction opc) u8 rreg = ((opc >> 12) & 0x1); u8 sreg = (opc >> 11) & 0x1; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 sr = dsp_get_long_acc(sreg); - get_long_acc(sreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(sreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 rr = (s16)g_dsp.r.axh[rreg]; get_ax_h(rreg, RDX); // rr <<= 16; @@ -251,7 +256,8 @@ void DSPEmitter::cmpar(const UDSPInstruction opc) SUB(64, R(RAX), R(RDX)); // Update_SR_Register64(res, isCarry2(sr, res), isOverflow(sr, -rr, res)); NEG(64, R(RDX)); - Update_SR_Register64_Carry2(); + Update_SR_Register64_Carry2(EAX, tmp1); + gpr.putXReg(tmp1); } #else Default(opc); @@ -271,9 +277,11 @@ void DSPEmitter::cmpi(const UDSPInstruction opc) if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { u8 reg = (opc >> 8) & 0x1; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 val = dsp_get_long_acc(reg); - get_long_acc(reg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(reg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 imm = (s64)(s16)dsp_fetch_code() << 16; // Immediate is considered to be at M level in the 40-bit accumulator. 
u16 imm = dsp_imem_read(compilePC+1); MOV(64, R(RDX), Imm64((s64)(s16)imm << 16)); @@ -281,7 +289,8 @@ void DSPEmitter::cmpi(const UDSPInstruction opc) SUB(64, R(RAX), R(RDX)); // Update_SR_Register64(res, isCarry2(val, res), isOverflow(val, -imm, res)); NEG(64, R(RDX)); - Update_SR_Register64_Carry2(); + Update_SR_Register64_Carry2(EAX, tmp1); + gpr.putXReg(tmp1); } #else Default(opc); @@ -302,8 +311,10 @@ void DSPEmitter::cmpis(const UDSPInstruction opc) { u8 areg = (opc >> 8) & 0x1; // s64 acc = dsp_get_long_acc(areg); - get_long_acc(areg, RCX); - MOV(64, R(RAX), R(RCX)); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + get_long_acc(areg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 val = (s8)opc; // val <<= 16; MOV(64, R(RDX), Imm64((s64)(s8)opc << 16)); @@ -311,7 +322,8 @@ void DSPEmitter::cmpis(const UDSPInstruction opc) SUB(64, R(RAX), R(RDX)); // Update_SR_Register64(res, isCarry2(acc, res), isOverflow(acc, -val, res)); NEG(64, R(RDX)); - Update_SR_Register64_Carry2(); + Update_SR_Register64_Carry2(EAX, tmp1); + gpr.putXReg(tmp1); } #else Default(opc); @@ -341,7 +353,7 @@ void DSPEmitter::xorr(const UDSPInstruction opc) // Update_SR_Register16((s16)accm, false, false, isOverS32(dsp_get_long_acc(dreg))); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - get_long_acc(dreg, RSI); + get_long_acc(dreg, RCX); Update_SR_Register16_OverS32(); } #else @@ -370,7 +382,7 @@ void DSPEmitter::andr(const UDSPInstruction opc) // Update_SR_Register16((s16)accm, false, false, isOverS32(dsp_get_long_acc(dreg))); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - get_long_acc(dreg, RSI); + get_long_acc(dreg, RCX); Update_SR_Register16_OverS32(); } #else @@ -399,7 +411,7 @@ void DSPEmitter::orr(const UDSPInstruction opc) // Update_SR_Register16((s16)accm, false, false, 
isOverS32(dsp_get_long_acc(dreg))); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - get_long_acc(dreg, RSI); + get_long_acc(dreg, RCX); Update_SR_Register16_OverS32(); } #else @@ -427,7 +439,7 @@ void DSPEmitter::andc(const UDSPInstruction opc) // Update_SR_Register16((s16)accm, false, false, isOverS32(dsp_get_long_acc(dreg))); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - get_long_acc(dreg, RSI); + get_long_acc(dreg, RCX); Update_SR_Register16_OverS32(); } #else @@ -455,7 +467,7 @@ void DSPEmitter::orc(const UDSPInstruction opc) // Update_SR_Register16((s16)accm, false, false, isOverS32(dsp_get_long_acc(dreg))); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - get_long_acc(dreg, RSI); + get_long_acc(dreg, RCX); Update_SR_Register16_OverS32(); } #else @@ -482,7 +494,7 @@ void DSPEmitter::xorc(const UDSPInstruction opc) // Update_SR_Register16((s16)accm, false, false, isOverS32(dsp_get_long_acc(dreg))); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - get_long_acc(dreg, RSI); + get_long_acc(dreg, RCX); Update_SR_Register16_OverS32(); } #else @@ -508,7 +520,7 @@ void DSPEmitter::notc(const UDSPInstruction opc) // Update_SR_Register16((s16)accm, false, false, isOverS32(dsp_get_long_acc(dreg))); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - get_long_acc(dreg, RSI); + get_long_acc(dreg, RCX); Update_SR_Register16_OverS32(); } #else @@ -536,7 +548,7 @@ void DSPEmitter::xori(const UDSPInstruction opc) // Update_SR_Register16((s16)g_dsp.r.acm[reg], false, false, 
isOverS32(dsp_get_long_acc(reg))); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - get_long_acc(reg, RSI); + get_long_acc(reg, RCX); Update_SR_Register16_OverS32(); } #else @@ -563,7 +575,7 @@ void DSPEmitter::andi(const UDSPInstruction opc) // Update_SR_Register16((s16)g_dsp.r.acm[reg], false, false, isOverS32(dsp_get_long_acc(reg))); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - get_long_acc(reg, RSI); + get_long_acc(reg, RCX); Update_SR_Register16_OverS32(); } #else @@ -590,7 +602,7 @@ void DSPEmitter::ori(const UDSPInstruction opc) // Update_SR_Register16((s16)g_dsp.r.acm[reg], false, false, isOverS32(dsp_get_long_acc(reg))); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - get_long_acc(reg, RSI); + get_long_acc(reg, RCX); Update_SR_Register16_OverS32(); } #else @@ -612,8 +624,10 @@ void DSPEmitter::addr(const UDSPInstruction opc) u8 sreg = ((opc >> 9) & 0x3) + DSP_REG_AXL0; // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 ax = (s16)g_dsp.r[sreg]; dsp_op_read_reg(sreg, RDX, SIGN); // ax <<= 16; @@ -624,15 +638,15 @@ void DSPEmitter::addr(const UDSPInstruction opc) // Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, ax, res)); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry(EAX, tmp1); } else { set_long_acc(dreg, RAX); } - + 
gpr.putXReg(tmp1); #else Default(opc); #endif @@ -649,9 +663,11 @@ void DSPEmitter::addax(const UDSPInstruction opc) u8 dreg = (opc >> 8) & 0x1; u8 sreg = (opc >> 9) & 0x1; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 ax = dsp_get_long_acx(sreg); get_long_acx(sreg, RDX); // s64 res = acc + ax; @@ -661,14 +677,15 @@ void DSPEmitter::addax(const UDSPInstruction opc) // Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, ax, res)); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry(EAX, tmp1); } else { set_long_acc(dreg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -684,9 +701,11 @@ void DSPEmitter::add(const UDSPInstruction opc) #ifdef _M_X64 u8 dreg = (opc >> 8) & 0x1; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc0 = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 acc1 = dsp_get_long_acc(1 - dreg); get_long_acc(1 - dreg, RDX); // s64 res = acc0 + acc1; @@ -696,14 +715,15 @@ void DSPEmitter::add(const UDSPInstruction opc) // Update_SR_Register64(res, isCarry(acc0, res), isOverflow(acc0, acc1, res)); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry(EAX, tmp1); } else { set_long_acc(dreg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -719,9 +739,11 @@ void 
DSPEmitter::addp(const UDSPInstruction opc) #ifdef _M_X64 u8 dreg = (opc >> 8) & 0x1; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 prod = dsp_get_long_prod(); get_long_prod(RDX); // s64 res = acc + prod; @@ -731,14 +753,15 @@ void DSPEmitter::addp(const UDSPInstruction opc) // Update_SR_Register64(res, isCarry2(acc, res), isOverflow(acc, prod, res)); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry2(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry2(EAX, tmp1); } else { set_long_acc(dreg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -756,9 +779,11 @@ void DSPEmitter::addaxl(const UDSPInstruction opc) u8 sreg = (opc >> 9) & 0x1; u8 dreg = (opc >> 8) & 0x1; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // u64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // u16 acx = (u16)dsp_get_ax_l(sreg); get_ax_l(sreg, RDX); // u64 res = acc + acx; @@ -768,14 +793,15 @@ void DSPEmitter::addaxl(const UDSPInstruction opc) // Update_SR_Register64((s64)res, isCarry(acc, res), isOverflow((s64)acc, (s64)acx, (s64)res)); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry(EAX, tmp1); } else { set_long_acc(dreg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -791,9 +817,11 @@ void DSPEmitter::addi(const UDSPInstruction opc) { #ifdef _M_X64 u8 areg = (opc 
>> 8) & 0x1; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc = dsp_get_long_acc(areg); - get_long_acc(areg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(areg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 imm = (s16)dsp_fetch_code(); s16 imm = dsp_imem_read(compilePC+1); //imm <<= 16; @@ -807,14 +835,15 @@ void DSPEmitter::addi(const UDSPInstruction opc) // Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, imm, res)); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - MOV(64, R(RSI), R(RAX)); - set_long_acc(areg, RSI); - Update_SR_Register64_Carry(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(areg, RCX); + Update_SR_Register64_Carry(EAX, tmp1); } else { set_long_acc(areg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -830,9 +859,11 @@ void DSPEmitter::addis(const UDSPInstruction opc) #ifdef _M_X64 u8 dreg = (opc >> 8) & 0x1; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 imm = (s8)(u8)opc; // imm <<= 16; MOV(8, R(RDX), Imm8((u8)opc)); @@ -845,14 +876,15 @@ void DSPEmitter::addis(const UDSPInstruction opc) // Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, imm, res)); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry(EAX, tmp1); } else { set_long_acc(dreg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -868,9 +900,11 @@ void DSPEmitter::incm(const UDSPInstruction opc) #ifdef _M_X64 u8 dreg = (opc >> 8) & 0x1; s64 subtract = 0x10000; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc = dsp_get_long_acc(dreg); - 
get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 res = acc + sub; ADD(64, R(RAX), Imm32((u32)subtract)); // dsp_set_long_acc(dreg, res); @@ -879,15 +913,15 @@ void DSPEmitter::incm(const UDSPInstruction opc) if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { MOV(64, R(RDX), Imm32((u32)subtract)); - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry(EAX, tmp1); } else { set_long_acc(dreg); - Update_SR_Register64_Carry(); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -902,9 +936,11 @@ void DSPEmitter::inc(const UDSPInstruction opc) { #ifdef _M_X64 u8 dreg = (opc >> 8) & 0x1; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 res = acc + 1; ADD(64, R(RAX), Imm8(1)); // dsp_set_long_acc(dreg, res); @@ -913,15 +949,16 @@ void DSPEmitter::inc(const UDSPInstruction opc) if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { MOV(64, R(RDX), Imm64(1)); - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry(EAX, tmp1); } else { set_long_acc(dreg); - Update_SR_Register64_Carry(); + Update_SR_Register64_Carry(EAX, tmp1);//why is this still done? 
} + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -940,9 +977,11 @@ void DSPEmitter::subr(const UDSPInstruction opc) u8 dreg = (opc >> 8) & 0x1; u8 sreg = ((opc >> 9) & 0x3) + DSP_REG_AXL0; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 ax = (s16)g_dsp.r[sreg]; dsp_op_read_reg(sreg, RDX, SIGN); // ax <<= 16; @@ -955,14 +994,15 @@ void DSPEmitter::subr(const UDSPInstruction opc) if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { NEG(64, R(RDX)); - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry2(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry2(EAX, tmp1); } else { set_long_acc(dreg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -979,9 +1019,11 @@ void DSPEmitter::subax(const UDSPInstruction opc) u8 dreg = (opc >> 8) & 0x1; u8 sreg = (opc >> 9) & 0x1; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 acx = dsp_get_long_acx(sreg); get_long_acx(sreg, RDX); // s64 res = acc - acx; @@ -992,14 +1034,15 @@ void DSPEmitter::subax(const UDSPInstruction opc) if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { NEG(64, R(RDX)); - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry2(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry2(EAX, tmp1); } else { set_long_acc(dreg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -1014,9 +1057,11 @@ void DSPEmitter::sub(const UDSPInstruction opc) { #ifdef _M_X64 u8 dreg = (opc >> 8) & 0x1; + X64Reg 
tmp1; + gpr.getFreeXReg(tmp1); // s64 acc1 = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 acc2 = dsp_get_long_acc(1 - dreg); get_long_acc(1 - dreg, RDX); // s64 res = acc1 - acc2; @@ -1027,14 +1072,15 @@ void DSPEmitter::sub(const UDSPInstruction opc) if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { NEG(64, R(RDX)); - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry2(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry2(EAX, tmp1); } else { set_long_acc(dreg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -1049,9 +1095,11 @@ void DSPEmitter::subp(const UDSPInstruction opc) { #ifdef _M_X64 u8 dreg = (opc >> 8) & 0x1; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 prod = dsp_get_long_prod(); get_long_prod(RDX); // s64 res = acc - prod; @@ -1062,14 +1110,15 @@ void DSPEmitter::subp(const UDSPInstruction opc) if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { NEG(64, R(RDX)); - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry2(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry2(EAX, tmp1); } else { set_long_acc(dreg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -1085,9 +1134,11 @@ void DSPEmitter::decm(const UDSPInstruction opc) #ifdef _M_X64 u8 dreg = (opc >> 8) & 0x01; s64 subtract = 0x10000; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // 
s64 res = acc - sub; SUB(64, R(RAX), Imm32((u32)subtract)); // dsp_set_long_acc(dreg, res); @@ -1096,14 +1147,15 @@ void DSPEmitter::decm(const UDSPInstruction opc) if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { MOV(64, R(RDX), Imm64(-subtract)); - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry2(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry2(EAX, tmp1); } else { set_long_acc(dreg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -1118,9 +1170,11 @@ void DSPEmitter::dec(const UDSPInstruction opc) { #ifdef _M_X64 u8 dreg = (opc >> 8) & 0x01; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 res = acc - 1; SUB(64, R(RAX), Imm32(1)); // dsp_set_long_acc(dreg, res); @@ -1129,14 +1183,15 @@ void DSPEmitter::dec(const UDSPInstruction opc) if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { MOV(64, R(RDX), Imm64(-1)); - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry2(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry2(EAX, tmp1); } else { set_long_acc(dreg); } + gpr.putXReg(tmp1); #else Default(opc); #endif diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitBranch.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitBranch.cpp index 815cff9c1e..6c4d5e0867 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitBranch.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitBranch.cpp @@ -56,8 +56,8 @@ static void ReJitConditional(const UDSPInstruction opc, DSPEmitter& emitter) emitter.TEST(16, R(EAX), Imm16(1)); //LE: problem in here, half the tests fail - skipCode2 = emitter.J_CC(CC_NE); - //skipCode2 = 
emitter.J_CC((CCFlags)(CC_NE - (cond & 1))); + skipCode2 = emitter.J_CC(CC_NE, true); + //skipCode2 = emitter.J_CC((CCFlags)(CC_NE - (cond & 1)), true); emitter.dsp_op_read_reg(DSP_REG_SR, RAX); emitter.TEST(16, R(EAX), Imm16(SR_ARITH_ZERO)); break; @@ -94,7 +94,7 @@ static void ReJitConditional(const UDSPInstruction opc, DSPEmitter& emitter) break; //c2 = emitter.gpr; //emitter.TEST(16, R(EAX), Imm16(SR_OVER_S32 | SR_TOP2BITS)); - //skipCode2 = emitter.J_CC((CCFlags)(CC_E + (cond & 1))); + //skipCode2 = emitter.J_CC((CCFlags)(CC_E + (cond & 1)), true); //emitter.TEST(16, R(EAX), Imm16(SR_ARITH_ZERO)); //break; } @@ -107,7 +107,7 @@ static void ReJitConditional(const UDSPInstruction opc, DSPEmitter& emitter) break; } DSPJitRegCache c1(emitter.gpr); - FixupBranch skipCode = cond == 0xe ? emitter.J_CC(CC_E) : emitter.J_CC((CCFlags)(CC_NE - (cond & 1))); + FixupBranch skipCode = cond == 0xe ? emitter.J_CC(CC_E,true) : emitter.J_CC((CCFlags)(CC_NE - (cond & 1)),true); jitCode(opc,emitter); emitter.gpr.flushRegs(c1); emitter.SetJumpTarget(skipCode); @@ -121,7 +121,8 @@ static void ReJitConditional(const UDSPInstruction opc, DSPEmitter& emitter) static void WriteBranchExit(DSPEmitter& emitter) { - emitter.SaveDSPRegs(); + DSPJitRegCache c(emitter.gpr); + emitter.gpr.saveRegs(); if (DSPAnalyzer::code_flags[emitter.startAddr] & DSPAnalyzer::CODE_IDLE_SKIP) { emitter.MOV(16, R(EAX), Imm16(0x1000)); @@ -131,6 +132,8 @@ static void WriteBranchExit(DSPEmitter& emitter) emitter.MOV(16, R(EAX), Imm16(emitter.blockSize[emitter.startAddr])); } emitter.JMP(emitter.returnDispatcher, true); + emitter.gpr.loadRegs(false); + emitter.gpr.flushRegs(c,false); } static void WriteBlockLink(DSPEmitter& emitter, u16 dest) @@ -142,12 +145,12 @@ static void WriteBlockLink(DSPEmitter& emitter, u16 dest) { emitter.gpr.flushRegs(); // Check if we have enough cycles to execute the next block - emitter.MOV(16, R(ESI), M(&cyclesLeft)); - emitter.CMP(16, R(ESI), 
Imm16(emitter.blockSize[emitter.startAddr] + emitter.blockSize[dest])); + emitter.MOV(16, R(ECX), M(&cyclesLeft)); + emitter.CMP(16, R(ECX), Imm16(emitter.blockSize[emitter.startAddr] + emitter.blockSize[dest])); FixupBranch notEnoughCycles = emitter.J_CC(CC_BE); - emitter.SUB(16, R(ESI), Imm16(emitter.blockSize[emitter.startAddr])); - emitter.MOV(16, M(&cyclesLeft), R(ESI)); + emitter.SUB(16, R(ECX), Imm16(emitter.blockSize[emitter.startAddr])); + emitter.MOV(16, M(&cyclesLeft), R(ECX)); emitter.JMP(emitter.blockLinks[dest], true); emitter.SetJumpTarget(notEnoughCycles); } @@ -339,9 +342,11 @@ void DSPEmitter::HandleLoop() FixupBranch loopUpdated = J(true); SetJumpTarget(loadStack); + DSPJitRegCache c(gpr); dsp_reg_load_stack(0); dsp_reg_load_stack(2); dsp_reg_load_stack(3); + gpr.flushRegs(c); SetJumpTarget(loopUpdated); SetJumpTarget(rLoopAddrG); diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitCCUtil.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitCCUtil.cpp index 620c2a2c43..e382b6114a 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitCCUtil.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitCCUtil.cpp @@ -22,12 +22,12 @@ #include "../DSPIntUtil.h" #include "../DSPEmitter.h" +#include "DSPJitUtil.h" #include "x64Emitter.h" #include "ABI.h" using namespace Gen; // In: RAX: s64 _Value -// In: RCX: 1 = carry, 2 = overflow // Clobbers RDX void DSPEmitter::Update_SR_Register(Gen::X64Reg val) { @@ -72,7 +72,6 @@ void DSPEmitter::Update_SR_Register(Gen::X64Reg val) } // In: RAX: s64 _Value -// In: RCX: 1 = carry, 2 = overflow // Clobbers RDX void DSPEmitter::Update_SR_Register64(Gen::X64Reg val) { @@ -86,18 +85,18 @@ void DSPEmitter::Update_SR_Register64(Gen::X64Reg val) #endif } -// In: RAX: s64 _Value -// In: RCX: 1 = carry, 2 = overflow +// In: (val): s64 _Value +// In: (carry_ovfl): 1 = carry, 2 = overflow // Clobbers RDX -void DSPEmitter::Update_SR_Register64_Carry(Gen::X64Reg val) -{ +void DSPEmitter::Update_SR_Register64_Carry(X64Reg val, X64Reg carry_ovfl) +{ #ifdef _M_X64 
OpArg sr_reg; gpr.getReg(DSP_REG_SR,sr_reg); // g_dsp.r[DSP_REG_SR] &= ~SR_CMP_MASK; AND(16, sr_reg, Imm16(~SR_CMP_MASK)); - CMP(64, R(RCX), R(val)); + CMP(64, R(carry_ovfl), R(val)); // 0x01 // g_dsp.r[DSP_REG_SR] |= SR_CARRY; @@ -110,10 +109,10 @@ void DSPEmitter::Update_SR_Register64_Carry(Gen::X64Reg val) // g_dsp.r[DSP_REG_SR] |= SR_OVERFLOW; // g_dsp.r[DSP_REG_SR] |= SR_OVERFLOW_STICKY; // Overflow = ((acc ^ res) & (ax ^ res)) < 0 - XOR(64, R(RCX), R(val)); + XOR(64, R(carry_ovfl), R(val)); XOR(64, R(RDX), R(val)); - AND(64, R(RCX), R(RDX)); - CMP(64, R(RCX), Imm8(0)); + AND(64, R(carry_ovfl), R(RDX)); + CMP(64, R(carry_ovfl), Imm8(0)); FixupBranch noOverflow = J_CC(CC_GE); OR(16, sr_reg, Imm16(SR_OVERFLOW | SR_OVERFLOW_STICKY)); SetJumpTarget(noOverflow); @@ -123,10 +122,10 @@ void DSPEmitter::Update_SR_Register64_Carry(Gen::X64Reg val) #endif } -// In: RAX: s64 _Value -// In: RCX: 1 = carry, 2 = overflow +// In: (val): s64 _Value +// In: (carry_ovfl): 1 = carry, 2 = overflow // Clobbers RDX -void DSPEmitter::Update_SR_Register64_Carry2(Gen::X64Reg val) +void DSPEmitter::Update_SR_Register64_Carry2(X64Reg val, X64Reg carry_ovfl) { #ifdef _M_X64 OpArg sr_reg; @@ -134,7 +133,7 @@ void DSPEmitter::Update_SR_Register64_Carry2(Gen::X64Reg val) // g_dsp.r[DSP_REG_SR] &= ~SR_CMP_MASK; AND(16, sr_reg, Imm16(~SR_CMP_MASK)); - CMP(64, R(RCX), R(val)); + CMP(64, R(carry_ovfl), R(val)); // 0x01 // g_dsp.r[DSP_REG_SR] |= SR_CARRY; @@ -147,10 +146,10 @@ void DSPEmitter::Update_SR_Register64_Carry2(Gen::X64Reg val) // g_dsp.r[DSP_REG_SR] |= SR_OVERFLOW; // g_dsp.r[DSP_REG_SR] |= SR_OVERFLOW_STICKY; // Overflow = ((acc ^ res) & (ax ^ res)) < 0 - XOR(64, R(RCX), R(val)); + XOR(64, R(carry_ovfl), R(val)); XOR(64, R(RDX), R(val)); - AND(64, R(RCX), R(RDX)); - CMP(64, R(RCX), Imm8(0)); + AND(64, R(carry_ovfl), R(RDX)); + CMP(64, R(carry_ovfl), Imm8(0)); FixupBranch noOverflow = J_CC(CC_GE); OR(16, sr_reg, Imm16(SR_OVERFLOW | SR_OVERFLOW_STICKY)); SetJumpTarget(noOverflow); @@ 
-171,9 +170,8 @@ void DSPEmitter::Update_SR_Register64_Carry2(Gen::X64Reg val) //} // In: RAX: s64 _Value -// In: RCX: 1 = carry, 2 = overflow // Clobbers RDX -void DSPEmitter::Update_SR_Register16(Gen::X64Reg val) +void DSPEmitter::Update_SR_Register16(X64Reg val) { #ifdef _M_X64 OpArg sr_reg; @@ -214,7 +212,6 @@ void DSPEmitter::Update_SR_Register16(Gen::X64Reg val) } // In: RAX: s64 _Value -// In: RCX: 1 = carry, 2 = overflow // Clobbers RDX void DSPEmitter::Update_SR_Register16_OverS32(Gen::X64Reg val) { @@ -225,8 +222,8 @@ void DSPEmitter::Update_SR_Register16_OverS32(Gen::X64Reg val) // // 0x10 // if (_Value != (s32)_Value) g_dsp.r[DSP_REG_SR] |= SR_OVER_S32; - MOVSX(64, 32, RSI, R(val)); - CMP(64, R(RSI), R(val)); + MOVSX(64, 32, RCX, R(val)); + CMP(64, R(RCX), R(val)); FixupBranch noOverS32 = J_CC(CC_E); OR(16, sr_reg, Imm16(SR_OVER_S32)); SetJumpTarget(noOverS32); diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitExtOps.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitExtOps.cpp index 938b9534ec..0d2d28deb3 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitExtOps.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitExtOps.cpp @@ -58,7 +58,7 @@ void DSPEmitter::ir(const UDSPInstruction opc) { void DSPEmitter::nr(const UDSPInstruction opc) { u8 reg = opc & 0x3; - increase_addr_reg(reg); + increase_addr_reg(reg, reg); } // MV $axD.D, $acS.S @@ -81,9 +81,16 @@ void DSPEmitter::s(const UDSPInstruction opc) u8 sreg = ((opc >> 3) & 0x3) + DSP_REG_ACL0; // u16 addr = g_dsp.r[dest]; dsp_op_read_reg(dreg, RAX, ZERO); - dsp_op_read_reg(sreg, RCX, ZERO); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1, ZERO); // u16 val = g_dsp.r[src]; - dmem_write(); + dmem_write(tmp1); + + gpr.putXReg(tmp1); + increment_addr_reg(dreg); } @@ -96,9 +103,16 @@ void DSPEmitter::sn(const UDSPInstruction opc) u8 dreg = opc & 0x3; u8 sreg = ((opc >> 3) & 0x3) + DSP_REG_ACL0; dsp_op_read_reg(dreg, RAX, ZERO); - dsp_op_read_reg(sreg, RCX, ZERO); - dmem_write(); - 
increase_addr_reg(dreg); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1, ZERO); + dmem_write(tmp1); + + gpr.putXReg(tmp1); + + increase_addr_reg(dreg, dreg); } // L $axD.D, @$arS @@ -117,7 +131,7 @@ void DSPEmitter::l(const UDSPInstruction opc) //even if only for one bit, can only //store (up to) two registers in EBX, //so store all of SR - MOV(16, R(EAX), M(&g_dsp.r.sr)); + dsp_op_read_reg(DSP_REG_SR, RAX); SHL(32, R(EAX), Imm8(16)); OR(32, R(EBX), R(EAX)); } @@ -141,12 +155,12 @@ void DSPEmitter::ln(const UDSPInstruction opc) //even if only for one bit, can only //store (up to) two registers in EBX, //so store all of SR - MOV(16, R(EAX), M(&g_dsp.r.sr)); + dsp_op_read_reg(DSP_REG_SR, RAX); SHL(32, R(EAX), Imm8(16)); OR(32, R(EBX), R(EAX)); } - increase_addr_reg(sreg); + increase_addr_reg(sreg, sreg); } // LS $axD.D, $acS.m @@ -159,8 +173,14 @@ void DSPEmitter::ls(const UDSPInstruction opc) u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; dsp_op_read_reg(DSP_REG_AR3, RAX, ZERO); - get_acc_m(sreg, ECX, false); - dmem_write(); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + get_acc_m(sreg, tmp1, false); + dmem_write(tmp1); + + gpr.putXReg(tmp1); pushExtValueFromMem(dreg, DSP_REG_AR0); @@ -180,13 +200,19 @@ void DSPEmitter::lsn(const UDSPInstruction opc) u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; dsp_op_read_reg(DSP_REG_AR3, RAX, ZERO); - get_acc_m(sreg, ECX, false); - dmem_write(); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + get_acc_m(sreg, tmp1, false); + dmem_write(tmp1); + + gpr.putXReg(tmp1); pushExtValueFromMem(dreg, DSP_REG_AR0); increment_addr_reg(DSP_REG_AR3); - increase_addr_reg(DSP_REG_AR0); + increase_addr_reg(DSP_REG_AR0, DSP_REG_AR0); } // LSM $axD.D, $acS.m @@ -200,12 +226,18 @@ void DSPEmitter::lsm(const UDSPInstruction opc) u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; dsp_op_read_reg(DSP_REG_AR3, RAX, ZERO); - get_acc_m(sreg, ECX, false); - dmem_write(); + + X64Reg 
tmp1; + gpr.getFreeXReg(tmp1); + + get_acc_m(sreg, tmp1, false); + dmem_write(tmp1); + + gpr.putXReg(tmp1); pushExtValueFromMem(dreg, DSP_REG_AR0); - increase_addr_reg(DSP_REG_AR3); + increase_addr_reg(DSP_REG_AR3, DSP_REG_AR3); increment_addr_reg(DSP_REG_AR0); } @@ -221,13 +253,19 @@ void DSPEmitter::lsnm(const UDSPInstruction opc) u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; dsp_op_read_reg(DSP_REG_AR3, RAX, ZERO); - get_acc_m(sreg, ECX, false); - dmem_write(); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + get_acc_m(sreg, tmp1, false); + dmem_write(tmp1); + + gpr.putXReg(tmp1); pushExtValueFromMem(dreg, DSP_REG_AR0); - increase_addr_reg(DSP_REG_AR3); - increase_addr_reg(DSP_REG_AR0); + increase_addr_reg(DSP_REG_AR3, DSP_REG_AR3); + increase_addr_reg(DSP_REG_AR0, DSP_REG_AR0); } // SL $acS.m, $axD.D @@ -240,13 +278,19 @@ void DSPEmitter::sl(const UDSPInstruction opc) u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; dsp_op_read_reg(DSP_REG_AR0, RAX, ZERO); - get_acc_m(sreg, ECX, false); - dmem_write(); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + get_acc_m(sreg, tmp1, false); + dmem_write(tmp1); + + gpr.putXReg(tmp1); pushExtValueFromMem(dreg, DSP_REG_AR3); increment_addr_reg(DSP_REG_AR3); - increment_addr_reg(DSP_REG_AR0); + increment_addr_reg(DSP_REG_AR0); } // SLN $acS.m, $axD.D @@ -260,13 +304,19 @@ void DSPEmitter::sln(const UDSPInstruction opc) u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; dsp_op_read_reg(DSP_REG_AR0, RAX, ZERO); - get_acc_m(sreg, ECX, false); - dmem_write(); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + get_acc_m(sreg, tmp1, false); + dmem_write(tmp1); + + gpr.putXReg(tmp1); pushExtValueFromMem(dreg, DSP_REG_AR3); increment_addr_reg(DSP_REG_AR3); - increase_addr_reg(DSP_REG_AR0); + increase_addr_reg(DSP_REG_AR0, DSP_REG_AR0); } // SLM $acS.m, $axD.D @@ -280,12 +330,18 @@ void DSPEmitter::slm(const UDSPInstruction opc) u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; 
dsp_op_read_reg(DSP_REG_AR0, RAX, ZERO); - get_acc_m(sreg, ECX, false); - dmem_write(); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + get_acc_m(sreg, tmp1, false); + dmem_write(tmp1); + + gpr.putXReg(tmp1); pushExtValueFromMem(dreg, DSP_REG_AR3); - increase_addr_reg(DSP_REG_AR3); + increase_addr_reg(DSP_REG_AR3, DSP_REG_AR3); increment_addr_reg(DSP_REG_AR0); } @@ -300,13 +356,19 @@ void DSPEmitter::slnm(const UDSPInstruction opc) u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; dsp_op_read_reg(DSP_REG_AR0, RAX, ZERO); - get_acc_m(sreg, ECX, false); - dmem_write(); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + get_acc_m(sreg, tmp1, false); + dmem_write(tmp1); + + gpr.putXReg(tmp1); pushExtValueFromMem(dreg, DSP_REG_AR3); - increase_addr_reg(DSP_REG_AR3); - increase_addr_reg(DSP_REG_AR0); + increase_addr_reg(DSP_REG_AR3, DSP_REG_AR3); + increase_addr_reg(DSP_REG_AR0, DSP_REG_AR0); } // LD $ax0.d, $ax1.r, @$arS @@ -319,6 +381,9 @@ void DSPEmitter::slnm(const UDSPInstruction opc) // points into an invalid memory page (ie 0x2000), then AX0.H keeps its old // value. (not implemented yet) If AR3 points into an invalid memory page, then // AX0.L gets the same value as AX0.H. 
(not implemented yet) + +// LD $axr.h, @$ard +// xxxx xxxx 11dr 0011 void DSPEmitter::ld(const UDSPInstruction opc) { u8 dreg = (opc >> 5) & 0x1; @@ -329,12 +394,14 @@ void DSPEmitter::ld(const UDSPInstruction opc) pushExtValueFromMem((dreg << 1) + DSP_REG_AXL0, sreg); // if (IsSameMemArea(g_dsp.r[sreg], g_dsp.r[DSP_REG_AR3])) { - dsp_op_read_reg(sreg, RSI, NONE); - dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); - SHR(16, R(ESI), Imm8(10)); - SHR(16, R(EDI), Imm8(10)); + X64Reg tmp; + gpr.getFreeXReg(tmp); + dsp_op_read_reg(sreg, RCX, NONE); + dsp_op_read_reg(DSP_REG_AR3, tmp, NONE); + XOR(16, R(ECX), R(tmp)); + gpr.putXReg(tmp); DSPJitRegCache c(gpr); - CMP(16, R(ESI), R(EDI)); + TEST(16, R(ECX), Imm16(0xfc00)); FixupBranch not_equal = J_CC(CC_NE,true); pushExtValueFromMem2((rreg << 1) + DSP_REG_AXL1, sreg); gpr.flushRegs(c); @@ -349,13 +416,15 @@ void DSPEmitter::ld(const UDSPInstruction opc) } else { pushExtValueFromMem(rreg + DSP_REG_AXH0, dreg); + X64Reg tmp; + gpr.getFreeXReg(tmp); //if (IsSameMemArea(g_dsp.r[dreg], g_dsp.r[DSP_REG_AR3])) { - dsp_op_read_reg(dreg, RSI, NONE); - dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); - SHR(16, R(ESI), Imm8(10)); - SHR(16, R(EDI), Imm8(10)); + dsp_op_read_reg(dreg, RCX, NONE); + dsp_op_read_reg(DSP_REG_AR3, tmp, NONE); + XOR(16, R(ECX), R(tmp)); + gpr.putXReg(tmp); DSPJitRegCache c(gpr); - CMP(16, R(ESI), R(EDI)); + TEST(16, R(ECX), Imm16(0xfc00)); FixupBranch not_equal = J_CC(CC_NE, true); pushExtValueFromMem2(rreg + DSP_REG_AXL0, dreg); gpr.flushRegs(c); @@ -382,13 +451,15 @@ void DSPEmitter::ldn(const UDSPInstruction opc) if (sreg != DSP_REG_AR3) { pushExtValueFromMem((dreg << 1) + DSP_REG_AXL0, sreg); + X64Reg tmp; + gpr.getFreeXReg(tmp); //if (IsSameMemArea(g_dsp.r[sreg], g_dsp.r[DSP_REG_AR3])) { - dsp_op_read_reg(sreg, RSI, NONE); - dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); - SHR(16, R(ESI), Imm8(10)); - SHR(16, R(EDI), Imm8(10)); + dsp_op_read_reg(sreg, RCX, NONE); + dsp_op_read_reg(DSP_REG_AR3, tmp, NONE); + XOR(16, R(ECX), 
R(tmp)); + gpr.putXReg(tmp); DSPJitRegCache c(gpr); - CMP(16, R(ESI), R(EDI)); + TEST(16, R(ECX), Imm16(0xfc00)); FixupBranch not_equal = J_CC(CC_NE,true); pushExtValueFromMem2((rreg << 1) + DSP_REG_AXL1, sreg); gpr.flushRegs(c); @@ -398,17 +469,19 @@ void DSPEmitter::ldn(const UDSPInstruction opc) gpr.flushRegs(c); SetJumpTarget(after); - increase_addr_reg(sreg); + increase_addr_reg(sreg, sreg); } else { pushExtValueFromMem(rreg + DSP_REG_AXH0, dreg); + X64Reg tmp; + gpr.getFreeXReg(tmp); //if (IsSameMemArea(g_dsp.r[dreg], g_dsp.r[DSP_REG_AR3])) { - dsp_op_read_reg(dreg, RSI, NONE); - dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); - SHR(16, R(ESI), Imm8(10)); - SHR(16, R(EDI), Imm8(10)); + dsp_op_read_reg(dreg, RCX, NONE); + dsp_op_read_reg(DSP_REG_AR3, tmp, NONE); + XOR(16, R(ECX), R(tmp)); + gpr.putXReg(tmp); DSPJitRegCache c(gpr); - CMP(16, R(ESI), R(EDI)); + TEST(16, R(ECX), Imm16(0xfc00)); FixupBranch not_equal = J_CC(CC_NE,true); pushExtValueFromMem2(rreg + DSP_REG_AXL0, dreg); gpr.flushRegs(c); @@ -418,7 +491,7 @@ void DSPEmitter::ldn(const UDSPInstruction opc) gpr.flushRegs(c); SetJumpTarget(after); - increase_addr_reg(dreg); + increase_addr_reg(dreg, dreg); } increment_addr_reg(DSP_REG_AR3); @@ -435,13 +508,15 @@ void DSPEmitter::ldm(const UDSPInstruction opc) if (sreg != DSP_REG_AR3) { pushExtValueFromMem((dreg << 1) + DSP_REG_AXL0, sreg); + X64Reg tmp; + gpr.getFreeXReg(tmp); //if (IsSameMemArea(g_dsp.r[sreg], g_dsp.r[DSP_REG_AR3])) { - dsp_op_read_reg(sreg, RSI, NONE); - dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); - SHR(16, R(ESI), Imm8(10)); - SHR(16, R(EDI), Imm8(10)); + dsp_op_read_reg(sreg, RCX, NONE); + dsp_op_read_reg(DSP_REG_AR3, tmp, NONE); + XOR(16, R(ECX), R(tmp)); + gpr.putXReg(tmp); DSPJitRegCache c(gpr); - CMP(16, R(ESI), R(EDI)); + TEST(16, R(ECX), Imm16(0xfc00)); FixupBranch not_equal = J_CC(CC_NE,true); pushExtValueFromMem2((rreg << 1) + DSP_REG_AXL1, sreg); gpr.flushRegs(c); @@ -455,13 +530,15 @@ void DSPEmitter::ldm(const UDSPInstruction opc) 
} else { pushExtValueFromMem(rreg + DSP_REG_AXH0, dreg); + X64Reg tmp; + gpr.getFreeXReg(tmp); //if (IsSameMemArea(g_dsp.r[dreg], g_dsp.r[DSP_REG_AR3])) { - dsp_op_read_reg(dreg, RSI, NONE); - dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); - SHR(16, R(ESI), Imm8(10)); - SHR(16, R(EDI), Imm8(10)); + dsp_op_read_reg(dreg, RCX, NONE); + dsp_op_read_reg(DSP_REG_AR3, tmp, NONE); + XOR(16, R(ECX), R(tmp)); + gpr.putXReg(tmp); DSPJitRegCache c(gpr); - CMP(16, R(ESI), R(EDI)); + TEST(16, R(ECX), Imm16(0xfc00)); FixupBranch not_equal = J_CC(CC_NE,true); pushExtValueFromMem2(rreg + DSP_REG_AXL0, dreg); gpr.flushRegs(c); @@ -474,7 +551,7 @@ void DSPEmitter::ldm(const UDSPInstruction opc) increment_addr_reg(dreg); } - increase_addr_reg(DSP_REG_AR3); + increase_addr_reg(DSP_REG_AR3, DSP_REG_AR3); } // LDNM $ax0.d, $ax1.r, @$arS @@ -488,13 +565,15 @@ void DSPEmitter::ldnm(const UDSPInstruction opc) if (sreg != DSP_REG_AR3) { pushExtValueFromMem((dreg << 1) + DSP_REG_AXL0, sreg); + X64Reg tmp; + gpr.getFreeXReg(tmp); //if (IsSameMemArea(g_dsp.r[sreg], g_dsp.r[DSP_REG_AR3])) { - dsp_op_read_reg(sreg, RSI, NONE); - dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); - SHR(16, R(ESI), Imm8(10)); - SHR(16, R(EDI), Imm8(10)); + dsp_op_read_reg(sreg, RCX, NONE); + dsp_op_read_reg(DSP_REG_AR3, tmp, NONE); + XOR(16, R(ECX), R(tmp)); + gpr.putXReg(tmp); DSPJitRegCache c(gpr); - CMP(16, R(ESI), R(EDI)); + TEST(16, R(ECX), Imm16(0xfc00)); FixupBranch not_equal = J_CC(CC_NE,true); pushExtValueFromMem2((rreg << 1) + DSP_REG_AXL1, sreg); gpr.flushRegs(c); @@ -504,17 +583,19 @@ void DSPEmitter::ldnm(const UDSPInstruction opc) gpr.flushRegs(c); SetJumpTarget(after); - increase_addr_reg(sreg); + increase_addr_reg(sreg, sreg); } else { pushExtValueFromMem(rreg + DSP_REG_AXH0, dreg); + X64Reg tmp; + gpr.getFreeXReg(tmp); //if (IsSameMemArea(g_dsp.r[dreg], g_dsp.r[DSP_REG_AR3])) { - dsp_op_read_reg(dreg, RSI, NONE); - dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); - SHR(16, R(ESI), Imm8(10)); - SHR(16, R(EDI), Imm8(10)); 
+ dsp_op_read_reg(dreg, RCX, NONE); + dsp_op_read_reg(DSP_REG_AR3, tmp, NONE); + XOR(16, R(ECX), R(tmp)); + gpr.putXReg(tmp); DSPJitRegCache c(gpr); - CMP(16, R(ESI), R(EDI)); + TEST(16, R(ECX), Imm16(0xfc00)); FixupBranch not_equal = J_CC(CC_NE,true); pushExtValueFromMem2(rreg + DSP_REG_AXL0, dreg); gpr.flushRegs(c); @@ -524,10 +605,10 @@ void DSPEmitter::ldnm(const UDSPInstruction opc) gpr.flushRegs(c); SetJumpTarget(after); - increase_addr_reg(dreg); + increase_addr_reg(dreg, dreg); } - increase_addr_reg(DSP_REG_AR3); + increase_addr_reg(DSP_REG_AR3, DSP_REG_AR3); } @@ -540,8 +621,15 @@ void DSPEmitter::pushExtValueFromReg(u16 dreg, u16 sreg) { void DSPEmitter::pushExtValueFromMem(u16 dreg, u16 sreg) { // u16 addr = g_dsp.r[addr]; - dsp_op_read_reg(sreg, RCX, ZERO); - dmem_read(); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1, ZERO); + dmem_read(tmp1); + + gpr.putXReg(tmp1); + MOVZX(32, 16, EBX, R(EAX)); storeIndex = dreg; @@ -549,8 +637,15 @@ void DSPEmitter::pushExtValueFromMem(u16 dreg, u16 sreg) { void DSPEmitter::pushExtValueFromMem2(u16 dreg, u16 sreg) { // u16 addr = g_dsp.r[addr]; - dsp_op_read_reg(sreg, RCX, ZERO); - dmem_read(); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1, ZERO); + dmem_read(tmp1); + + gpr.putXReg(tmp1); + SHL(32, R(EAX), Imm8(16)); OR(32, R(EBX), R(EAX)); @@ -569,7 +664,7 @@ void DSPEmitter::popExtValueToReg() { dsp_op_write_reg(storeIndex, RBX); if (storeIndex >= DSP_REG_ACM0 && storeIndex2 == -1) { TEST(32, R(EBX), Imm32(SR_40_MODE_BIT << 16)); - FixupBranch not_40bit = J_CC(CC_Z); + FixupBranch not_40bit = J_CC(CC_Z, true); DSPJitRegCache c(gpr); //if (g_dsp.r[DSP_REG_SR] & SR_40_MODE_BIT) //{ @@ -613,16 +708,16 @@ void DSPEmitter::zeroWriteBackLog(const UDSPInstruction opc) if ((opc >> 12) == 0x3) { if (! extOpTable[opc & 0x7F]->jitFunc) { - SaveDSPRegs(); + gpr.pushRegs(); ABI_CallFunction((void*)::zeroWriteBackLog); - LoadDSPRegs(); + gpr.popRegs(); } } else { if (! 
extOpTable[opc & 0xFF]->jitFunc) { - SaveDSPRegs(); + gpr.pushRegs(); ABI_CallFunction((void*)::zeroWriteBackLog); - LoadDSPRegs(); + gpr.popRegs(); } } return; diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitLoadStore.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitLoadStore.cpp index bd4a320bb3..f51ea96099 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitLoadStore.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitLoadStore.cpp @@ -35,11 +35,18 @@ void DSPEmitter::srs(const UDSPInstruction opc) { u8 reg = ((opc >> 8) & 0x7) + 0x18; //u16 addr = (g_dsp.r.cr << 8) | (opc & 0xFF); - dsp_op_read_reg(reg, RCX, ZERO); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(reg, tmp1, ZERO); dsp_op_read_reg(DSP_REG_CR, RAX, ZERO); SHL(16, R(EAX), Imm8(8)); - OR(8, R(EAX), Imm8(opc & 0xFF)); - dmem_write(); + OR(16, R(EAX), Imm16(opc & 0xFF)); + dmem_write(tmp1); + + gpr.putXReg(tmp1); + } // LRS $(0x18+D), @M @@ -50,11 +57,18 @@ void DSPEmitter::srs(const UDSPInstruction opc) void DSPEmitter::lrs(const UDSPInstruction opc) { u8 reg = ((opc >> 8) & 0x7) + 0x18; + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + //u16 addr = (g_dsp.r[DSP_REG_CR] << 8) | (opc & 0xFF); - dsp_op_read_reg(DSP_REG_CR, RCX, ZERO); - SHL(16, R(ECX), Imm8(8)); - OR(8, R(ECX), Imm8(opc & 0xFF)); - dmem_read(); + dsp_op_read_reg(DSP_REG_CR, tmp1, ZERO); + SHL(16, R(tmp1), Imm8(8)); + OR(16, R(tmp1), Imm16(opc & 0xFF)); + dmem_read(tmp1); + + gpr.putXReg(tmp1); + dsp_op_write_reg(reg, RAX); dsp_conditional_extend_accum(reg); } @@ -82,8 +96,14 @@ void DSPEmitter::sr(const UDSPInstruction opc) { u8 reg = opc & DSP_REG_MASK; u16 address = dsp_imem_read(compilePC + 1); - dsp_op_read_reg(reg, ECX); - dmem_write_imm(address); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(reg, tmp1); + dmem_write_imm(address, tmp1); + + gpr.putXReg(tmp1); } // SI @M, #I @@ -95,8 +115,14 @@ void DSPEmitter::si(const UDSPInstruction opc) { u16 address = (s8)opc; u16 imm = dsp_imem_read(compilePC + 1); - MOV(32, R(ECX), 
Imm32((u32)imm)); - dmem_write_imm(address); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + MOV(32, R(tmp1), Imm32((u32)imm)); + dmem_write_imm(address, tmp1); + + gpr.putXReg(tmp1); } // LRR $D, @$S @@ -108,8 +134,14 @@ void DSPEmitter::lrr(const UDSPInstruction opc) u8 sreg = (opc >> 5) & 0x3; u8 dreg = opc & 0x1f; - dsp_op_read_reg(sreg, ECX); - dmem_read(); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1); + dmem_read(tmp1); + + gpr.putXReg(tmp1); + dsp_op_write_reg(dreg, EAX); dsp_conditional_extend_accum(dreg); } @@ -124,8 +156,14 @@ void DSPEmitter::lrrd(const UDSPInstruction opc) u8 sreg = (opc >> 5) & 0x3; u8 dreg = opc & 0x1f; - dsp_op_read_reg(sreg, ECX); - dmem_read(); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1); + dmem_read(tmp1); + + gpr.putXReg(tmp1); + dsp_op_write_reg(dreg, EAX); dsp_conditional_extend_accum(dreg); decrement_addr_reg(sreg); @@ -141,8 +179,14 @@ void DSPEmitter::lrri(const UDSPInstruction opc) u8 sreg = (opc >> 5) & 0x3; u8 dreg = opc & 0x1f; - dsp_op_read_reg(sreg, ECX); - dmem_read(); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1); + dmem_read(tmp1); + + gpr.putXReg(tmp1); + dsp_op_write_reg(dreg, EAX); dsp_conditional_extend_accum(dreg); increment_addr_reg(sreg); @@ -158,11 +202,17 @@ void DSPEmitter::lrrn(const UDSPInstruction opc) u8 sreg = (opc >> 5) & 0x3; u8 dreg = opc & 0x1f; - dsp_op_read_reg(sreg, ECX); - dmem_read(); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1); + dmem_read(tmp1); + + gpr.putXReg(tmp1); + dsp_op_write_reg(dreg, EAX); dsp_conditional_extend_accum(dreg); - increase_addr_reg(sreg); + increase_addr_reg(sreg, sreg); } // SRR @$D, $S @@ -175,9 +225,14 @@ void DSPEmitter::srr(const UDSPInstruction opc) u8 dreg = (opc >> 5) & 0x3; u8 sreg = opc & 0x1f; - dsp_op_read_reg(sreg, ECX); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1); dsp_op_read_reg(dreg, RAX, ZERO); - dmem_write(); + 
dmem_write(tmp1); + + gpr.putXReg(tmp1); } // SRRD @$D, $S @@ -190,9 +245,15 @@ void DSPEmitter::srrd(const UDSPInstruction opc) u8 dreg = (opc >> 5) & 0x3; u8 sreg = opc & 0x1f; - dsp_op_read_reg(sreg, ECX); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1); dsp_op_read_reg(dreg, RAX, ZERO); - dmem_write(); + dmem_write(tmp1); + + gpr.putXReg(tmp1); + decrement_addr_reg(dreg); } @@ -206,9 +267,15 @@ void DSPEmitter::srri(const UDSPInstruction opc) u8 dreg = (opc >> 5) & 0x3; u8 sreg = opc & 0x1f; - dsp_op_read_reg(sreg, ECX); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1); dsp_op_read_reg(dreg, RAX, ZERO); - dmem_write(); + dmem_write(tmp1); + + gpr.putXReg(tmp1); + increment_addr_reg(dreg); } @@ -222,10 +289,16 @@ void DSPEmitter::srrn(const UDSPInstruction opc) u8 dreg = (opc >> 5) & 0x3; u8 sreg = opc & 0x1f; - dsp_op_read_reg(sreg, ECX); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1); dsp_op_read_reg(dreg, RAX, ZERO); - dmem_write(); - increase_addr_reg(dreg); + dmem_write(tmp1); + + gpr.putXReg(tmp1); + + increase_addr_reg(dreg, dreg); } // ILRR $acD.m, @$arS @@ -237,8 +310,14 @@ void DSPEmitter::ilrr(const UDSPInstruction opc) u16 reg = opc & 0x3; u16 dreg = (opc >> 8) & 1; - dsp_op_read_reg(reg, RCX, ZERO); - imem_read(); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(reg, tmp1, ZERO); + imem_read(tmp1); + + gpr.putXReg(tmp1); + set_acc_m(dreg, R(RAX)); dsp_conditional_extend_accum(dreg); } @@ -252,8 +331,14 @@ void DSPEmitter::ilrrd(const UDSPInstruction opc) u16 reg = opc & 0x3; u16 dreg = (opc >> 8) & 1; - dsp_op_read_reg(reg, RCX, ZERO); - imem_read(); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(reg, tmp1, ZERO); + imem_read(tmp1); + + gpr.putXReg(tmp1); + set_acc_m(dreg, R(RAX)); dsp_conditional_extend_accum(dreg); decrement_addr_reg(reg); @@ -268,8 +353,14 @@ void DSPEmitter::ilrri(const UDSPInstruction opc) u16 reg = opc & 0x3; u16 dreg = (opc >> 8) & 
1; - dsp_op_read_reg(reg, RCX, ZERO); - imem_read(); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(reg, tmp1, ZERO); + imem_read(tmp1); + + gpr.putXReg(tmp1); + set_acc_m(dreg, R(RAX)); dsp_conditional_extend_accum(dreg); increment_addr_reg(reg); @@ -285,10 +376,16 @@ void DSPEmitter::ilrrn(const UDSPInstruction opc) u16 reg = opc & 0x3; u16 dreg = (opc >> 8) & 1; - dsp_op_read_reg(reg, RCX, ZERO); - imem_read(); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(reg, tmp1, ZERO); + imem_read(tmp1); + + gpr.putXReg(tmp1); + set_acc_m(dreg, R(RAX)); dsp_conditional_extend_accum(dreg); - increase_addr_reg(reg); + increase_addr_reg(reg, reg); } diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitMisc.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitMisc.cpp index a58efb98d4..c733e0367c 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitMisc.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitMisc.cpp @@ -25,7 +25,7 @@ using namespace Gen; //clobbers: //EAX = (s8)g_dsp.reg_stack_ptr[stack_reg] -//CX = g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]] +//expects: void DSPEmitter::dsp_reg_stack_push(int stack_reg) { //g_dsp.reg_stack_ptr[stack_reg]++; @@ -35,30 +35,38 @@ void DSPEmitter::dsp_reg_stack_push(int stack_reg) AND(8, R(AL), Imm8(DSP_STACK_MASK)); MOV(8, M(&g_dsp.reg_stack_ptr[stack_reg]), R(AL)); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); //g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]] = g_dsp.r[DSP_REG_ST0 + stack_reg]; - MOV(16, R(CX), M(&g_dsp.r.st[stack_reg])); + MOV(16, R(tmp1), M(&g_dsp.r.st[stack_reg])); #ifdef _M_IX86 // All32 MOVZX(32, 8, EAX, R(AL)); #else MOVZX(64, 8, RAX, R(AL)); #endif - MOV(16, MComplex(EAX, EAX, 1, (u64)&g_dsp.reg_stack[stack_reg][0]), R(CX)); + MOV(16, MComplex(EAX, EAX, 1, + PtrOffset(&g_dsp.reg_stack[stack_reg][0],0)), R(tmp1)); + gpr.putXReg(tmp1); } //clobbers: //EAX = (s8)g_dsp.reg_stack_ptr[stack_reg] -//CX = g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]] +//expects: void 
DSPEmitter::dsp_reg_stack_pop(int stack_reg) { //g_dsp.r[DSP_REG_ST0 + stack_reg] = g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]]; MOV(8, R(AL), M(&g_dsp.reg_stack_ptr[stack_reg])); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); #ifdef _M_IX86 // All32 MOVZX(32, 8, EAX, R(AL)); #else MOVZX(64, 8, RAX, R(AL)); #endif - MOV(16, R(CX), MComplex(EAX, EAX, 1, (u64)&g_dsp.reg_stack[stack_reg][0])); - MOV(16, M(&g_dsp.r.st[stack_reg]), R(CX)); + MOV(16, R(tmp1), MComplex(EAX, EAX, 1, + PtrOffset(&g_dsp.reg_stack[stack_reg][0],0))); + MOV(16, M(&g_dsp.r.st[stack_reg]), R(tmp1)); + gpr.putXReg(tmp1); //g_dsp.reg_stack_ptr[stack_reg]--; //g_dsp.reg_stack_ptr[stack_reg] &= DSP_STACK_MASK; @@ -165,7 +173,7 @@ void DSPEmitter::dsp_conditional_extend_accum(int reg) //} gpr.flushRegs(c); SetJumpTarget(not_40bit); - gpr.putReg(DSP_REG_SR); + gpr.putReg(DSP_REG_SR, false); } } } @@ -181,7 +189,7 @@ void DSPEmitter::dsp_conditional_extend_accum_imm(int reg, u16 val) gpr.getReg(DSP_REG_SR,sr_reg); DSPJitRegCache c(gpr); TEST(16, sr_reg, Imm16(SR_40_MODE_BIT)); - FixupBranch not_40bit = J_CC(CC_Z); + FixupBranch not_40bit = J_CC(CC_Z, true); //if (g_dsp.r[DSP_REG_SR] & SR_40_MODE_BIT) //{ // Sign extend into whole accum. 
@@ -192,7 +200,7 @@ void DSPEmitter::dsp_conditional_extend_accum_imm(int reg, u16 val) //} gpr.flushRegs(c); SetJumpTarget(not_40bit); - gpr.putReg(DSP_REG_SR); + gpr.putReg(DSP_REG_SR, false); } } } @@ -327,9 +335,9 @@ void DSPEmitter::addarn(const UDSPInstruction opc) // u8 dreg = opc & 0x3; // u8 sreg = (opc >> 2) & 0x3; // g_dsp.r[dreg] = dsp_increase_addr_reg(dreg, (s16)g_dsp.r[DSP_REG_IX0 + sreg]); - + // From looking around it is always called with the matching index register - increase_addr_reg(opc & 0x3); + increase_addr_reg(opc & 0x3, (opc >> 2) & 0x3); } //---- diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitMultiplier.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitMultiplier.cpp index db523c3233..cab2bb194d 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitMultiplier.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitMultiplier.cpp @@ -31,13 +31,12 @@ using namespace Gen; // Returns s64 in RAX -// In: RSI = s16 a, RDI = s16 b +// In: RCX = s16 a, RAX = s16 b void DSPEmitter::multiply() { #ifdef _M_X64 // prod = (s16)a * (s16)b; //signed - MOV(64, R(EAX), R(RDI)); - IMUL(64, R(ESI)); + IMUL(64, R(ECX)); // Conditionally multiply by 2. 
// if ((g_dsp.r.sr & SR_MUL_MODIFY) == 0) @@ -46,9 +45,9 @@ void DSPEmitter::multiply() TEST(16, sr_reg, Imm16(SR_MUL_MODIFY)); FixupBranch noMult2 = J_CC(CC_NZ); // prod <<= 1; - SHL(64, R(EAX), Imm8(1)); + LEA(64, RAX, MRegSum(RAX,RAX)); SetJumpTarget(noMult2); - gpr.putReg(DSP_REG_SR); + gpr.putReg(DSP_REG_SR, false); // return prod; #endif } @@ -60,7 +59,7 @@ void DSPEmitter::multiply_add() // s64 prod = dsp_get_long_prod() + dsp_get_multiply_prod(a, b, sign); multiply(); MOV(64, R(RDX), R(RAX)); - get_long_prod(); + get_long_prod(); ADD(64, R(RAX), R(RDX)); // return prod; } @@ -72,14 +71,14 @@ void DSPEmitter::multiply_sub() // s64 prod = dsp_get_long_prod() - dsp_get_multiply_prod(a, b, sign); multiply(); MOV(64, R(RDX), R(RAX)); - get_long_prod(); + get_long_prod(); SUB(64, R(RAX), R(RDX)); // return prod; } // Only MULX family instructions have unsigned/mixed support. // Returns s64 in EAX -// In: RSI = s16 a, RDI = s16 b +// In: RCX = s16 a, RAX = s16 b // Returns s64 in RAX void DSPEmitter::multiply_mulx(u8 axh0, u8 axh1) { @@ -101,41 +100,48 @@ void DSPEmitter::multiply_mulx(u8 axh0, u8 axh1) TEST(16, sr_reg, Imm16(SR_MUL_UNSIGNED)); FixupBranch unsignedMul = J_CC(CC_NZ); // prod = (s16)a * (s16)b; //signed - MOVSX(64, 16, RAX, R(RDI)); - IMUL(64, R(RSI)); - FixupBranch signedMul = J(); + MOVSX(64, 16, RAX, R(RAX)); + IMUL(64, R(RCX)); + FixupBranch signedMul = J(true); SetJumpTarget(unsignedMul); + DSPJitRegCache c(gpr); + gpr.putReg(DSP_REG_SR, false); if ((axh0==0) && (axh1==0)) { // unsigned support ON if both ax?.l regs are used // prod = (u32)(a * b); - MOVZX(64, 16, RSI, R(RSI)); - MOVZX(64, 16, RAX, R(RDI)); - MUL(64, R(RSI)); + MOVZX(64, 16, RCX, R(RCX)); + MOVZX(64, 16, RAX, R(RAX)); + MUL(64, R(RCX)); } else if ((axh0==0) && (axh1==1)) { // mixed support ON (u16)axl.0 * (s16)axh.1 // prod = a * (s16)b; - MOVZX(64, 16, RAX, R(RSI)); - IMUL(64, R(RDI)); + X64Reg tmp; + gpr.getFreeXReg(tmp); + MOV(64, R(tmp), R(RAX)); + MOVZX(64, 16, RAX, 
R(RCX)); + IMUL(64, R(tmp)); + gpr.putXReg(tmp); } else if ((axh0==1) && (axh1==0)) { // mixed support ON (u16)axl.1 * (s16)axh.0 // prod = (s16)a * b; - MOVZX(64, 16, RAX, R(RDI)); - IMUL(64, R(RSI)); + MOVZX(64, 16, RAX, R(RAX)); + IMUL(64, R(RCX)); } else { // unsigned support OFF if both ax?.h regs are used // prod = (s16)a * (s16)b; //signed - MOVSX(64, 16, RAX, R(RDI)); - IMUL(64, R(RSI)); + MOVSX(64, 16, RAX, R(RAX)); + IMUL(64, R(RCX)); } + gpr.flushRegs(c); SetJumpTarget(signedMul); // Conditionally multiply by 2. @@ -143,9 +149,9 @@ void DSPEmitter::multiply_mulx(u8 axh0, u8 axh1) TEST(16, sr_reg, Imm16(SR_MUL_MODIFY)); FixupBranch noMult2 = J_CC(CC_NZ); // prod <<= 1; - SHL(64, R(RAX), Imm8(1)); + LEA(64, RAX, MRegSum(RAX,RAX)); SetJumpTarget(noMult2); - gpr.putReg(DSP_REG_SR); + gpr.putReg(DSP_REG_SR, false); // return prod; } @@ -169,7 +175,7 @@ void DSPEmitter::clrp(const UDSPInstruction opc) // g_dsp.r[DSP_REG_PRODM2] = 0x0010; //64bit move to memory does not work. use 2 32bits MOV(32, M(&g_dsp.r.prod.val), Imm32(0xfff00000U)); - MOV(32, M((u8*)(&g_dsp.r.prod.val)+4), Imm32(0x001000ffU)); + MOV(32, M(((u32*)&g_dsp.r.prod.val)+1), Imm32(0x001000ffU)); #else Default(opc); #endif @@ -285,14 +291,16 @@ void DSPEmitter::addpaxz(const UDSPInstruction opc) u8 sreg = (opc >> 9) & 0x1; // s64 ax = dsp_get_long_acx(sreg); - get_long_acx(sreg, RCX); - MOV(64, R(RDI), R(RCX)); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + get_long_acx(sreg, tmp1); + MOV(64, R(RDX), R(tmp1)); // s64 res = prod + (ax & ~0xffff); - MOV(64, R(RDX), Imm64(~0xffff)); - AND(64, R(RDI), R(RDX)); + MOV(64, R(RAX), Imm64(~0xffff)); + AND(64, R(RDX), R(RAX)); // s64 prod = dsp_get_long_prod_round_prodl(); get_long_prod_round_prodl(); - ADD(64, R(RAX), R(RDI)); + ADD(64, R(RAX), R(RDX)); // s64 oldprod = dsp_get_long_prod(); // dsp_set_long_acc(dreg, res); @@ -301,14 +309,15 @@ void DSPEmitter::addpaxz(const UDSPInstruction opc) if (!(DSPAnalyzer::code_flags[compilePC] & 
DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { get_long_prod(RDX); - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry(EAX, tmp1); } else { set_long_acc(dreg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -323,8 +332,8 @@ void DSPEmitter::mulaxh(const UDSPInstruction opc) { #ifdef _M_X64 // s64 prod = dsp_multiply(dsp_get_ax_h(0), dsp_get_ax_h(0)); - dsp_op_read_reg(DSP_REG_AXH0, RSI, SIGN); - MOV(64, R(RDI), R(RSI)); + dsp_op_read_reg(DSP_REG_AXH0, RCX, SIGN); + MOV(64, R(RAX), R(RCX)); multiply(); // dsp_set_long_prod(prod); set_long_prod(); @@ -345,9 +354,9 @@ void DSPEmitter::mul(const UDSPInstruction opc) u8 sreg = (opc >> 11) & 0x1; // u16 axl = dsp_get_ax_l(sreg); - dsp_op_read_reg(DSP_REG_AXL0+sreg, RSI, SIGN); + dsp_op_read_reg(DSP_REG_AXL0+sreg, RCX, SIGN); // u16 axh = dsp_get_ax_h(sreg); - dsp_op_read_reg(DSP_REG_AXH0+sreg, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXH0+sreg, RAX, SIGN); // s64 prod = dsp_multiply(axh, axl); multiply(); // dsp_set_long_prod(prod); @@ -377,9 +386,9 @@ void DSPEmitter::mulac(const UDSPInstruction opc) ADD(64, R(RAX), R(RDX)); PUSH(64, R(RAX)); // u16 axl = dsp_get_ax_l(sreg); - dsp_op_read_reg(DSP_REG_AXL0+sreg, RSI, SIGN); + dsp_op_read_reg(DSP_REG_AXL0+sreg, RCX, SIGN); // u16 axh = dsp_get_ax_h(sreg); - dsp_op_read_reg(DSP_REG_AXH0+sreg, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXH0+sreg, RAX, SIGN); // s64 prod = dsp_multiply(axl, axh); multiply(); // dsp_set_long_prod(prod); @@ -467,9 +476,9 @@ void DSPEmitter::mulx(const UDSPInstruction opc) u8 sreg = ((opc >> 12) & 0x1); // u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0); - dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RSI, SIGN); + dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RCX, SIGN); // u16 val2 = (treg == 0) ? 
dsp_get_ax_l(1) : dsp_get_ax_h(1); - dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RAX, SIGN); // s64 prod = dsp_multiply_mulx(sreg, treg, val1, val2); multiply_mulx(sreg, treg); // dsp_set_long_prod(prod); @@ -494,25 +503,28 @@ void DSPEmitter::mulxac(const UDSPInstruction opc) u8 sreg = (opc >> 12) & 0x1; // s64 acc = dsp_get_long_acc(rreg) + dsp_get_long_prod(); - get_long_acc(rreg, RCX); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + get_long_acc(rreg, tmp1); get_long_prod(); - ADD(64, R(RCX), R(RAX)); + ADD(64, R(tmp1), R(RAX)); // u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0); - dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RSI, SIGN); + dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RCX, SIGN); // u16 val2 = (treg == 0) ? dsp_get_ax_l(1) : dsp_get_ax_h(1); - dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RAX, SIGN); // s64 prod = dsp_multiply_mulx(sreg, treg, val1, val2); multiply_mulx(sreg, treg); // dsp_set_long_prod(prod); set_long_prod(); // dsp_set_long_acc(rreg, acc); - set_long_acc(rreg, RCX); + set_long_acc(rreg, tmp1); // Update_SR_Register64(dsp_get_long_acc(rreg)); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - Update_SR_Register64(RCX); + Update_SR_Register64(tmp1); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -533,23 +545,26 @@ void DSPEmitter::mulxmv(const UDSPInstruction opc) u8 sreg = (opc >> 12) & 0x1; // s64 acc = dsp_get_long_prod(); - get_long_prod(RCX); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + get_long_prod(tmp1); // u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0); - dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RSI, SIGN); + dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RCX, SIGN); // u16 val2 = (treg == 0) ? 
dsp_get_ax_l(1) : dsp_get_ax_h(1); - dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RAX, SIGN); // s64 prod = dsp_multiply_mulx(sreg, treg, val1, val2); multiply_mulx(sreg, treg); // dsp_set_long_prod(prod); set_long_prod(); // dsp_set_long_acc(rreg, acc); - set_long_acc(rreg, RCX); + set_long_acc(rreg, tmp1); // Update_SR_Register64(dsp_get_long_acc(rreg)); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - Update_SR_Register64(RCX); + Update_SR_Register64(tmp1); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -571,23 +586,26 @@ void DSPEmitter::mulxmvz(const UDSPInstruction opc) u8 sreg = (opc >> 12) & 0x1; // s64 acc = dsp_get_long_prod_round_prodl(); - get_long_prod_round_prodl(RCX); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + get_long_prod_round_prodl(tmp1); // u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0); - dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RSI, SIGN); + dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RCX, SIGN); // u16 val2 = (treg == 0) ? 
dsp_get_ax_l(1) : dsp_get_ax_h(1); - dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RAX, SIGN); // s64 prod = dsp_multiply_mulx(sreg, treg, val1, val2); multiply_mulx(sreg, treg); // dsp_set_long_prod(prod); set_long_prod(); // dsp_set_long_acc(rreg, acc); - set_long_acc(rreg, RCX); + set_long_acc(rreg, tmp1); // Update_SR_Register64(dsp_get_long_acc(rreg)); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - Update_SR_Register64(RCX); + Update_SR_Register64(tmp1); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -606,9 +624,9 @@ void DSPEmitter::mulc(const UDSPInstruction opc) u8 sreg = (opc >> 12) & 0x1; // u16 accm = dsp_get_acc_m(sreg); - get_acc_m(sreg, ESI); + get_acc_m(sreg, ECX); // u16 axh = dsp_get_ax_h(treg); - dsp_op_read_reg(DSP_REG_AXH0+treg, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXH0+treg, RAX, SIGN); // s64 prod = dsp_multiply(accm, axh); multiply(); // dsp_set_long_prod(prod); @@ -639,9 +657,9 @@ void DSPEmitter::mulcac(const UDSPInstruction opc) ADD(64, R(RAX), R(RDX)); PUSH(64, R(RAX)); // u16 accm = dsp_get_acc_m(sreg); - get_acc_m(sreg, ESI); + get_acc_m(sreg, ECX); // u16 axh = dsp_get_ax_h(treg); - dsp_op_read_reg(DSP_REG_AXH0+treg, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXH0+treg, RAX, SIGN); // s64 prod = dsp_multiply(accm, axh); multiply(); // dsp_set_long_prod(prod); @@ -678,9 +696,9 @@ void DSPEmitter::mulcmv(const UDSPInstruction opc) get_long_prod(); PUSH(64, R(RAX)); // u16 accm = dsp_get_acc_m(sreg); - get_acc_m(sreg, ESI); + get_acc_m(sreg, ECX); // u16 axh = dsp_get_ax_h(treg); - dsp_op_read_reg(DSP_REG_AXH0+treg, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXH0+treg, RAX, SIGN); // s64 prod = dsp_multiply(accm, axh); multiply(); // dsp_set_long_prod(prod); @@ -718,9 +736,9 @@ void DSPEmitter::mulcmvz(const UDSPInstruction opc) get_long_prod_round_prodl(); PUSH(64, R(RAX)); // u16 accm = 
dsp_get_acc_m(sreg); - get_acc_m(sreg, ESI); + get_acc_m(sreg, ECX); // u16 axh = dsp_get_ax_h(treg); - dsp_op_read_reg(DSP_REG_AXH0+treg, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXH0+treg, RAX, SIGN); // s64 prod = dsp_multiply(accm, axh); multiply(); // dsp_set_long_prod(prod); @@ -752,9 +770,9 @@ void DSPEmitter::maddx(const UDSPInstruction opc) u8 sreg = (opc >> 9) & 0x1; // u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0); - dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RSI, SIGN); + dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RCX, SIGN); // u16 val2 = (treg == 0) ? dsp_get_ax_l(1) : dsp_get_ax_h(1); - dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RAX, SIGN); // s64 prod = dsp_multiply_add(val1, val2); multiply_add(); // dsp_set_long_prod(prod); @@ -776,9 +794,9 @@ void DSPEmitter::msubx(const UDSPInstruction opc) u8 sreg = (opc >> 9) & 0x1; // u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0); - dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RSI, SIGN); + dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RCX, SIGN); // u16 val2 = (treg == 0) ? 
dsp_get_ax_l(1) : dsp_get_ax_h(1); - dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RAX, SIGN); // s64 prod = dsp_multiply_sub(val1, val2); multiply_sub(); // dsp_set_long_prod(prod); @@ -800,9 +818,9 @@ void DSPEmitter::maddc(const UDSPInstruction opc) u8 sreg = (opc >> 9) & 0x1; // u16 accm = dsp_get_acc_m(sreg); - get_acc_m(sreg, ESI); + get_acc_m(sreg, ECX); // u16 axh = dsp_get_ax_h(treg); - dsp_op_read_reg(DSP_REG_AXH0+treg, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXH0+treg, RAX, SIGN); // s64 prod = dsp_multiply_add(accm, axh); multiply_add(); // dsp_set_long_prod(prod); @@ -824,9 +842,9 @@ void DSPEmitter::msubc(const UDSPInstruction opc) u8 sreg = (opc >> 9) & 0x1; // u16 accm = dsp_get_acc_m(sreg); - get_acc_m(sreg, ESI); + get_acc_m(sreg, ECX); // u16 axh = dsp_get_ax_h(treg); - dsp_op_read_reg(DSP_REG_AXH0+treg, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXH0+treg, RAX, SIGN); // s64 prod = dsp_multiply_sub(accm, axh); multiply_sub(); // dsp_set_long_prod(prod); @@ -847,9 +865,9 @@ void DSPEmitter::madd(const UDSPInstruction opc) u8 sreg = (opc >> 8) & 0x1; // u16 axl = dsp_get_ax_l(sreg); - dsp_op_read_reg(DSP_REG_AXL0+sreg, RSI, SIGN); + dsp_op_read_reg(DSP_REG_AXL0+sreg, RCX, SIGN); // u16 axh = dsp_get_ax_h(sreg); - dsp_op_read_reg(DSP_REG_AXH0+sreg, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXH0+sreg, RAX, SIGN); // s64 prod = dsp_multiply_add(axl, axh); multiply_add(); // dsp_set_long_prod(prod); @@ -870,9 +888,9 @@ void DSPEmitter::msub(const UDSPInstruction opc) u8 sreg = (opc >> 8) & 0x1; // u16 axl = dsp_get_ax_l(sreg); - dsp_op_read_reg(DSP_REG_AXL0+sreg, RSI, SIGN); + dsp_op_read_reg(DSP_REG_AXL0+sreg, RCX, SIGN); // u16 axh = dsp_get_ax_h(sreg); - dsp_op_read_reg(DSP_REG_AXH0+sreg, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXH0+sreg, RAX, SIGN); // s64 prod = dsp_multiply_sub(axl, axh); multiply_sub(); // dsp_set_long_prod(prod); diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitRegCache.cpp 
b/Source/Core/Core/Src/DSP/Jit/DSPJitRegCache.cpp index 11c27ca6d8..9201157618 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitRegCache.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitRegCache.cpp @@ -21,7 +21,7 @@ using namespace Gen; -static u16 *reg_ptr(int reg) { +static void *reg_ptr(int reg) { switch(reg) { case DSP_REG_AR0: case DSP_REG_AR1: @@ -64,79 +64,117 @@ static u16 *reg_ptr(int reg) { case DSP_REG_ACM0: case DSP_REG_ACM1: return &g_dsp.r.ac[reg - DSP_REG_ACM0].m; + case DSP_REG_AX0_32: + case DSP_REG_AX1_32: + return &g_dsp.r.ax[reg - DSP_REG_AX0_32].val; +#ifdef _M_X64 + case DSP_REG_ACC0_64: + case DSP_REG_ACC1_64: + return &g_dsp.r.ac[reg - DSP_REG_ACC0_64].val; + case DSP_REG_PROD_64: + return &g_dsp.r.prod.val; +#endif default: _assert_msg_(DSPLLE, 0, "cannot happen"); return NULL; } } -#define ROTATED_REG_ACCS -//#undef ROTATED_REG_ACCS +#define STATIC_REG_ACCS +//#undef STATIC_REG_ACCS DSPJitRegCache::DSPJitRegCache(DSPEmitter &_emitter) : emitter(_emitter), temporary(false), merged(false) { for(unsigned int i = 0; i < NUMXREGS; i++) { xregs[i].guest_reg = DSP_REG_STATIC; + xregs[i].pushed = false; } - xregs[RSP].guest_reg = DSP_REG_STATIC;//stack pointer + xregs[RAX].guest_reg = DSP_REG_STATIC;// reserved for MUL/DIV + xregs[RDX].guest_reg = DSP_REG_STATIC;// reserved for MUL/DIV + xregs[RCX].guest_reg = DSP_REG_STATIC;// reserved for shifts + xregs[RBX].guest_reg = DSP_REG_STATIC;//extended op backing store + xregs[RSP].guest_reg = DSP_REG_STATIC;//stack pointer + xregs[RBP].guest_reg = DSP_REG_NONE;//definitely usable in dsplle because //all external calls are protected + xregs[RSI].guest_reg = DSP_REG_NONE; + xregs[RDI].guest_reg = DSP_REG_NONE; + #ifdef _M_X64 +#ifdef STATIC_REG_ACCS xregs[R8].guest_reg = DSP_REG_STATIC;//acc0 xregs[R9].guest_reg = DSP_REG_STATIC;//acc1 +#else + xregs[R8].guest_reg = DSP_REG_NONE; + xregs[R9].guest_reg = DSP_REG_NONE; +#endif xregs[R10].guest_reg = DSP_REG_NONE; - xregs[R11].guest_reg = 
DSP_REG_STATIC;//&g_dsp.r - xregs[R12].guest_reg = DSP_REG_STATIC;//used for cycle counting + xregs[R11].guest_reg = DSP_REG_NONE; + xregs[R12].guest_reg = DSP_REG_NONE; xregs[R13].guest_reg = DSP_REG_NONE; xregs[R14].guest_reg = DSP_REG_NONE; xregs[R15].guest_reg = DSP_REG_NONE; #endif -#ifdef _M_X64 - acc[0].host_reg = R8; - acc[0].shift = 0; - acc[0].dirty = false; - acc[0].used = false; - acc[0].tmp_reg = INVALID_REG; - - acc[1].host_reg = R9; - acc[1].shift = 0; - acc[1].dirty = false; - acc[1].used = false; - acc[1].tmp_reg = INVALID_REG; -#endif - for(unsigned int i = 0; i < 32; i++) { + for(unsigned int i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { regs[i].mem = reg_ptr(i); - regs[i].size = 2; - } -#ifdef _M_X64 - regs[DSP_REG_ACC0_64].mem = &g_dsp.r.ac[0].val; - regs[DSP_REG_ACC0_64].size = 8; - regs[DSP_REG_ACC1_64].mem = &g_dsp.r.ac[1].val; - regs[DSP_REG_ACC1_64].size = 8; - regs[DSP_REG_PROD_64].mem = &g_dsp.r.prod.val; - regs[DSP_REG_PROD_64].size = 8; -#endif - regs[DSP_REG_AX0_32].mem = &g_dsp.r.ax[0].val; - regs[DSP_REG_AX0_32].size = 4; - regs[DSP_REG_AX1_32].mem = &g_dsp.r.ax[1].val; - regs[DSP_REG_AX1_32].size = 4; - for(unsigned int i = 0; i < DSP_REG_MAX_MEM_BACKED+1; i++) { + regs[i].size = 0; regs[i].dirty = false; + regs[i].used = false; + regs[i].last_use_ctr = -1; + regs[i].parentReg = DSP_REG_NONE; + regs[i].shift = 0; + regs[i].host_reg = INVALID_REG; + regs[i].loc = M(regs[i].mem); } + + for(unsigned int i = 0; i < 32; i++) + regs[i].size = 2; + //special composite registers +#ifdef _M_X64 +#ifdef STATIC_REG_ACCS + regs[DSP_REG_ACC0_64].host_reg = R8; + regs[DSP_REG_ACC1_64].host_reg = R9; +#endif + for(unsigned int i = 0; i < 2; i++) { + regs[i+DSP_REG_ACC0_64].size = 8; + regs[i+DSP_REG_ACL0].parentReg = i+DSP_REG_ACC0_64; + regs[i+DSP_REG_ACM0].parentReg = i+DSP_REG_ACC0_64; + regs[i+DSP_REG_ACH0].parentReg = i+DSP_REG_ACC0_64; + regs[i+DSP_REG_ACL0].shift = 0; + regs[i+DSP_REG_ACM0].shift = 16; + regs[i+DSP_REG_ACH0].shift = 32; + } + 
regs[DSP_REG_PROD_64].size = 8; + regs[DSP_REG_PRODL].parentReg = DSP_REG_PROD_64; + regs[DSP_REG_PRODM].parentReg = DSP_REG_PROD_64; + regs[DSP_REG_PRODH].parentReg = DSP_REG_PROD_64; + regs[DSP_REG_PRODM2].parentReg = DSP_REG_PROD_64; + regs[DSP_REG_PRODL].shift = 0; + regs[DSP_REG_PRODM].shift = 16; + regs[DSP_REG_PRODH].shift = 32; + regs[DSP_REG_PRODM2].shift = 48; +#endif + + for(unsigned int i = 0; i < 2; i++) { + regs[i+DSP_REG_AX0_32].size = 4; + regs[i+DSP_REG_AXL0].parentReg = i+DSP_REG_AX0_32; + regs[i+DSP_REG_AXH0].parentReg = i+DSP_REG_AX0_32; + regs[i+DSP_REG_AXL0].shift = 0; + regs[i+DSP_REG_AXH0].shift = 16; + } + + use_ctr = 0; } DSPJitRegCache::DSPJitRegCache(const DSPJitRegCache &cache) : emitter(cache.emitter), temporary(true), merged(false) { memcpy(xregs,cache.xregs,sizeof(xregs)); -#ifdef _M_X64 - memcpy(acc,cache.acc,sizeof(acc)); -#endif memcpy(regs,cache.regs,sizeof(regs)); } @@ -146,9 +184,6 @@ DSPJitRegCache& DSPJitRegCache::operator=(const DSPJitRegCache &cache) _assert_msg_(DSPLLE, temporary, "register cache not temporary??"); merged = false; memcpy(xregs,cache.xregs,sizeof(xregs)); -#ifdef _M_X64 - memcpy(acc,cache.acc,sizeof(acc)); -#endif memcpy(regs,cache.regs,sizeof(regs)); return *this; @@ -159,268 +194,578 @@ DSPJitRegCache::~DSPJitRegCache() _assert_msg_(DSPLLE, !temporary || merged, "temporary cache not merged"); } -void DSPJitRegCache::flushRegs(DSPJitRegCache &cache, bool emit) -{ - cache.merged = true; - -#ifdef _M_X64 - for(unsigned int i = 0; i < 2; i++) { - if (acc[i].shift > cache.acc[i].shift) { - if (emit) - emitter.ROL(64, R(acc[i].host_reg), - Imm8(acc[i].shift-cache.acc[i].shift)); - acc[i].shift = cache.acc[i].shift; - } - if (acc[i].shift < cache.acc[i].shift) { - if (emit) - emitter.ROR(64, R(acc[i].host_reg), - Imm8(cache.acc[i].shift-acc[i].shift)); - acc[i].shift = cache.acc[i].shift; - } - } -#endif -} - void DSPJitRegCache::drop() { merged = true; } -void DSPJitRegCache::flushRegs() +void 
DSPJitRegCache::flushRegs(DSPJitRegCache &cache, bool emit) +{ + cache.merged = true; + + unsigned int i; + + //drop all guest register not used by cache + for(i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + regs[i].used = false;//used is restored later + if (regs[i].loc.IsSimpleReg() && + !cache.regs[i].loc.IsSimpleReg()) + movToMemory(i); + } + + //try to move guest regs in the wrong host reg to the correct one + int movcnt; + do { + movcnt = 0; + for(i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + if (cache.regs[i].loc.GetSimpleReg() != + regs[i].loc.GetSimpleReg() && + xregs[cache.regs[i].loc.GetSimpleReg()].guest_reg == + DSP_REG_NONE) { + movToHostReg(i, + cache.regs[i].loc.GetSimpleReg(), + true); + movcnt++; + } + } + } while (movcnt != 0); + + //free all host regs that are not used for the same guest reg + for(i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + if (cache.regs[i].loc.GetSimpleReg() != + regs[i].loc.GetSimpleReg() && + regs[i].loc.IsSimpleReg()) + movToMemory(i); + } + + //load all guest regs that are in memory and should be in host reg + for(i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + if (cache.regs[i].loc.IsSimpleReg()) { + movToHostReg(i, cache.regs[i].loc.GetSimpleReg(), + true); + rotateHostReg(i, cache.regs[i].shift, true); + } else if(cache.regs[i].loc.IsImm()) { + //todo: immediates? 
+ } + regs[i].used = cache.regs[i].used; + regs[i].dirty |= cache.regs[i].dirty; + regs[i].last_use_ctr = cache.regs[i].last_use_ctr; + } + + //consistency checks + for(i = 0; i < NUMXREGS; i++) { + _assert_msg_(DSPLLE, + xregs[i].guest_reg == cache.xregs[i].guest_reg, + "cache and current xreg guest_reg mismatch for %d", i); + } + + for(i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + _assert_msg_(DSPLLE, + regs[i].loc.IsImm() == cache.regs[i].loc.IsImm(), + "cache and current reg loc mismatch for %x", i); + _assert_msg_(DSPLLE, + regs[i].loc.GetSimpleReg() == cache.regs[i].loc.GetSimpleReg(), + "cache and current reg loc mismatch for %x", i); + _assert_msg_(DSPLLE, + regs[i].dirty || !cache.regs[i].dirty, + "cache and current reg dirty mismatch for %x", i); + _assert_msg_(DSPLLE, + regs[i].used == cache.regs[i].used, + "cache and current reg used mismatch for %x", i); + _assert_msg_(DSPLLE, + regs[i].shift == cache.regs[i].shift, + "cache and current reg shift mismatch for %x", i); + } + + use_ctr = cache.use_ctr; +} + +void DSPJitRegCache::flushMemBackedRegs() { //also needs to undo any dynamic changes to static allocated regs //this should have the same effect as //merge(DSPJitRegCache(emitter)); -#ifdef _M_X64 -#ifdef ROTATED_REG_ACCS - for(unsigned int i = 0; i < 2; i++) { - if (acc[i].shift > 0) { - emitter.ROL(64, R(acc[i].host_reg), - Imm8(acc[i].shift)); - acc[i].shift = 0; - } - _assert_msg_(DSPLLE, !acc[i].used, - "accumulator still in use"); - if (acc[i].used) + + unsigned int i; + for(i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + _assert_msg_(DSPLLE, !regs[i].used, + "register %x still in use", i); + if (regs[i].used) emitter.INT3(); + if (regs[i].host_reg != INVALID_REG) { + movToHostReg(i,regs[i].host_reg,true); + rotateHostReg(i, 0, true); + } else if (regs[i].parentReg == DSP_REG_NONE) { + movToMemory(i); + } } +} + +void DSPJitRegCache::flushRegs() +{ + flushMemBackedRegs(); + + for(unsigned int i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + if 
(regs[i].host_reg != INVALID_REG) + movToMemory(i); + } + + for(unsigned int i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + _assert_msg_(DSPLLE, + !regs[i].loc.IsSimpleReg(), + "register %x is still a simple reg", i); + } + + _assert_msg_(DSPLLE, + xregs[RSP].guest_reg == DSP_REG_STATIC, + "wrong xreg state for %d", RSP); + _assert_msg_(DSPLLE, + xregs[RBX].guest_reg == DSP_REG_STATIC, + "wrong xreg state for %d", RBX); + _assert_msg_(DSPLLE, + xregs[RBP].guest_reg == DSP_REG_NONE, + "wrong xreg state for %d", RBP); + _assert_msg_(DSPLLE, + xregs[RSI].guest_reg == DSP_REG_NONE, + "wrong xreg state for %d", RSI); + _assert_msg_(DSPLLE, + xregs[RDI].guest_reg == DSP_REG_NONE, + "wrong xreg state for %d", RDI); +#ifdef _M_X64 +#ifdef STATIC_REG_ACCS + _assert_msg_(DSPLLE, + xregs[R8].guest_reg == DSP_REG_STATIC, + "wrong xreg state for %d", R8); + _assert_msg_(DSPLLE, + xregs[R9].guest_reg == DSP_REG_STATIC, + "wrong xreg state for %d", R9); +#else + _assert_msg_(DSPLLE, + xregs[R8].guest_reg == DSP_REG_NONE, + "wrong xreg state for %d", R8); + _assert_msg_(DSPLLE, + xregs[R9].guest_reg == DSP_REG_NONE, + "wrong xreg state for %d", R9); #endif + _assert_msg_(DSPLLE, + xregs[R10].guest_reg == DSP_REG_NONE, + "wrong xreg state for %d", R10); + _assert_msg_(DSPLLE, + xregs[R11].guest_reg == DSP_REG_NONE, + "wrong xreg state for %d", R11); + _assert_msg_(DSPLLE, + xregs[R12].guest_reg == DSP_REG_NONE, + "wrong xreg state for %d", R12); + _assert_msg_(DSPLLE, + xregs[R13].guest_reg == DSP_REG_NONE, + "wrong xreg state for %d", R13); + _assert_msg_(DSPLLE, + xregs[R14].guest_reg == DSP_REG_NONE, + "wrong xreg state for %d", R14); + _assert_msg_(DSPLLE, + xregs[R15].guest_reg == DSP_REG_NONE, + "wrong xreg state for %d", R15); #endif + + use_ctr = 0; } static u64 ebp_store; -void DSPJitRegCache::loadStaticRegs() +void DSPJitRegCache::loadRegs(bool emit) { + for(unsigned int i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + if (regs[i].host_reg != INVALID_REG) + 
movToHostReg(i,regs[i].host_reg); + } + + if (emit) { #ifdef _M_X64 -#ifdef ROTATED_REG_ACCS - emitter.MOV(64, R(R8), M(&g_dsp.r.ac[0].val)); - emitter.MOV(64, R(R9), M(&g_dsp.r.ac[1].val)); -#endif - emitter.MOV(64, M(&ebp_store), R(RBP)); + emitter.MOV(64, M(&ebp_store), R(RBP)); #else - emitter.MOV(32, M(&ebp_store), R(EBP)); + emitter.MOV(32, M(&ebp_store), R(EBP)); #endif + } } -void DSPJitRegCache::saveStaticRegs() +void DSPJitRegCache::saveRegs() { flushRegs(); + + for(unsigned int i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + if (regs[i].host_reg != INVALID_REG) + movToMemory(i); + } + + for(unsigned int i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + _assert_msg_(DSPLLE, + !regs[i].loc.IsSimpleReg(), + "register %x is still a simple reg", i); + } + #ifdef _M_X64 -#ifdef ROTATED_REG_ACCS - emitter.MOV(64, M(&g_dsp.r.ac[0].val), R(R8)); - emitter.MOV(64, M(&g_dsp.r.ac[1].val), R(R9)); -#endif emitter.MOV(64, R(RBP), M(&ebp_store)); #else emitter.MOV(32, R(EBP), M(&ebp_store)); #endif } +void DSPJitRegCache::pushRegs() { + flushMemBackedRegs(); + + for(unsigned int i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + if (regs[i].host_reg != INVALID_REG) + movToMemory(i); + } + + for(unsigned int i = 0; i < NUMXREGS; i++) { + if (xregs[i].guest_reg == DSP_REG_USED) { + emitter.PUSH((X64Reg)i); + xregs[i].pushed = true; + xregs[i].guest_reg = DSP_REG_NONE; + } + } + + for(unsigned int i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + _assert_msg_(DSPLLE, + !regs[i].loc.IsSimpleReg(), + "register %x is still a simple reg", i); + } + + for(unsigned int i = 0; i < NUMXREGS; i++) { + _assert_msg_(DSPLLE, + xregs[i].guest_reg == DSP_REG_NONE || + xregs[i].guest_reg == DSP_REG_STATIC, + "register %x is still used", i); + } + +#ifdef _M_X64 + emitter.MOV(64, R(RBP), M(&ebp_store)); +#else + emitter.MOV(32, R(EBP), M(&ebp_store)); +#endif +} + +void DSPJitRegCache::popRegs() { +#ifdef _M_X64 + emitter.MOV(64, M(&ebp_store), R(RBP)); +#else + emitter.MOV(32, M(&ebp_store), R(EBP)); 
+#endif + for(int i = NUMXREGS-1; i >= 0; i--) { + if (xregs[i].pushed) { + emitter.POP((X64Reg)i); + xregs[i].pushed = false; + xregs[i].guest_reg = DSP_REG_USED; + } + } + + for(unsigned int i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + if (regs[i].host_reg != INVALID_REG) + movToHostReg(i,regs[i].host_reg); + } +} + +X64Reg DSPJitRegCache::makeABICallSafe(X64Reg reg) { + if (reg != RBP) { + return reg; + } + + int rbp_guest = xregs[RBP].guest_reg; + xregs[RBP].guest_reg = DSP_REG_USED; + X64Reg safe = findSpillFreeXReg(); + _assert_msg_(DSPLLE, safe != INVALID_REG, "could not find register"); + if (safe == INVALID_REG) + emitter.INT3(); + xregs[RBP].guest_reg = rbp_guest; +#ifdef _M_X64 + emitter.MOV(64,R(safe),R(reg)); +#else + emitter.MOV(32,R(safe),R(reg)); +#endif + return safe; +} + +void DSPJitRegCache::movToHostReg(int reg, X64Reg host_reg, bool load) +{ + _assert_msg_(DSPLLE, reg >= 0 && reg <= DSP_REG_MAX_MEM_BACKED, + "bad register name %x", reg); + _assert_msg_(DSPLLE, regs[reg].parentReg == DSP_REG_NONE, + "register %x is proxy for %x", reg, regs[reg].parentReg); + _assert_msg_(DSPLLE, !regs[reg].used, + "moving to host reg in use guest reg %x!", reg); + X64Reg old_reg = regs[reg].loc.GetSimpleReg(); + if (old_reg == host_reg) + return; + + if (xregs[host_reg].guest_reg != DSP_REG_STATIC) + xregs[host_reg].guest_reg = reg; + + if (load) { + switch(regs[reg].size) { + case 2: + emitter.MOV(16, R(host_reg), regs[reg].loc); break; + case 4: + emitter.MOV(32, R(host_reg), regs[reg].loc); break; +#ifdef _M_X64 + case 8: + emitter.MOV(64, R(host_reg), regs[reg].loc); break; +#endif + default: + _assert_msg_(DSPLLE, 0, "unsupported memory size"); + break; + } + } + regs[reg].loc = R(host_reg); + if (old_reg != INVALID_REG && + xregs[old_reg].guest_reg != DSP_REG_STATIC) + xregs[old_reg].guest_reg = DSP_REG_NONE; +} + +void DSPJitRegCache::movToHostReg(int reg, bool load) +{ + _assert_msg_(DSPLLE, reg >= 0 && reg <= DSP_REG_MAX_MEM_BACKED, + "bad register 
name %x", reg); + _assert_msg_(DSPLLE, regs[reg].parentReg == DSP_REG_NONE, + "register %x is proxy for %x", reg, regs[reg].parentReg); + _assert_msg_(DSPLLE, !regs[reg].used, + "moving to host reg in use guest reg %x!", reg); + + if (regs[reg].loc.IsSimpleReg()) + return; + X64Reg tmp; + if (regs[reg].host_reg != INVALID_REG) + tmp = regs[reg].host_reg; + else + tmp = findSpillFreeXReg(); + if (tmp == INVALID_REG) + return; + movToHostReg(reg, tmp, load); +} + +void DSPJitRegCache::rotateHostReg(int reg, int shift, bool emit) +{ + _assert_msg_(DSPLLE, reg >= 0 && reg <= DSP_REG_MAX_MEM_BACKED, + "bad register name %x", reg); + _assert_msg_(DSPLLE, regs[reg].parentReg == DSP_REG_NONE, + "register %x is proxy for %x", reg, regs[reg].parentReg); + _assert_msg_(DSPLLE, regs[reg].loc.IsSimpleReg(), + "register %x is not a simple reg", reg); + _assert_msg_(DSPLLE, !regs[reg].used, + "rotating in use guest reg %x!", reg); + if (shift > regs[reg].shift && emit) { + switch(regs[reg].size) { + case 2: + emitter.ROR(16, regs[reg].loc, + Imm8(shift - regs[reg].shift)); + break; + case 4: + emitter.ROR(32, regs[reg].loc, + Imm8(shift - regs[reg].shift)); + break; +#ifdef _M_X64 + case 8: + emitter.ROR(64, regs[reg].loc, + Imm8(shift - regs[reg].shift)); + break; +#endif + } + } else if (shift < regs[reg].shift && emit) { + switch(regs[reg].size) { + case 2: + emitter.ROL(16, regs[reg].loc, + Imm8(regs[reg].shift - shift)); + break; + case 4: + emitter.ROL(32, regs[reg].loc, + Imm8(regs[reg].shift - shift)); + break; +#ifdef _M_X64 + case 8: + emitter.ROL(64, regs[reg].loc, + Imm8(regs[reg].shift - shift)); + break; +#endif + } + } + regs[reg].shift = shift; +} + +void DSPJitRegCache::movToMemory(int reg) +{ + _assert_msg_(DSPLLE, reg >= 0 && reg <= DSP_REG_MAX_MEM_BACKED, + "bad register name %x", reg); + _assert_msg_(DSPLLE, regs[reg].parentReg == DSP_REG_NONE, + "register %x is proxy for %x", reg, regs[reg].parentReg); + _assert_msg_(DSPLLE, !regs[reg].used, + "moving to 
memory in use guest reg %x!", reg); + + if (regs[reg].used) + emitter.INT3(); + + if (!regs[reg].loc.IsSimpleReg() && + !regs[reg].loc.IsImm()) + return; + + //but first, check for any needed rotations + if (regs[reg].loc.IsSimpleReg()) + rotateHostReg(reg, 0, true); + else {} //todo: immediates? + + _assert_msg_(DSPLLE, regs[reg].shift == 0, "still shifted??"); + + //move to mem + OpArg tmp = M(regs[reg].mem); + + if (regs[reg].dirty) { + switch(regs[reg].size) { + case 2: + emitter.MOV(16, tmp, regs[reg].loc); break; + case 4: + emitter.MOV(32, tmp, regs[reg].loc); break; +#ifdef _M_X64 + case 8: + emitter.MOV(64, tmp, regs[reg].loc); break; +#endif + default: + _assert_msg_(DSPLLE, 0, "unsupported memory size"); + break; + } + regs[reg].dirty = false; + } + + if (regs[reg].loc.IsSimpleReg()) { + X64Reg hostreg = regs[reg].loc.GetSimpleReg(); + if (xregs[hostreg].guest_reg != DSP_REG_STATIC) + xregs[hostreg].guest_reg = DSP_REG_NONE; + } + + regs[reg].last_use_ctr = -1; + regs[reg].loc = tmp; +} + void DSPJitRegCache::getReg(int reg, OpArg &oparg, bool load) { + int real_reg; + int shift; + if (regs[reg].parentReg != DSP_REG_NONE) { + real_reg = regs[reg].parentReg; + + // always load and rotate since we need the other + // parts of the register + load = true; + + shift = regs[reg].shift; + } else { + real_reg = reg; + shift = 0; + } + + _assert_msg_(DSPLLE, !regs[real_reg].used, + "register %x already in use", real_reg); + + if (regs[real_reg].used) + emitter.INT3(); + // no need to actually emit code for load or rotate if caller doesn't + // use the contents, but see above for a reason to force the load + movToHostReg(real_reg, load); + //todo: actually handle INVALID_REG + _assert_msg_(DSPLLE, regs[real_reg].loc.IsSimpleReg(), + "did not get host reg for %x", reg); + rotateHostReg(real_reg, shift, load); + oparg = regs[real_reg].loc; + regs[real_reg].used = true; + + //do some register specific fixup switch(reg) { #ifdef _M_X64 -#ifdef ROTATED_REG_ACCS - case 
DSP_REG_ACH0: - case DSP_REG_ACH1: - { - _assert_msg_(DSPLLE, !acc[reg-DSP_REG_ACH0].used, - "accumulator already in use"); - if (acc[reg-DSP_REG_ACH0].used) - emitter.INT3(); - oparg = R(acc[reg-DSP_REG_ACH0].host_reg); - if (acc[reg-DSP_REG_ACH0].shift < 32) { - emitter.ROR(64, oparg, Imm8(32-acc[reg-DSP_REG_ACH0].shift)); - acc[reg-DSP_REG_ACH0].shift = 32; - } - - acc[reg-DSP_REG_ACH0].used = true; - } - break; - case DSP_REG_ACM0: - case DSP_REG_ACM1: - { - _assert_msg_(DSPLLE, !acc[reg-DSP_REG_ACM0].used, - "accumulator already in use"); - if (acc[reg-DSP_REG_ACM0].used) - emitter.INT3(); - oparg = R(acc[reg-DSP_REG_ACM0].host_reg); - if (acc[reg-DSP_REG_ACM0].shift < 16) { - emitter.ROR(64, oparg, Imm8(16-acc[reg-DSP_REG_ACM0].shift)); - acc[reg-DSP_REG_ACM0].shift = 16; - } - if (acc[reg-DSP_REG_ACM0].shift > 16) { - emitter.ROL(64, oparg, Imm8(acc[reg-DSP_REG_ACM0].shift-16)); - acc[reg-DSP_REG_ACM0].shift = 16; - } - acc[reg-DSP_REG_ACM0].used = true; - } - break; - case DSP_REG_ACL0: - case DSP_REG_ACL1: - { - _assert_msg_(DSPLLE, !acc[reg-DSP_REG_ACL0].used, - "accumulator already in use"); - if (acc[reg-DSP_REG_ACL0].used) - emitter.INT3(); - oparg = R(acc[reg-DSP_REG_ACL0].host_reg); - if (acc[reg-DSP_REG_ACL0].shift > 0) { - emitter.ROL(64, oparg, Imm8(acc[reg-DSP_REG_ACL0].shift)); - acc[reg-DSP_REG_ACL0].shift = 0; - } - acc[reg-DSP_REG_ACL0].used = true; - } - break; case DSP_REG_ACC0_64: case DSP_REG_ACC1_64: { - if (acc[reg-DSP_REG_ACC0_64].used) - emitter.INT3(); - _assert_msg_(DSPLLE, !acc[reg-DSP_REG_ACC0_64].used, - "accumulator already in use"); - oparg = R(acc[reg-DSP_REG_ACC0_64].host_reg); if (load) { - if (acc[reg-DSP_REG_ACC0_64].shift > 0) { - emitter.ROL(64, oparg, Imm8(acc[reg-DSP_REG_ACC0_64].shift)); - } + //need to do this because interpreter only does 48 bits + //(and putReg does the same) emitter.SHL(64, oparg, Imm8(64-40));//sign extend emitter.SAR(64, oparg, Imm8(64-40)); } - //don't bother to rotate if caller replaces all 
data - acc[reg-DSP_REG_ACC0_64].shift = 0; - acc[reg-DSP_REG_ACC0_64].used = true; } break; -#endif #endif default: - { -/* - getFreeXReg(reg[reg].host_reg); - X64Reg tmp = reg[reg].host_reg; - oparg = R(tmp); - - if (load) { - u16 *regp = reg_ptr(reg); - emitter.MOV(16, oparg, M(regp)); - } -*/ - oparg = regs[reg].loc; //when loading/storing from/to mem, need to consider regs[reg].size - } - break; + break; } } void DSPJitRegCache::putReg(int reg, bool dirty) { + int real_reg = reg; + if (regs[reg].parentReg != DSP_REG_NONE) + real_reg = regs[reg].parentReg; + OpArg oparg = regs[real_reg].loc; switch(reg) { -#ifdef _M_X64 -#ifdef ROTATED_REG_ACCS case DSP_REG_ACH0: case DSP_REG_ACH1: { - if (dirty) { - if (acc[reg-DSP_REG_ACH0].shift > 0) { - emitter.ROL(64, R(acc[reg-DSP_REG_ACH0].host_reg), - Imm8(acc[reg-DSP_REG_ACH0].shift)); - acc[reg-DSP_REG_ACH0].shift = 0; + //no need to extend to full 64bit here until interpreter + //uses that + if (oparg.IsSimpleReg()) { + //register is already shifted correctly + //(if at all) + + // sign extend from the bottom 8 bits. +#ifndef _M_X64 + //cannot use movsx with SPL, BPL, SIL or DIL + //on 32 bit + if (oparg.GetSimpleReg() == RSP || + oparg.GetSimpleReg() == RBP || + oparg.GetSimpleReg() == RSI || + oparg.GetSimpleReg() == RDI) + { + emitter.SHL(16,oparg,Imm8(8)); + emitter.SAR(16,oparg,Imm8(8)); + } + else +#endif + { + emitter.MOVSX(16, 8, + oparg.GetSimpleReg(), + oparg); + } + } else if (oparg.IsImm()) { + //todo: immediates? + } else { + //this works on the memory, so use reg instead + //of real_reg, since it has the right loc + X64Reg tmp; + getFreeXReg(tmp); + // sign extend from the bottom 8 bits. 
+ emitter.MOVSX(16, 8, tmp, regs[reg].loc); + emitter.MOV(16, regs[reg].loc, R(tmp)); + putXReg(tmp); } - emitter.SHL(64, R(acc[reg-DSP_REG_ACH0].host_reg), Imm8(64-40));//sign extend - emitter.SAR(64, R(acc[reg-DSP_REG_ACH0].host_reg), Imm8(64-40)); } - acc[reg-DSP_REG_ACH0].used = false; } break; - case DSP_REG_ACM0: - case DSP_REG_ACM1: - { - acc[reg-DSP_REG_ACM0].used = false; - } - break; - case DSP_REG_ACL0: - case DSP_REG_ACL1: - acc[reg-DSP_REG_ACL0].used = false; - break; +#ifdef _M_X64 case DSP_REG_ACC0_64: case DSP_REG_ACC1_64: { if (dirty) { - OpArg _reg = R(acc[reg-DSP_REG_ACC0_64].host_reg); - - emitter.SHL(64, _reg, Imm8(64-40));//sign extend - emitter.SAR(64, _reg, Imm8(64-40)); + emitter.SHL(64, oparg, Imm8(64-40));//sign extend + emitter.SAR(64, oparg, Imm8(64-40)); } - acc[reg-DSP_REG_ACC0_64].used = false; - } - break; -#else - case DSP_REG_ACH0: - case DSP_REG_ACH1: - { - //need to fix in memory for now. - u16 *regp = reg_ptr(reg); - OpArg mem; - mem = M(regp); - X64Reg tmp; - getFreeXReg(tmp); - // sign extend from the bottom 8 bits. - emitter.MOVSX(16, 8, tmp, mem); - emitter.MOV(16, mem, R(tmp)); - putXReg(tmp); - } - break; -#endif -#else - case DSP_REG_ACH0: - case DSP_REG_ACH1: - { - //need to fix in memory for now. - u16 *regp = reg_ptr(reg); - OpArg mem; - mem = M(regp); - X64Reg tmp; - getFreeXReg(tmp); - // sign extend from the bottom 8 bits. 
- emitter.MOVSX(16, 8, tmp, mem); - emitter.MOV(16, mem, R(tmp)); - putXReg(tmp); } break; #endif default: - { -/* - X64Reg tmp = reg[reg].host_reg; - - if(dirty) { - u16 *regp = reg_ptr(reg); - emitter.MOV(16, M(dregp), R(tmp)); - } -*/ + break; } - break; + regs[real_reg].used = false; + if (regs[real_reg].loc.IsSimpleReg()) { + regs[real_reg].dirty |= dirty; + regs[real_reg].last_use_ctr = use_ctr; + use_ctr++; } } @@ -481,39 +826,101 @@ void DSPJitRegCache::writeReg(int dreg, OpArg arg) putReg(dreg, true); } +//ordered in order of prefered use +//not all of these are actually available +static X64Reg alloc_order[] = { +#ifdef _M_X64 + R8,R9,R10,R11,R12,R13,R14,R15,RSI,RDI,RBX,RCX,RDX,RAX,RBP +#else + ESI,EDI,EBX,ECX,EDX,EAX,EBP +#endif +}; + X64Reg DSPJitRegCache::spillXReg() { - //todo: implement + unsigned int i; + unsigned int max_use_ctr_diff = 0; + X64Reg least_recent_use_reg = INVALID_REG; + for(i = 0; i < sizeof(alloc_order)/sizeof(alloc_order[0]); i++) { + X64Reg reg = alloc_order[i]; + if (xregs[reg].guest_reg <= DSP_REG_MAX_MEM_BACKED && + !regs[xregs[reg].guest_reg].used) { + unsigned int use_ctr_diff = use_ctr - + regs[xregs[reg].guest_reg].last_use_ctr; + if (use_ctr_diff >= max_use_ctr_diff) { + max_use_ctr_diff = use_ctr_diff; + least_recent_use_reg = reg; + } + } + } + + if (least_recent_use_reg != INVALID_REG) { + movToMemory(xregs[least_recent_use_reg].guest_reg); + return least_recent_use_reg; + } + + //just choose one. 
+ for(i = 0; i < sizeof(alloc_order)/sizeof(alloc_order[0]); i++) { + X64Reg reg = alloc_order[i]; + if (xregs[reg].guest_reg <= DSP_REG_MAX_MEM_BACKED && + !regs[xregs[reg].guest_reg].used) { + movToMemory(xregs[reg].guest_reg); + return reg; + } + } + return INVALID_REG; } void DSPJitRegCache::spillXReg(X64Reg reg) { - //todo: implement + if (xregs[reg].guest_reg <= DSP_REG_MAX_MEM_BACKED) { + _assert_msg_(DSPLLE, !regs[xregs[reg].guest_reg].used, + "to be spilled host reg %x(guest reg %x) still in use!", + reg, xregs[reg].guest_reg); + movToMemory(xregs[reg].guest_reg); + } else { + _assert_msg_(DSPLLE, xregs[reg].guest_reg == DSP_REG_NONE, + "to be spilled host reg %x still in use!", + reg); + } } X64Reg DSPJitRegCache::findFreeXReg() { - int i; - for(i = 0; i < NUMXREGS; i++) { - if (xregs[i].guest_reg == DSP_REG_NONE) { - return (X64Reg)i; + unsigned int i; + for(i = 0; i < sizeof(alloc_order)/sizeof(alloc_order[0]); i++) { + if (xregs[alloc_order[i]].guest_reg == DSP_REG_NONE) { + return alloc_order[i]; } } return INVALID_REG; } -void DSPJitRegCache::getFreeXReg(X64Reg ®) +X64Reg DSPJitRegCache::findSpillFreeXReg() { - reg = findFreeXReg(); + X64Reg reg = findFreeXReg(); if (reg == INVALID_REG) reg = spillXReg(); + return reg; +} + +void DSPJitRegCache::getFreeXReg(X64Reg ®) +{ + reg = findSpillFreeXReg(); + _assert_msg_(DSPLLE, reg != INVALID_REG, "could not find register"); + if (reg == INVALID_REG) + emitter.INT3(); xregs[reg].guest_reg = DSP_REG_USED; } void DSPJitRegCache::getXReg(X64Reg reg) { + if (xregs[reg].guest_reg == DSP_REG_STATIC) { + ERROR_LOG(DSPLLE, "Trying to get statically used XReg %d", reg); + return; + } if (xregs[reg].guest_reg != DSP_REG_NONE) spillXReg(reg); _assert_msg_(DSPLLE, xregs[reg].guest_reg != DSP_REG_NONE, "register already in use"); @@ -522,6 +929,10 @@ void DSPJitRegCache::getXReg(X64Reg reg) void DSPJitRegCache::putXReg(X64Reg reg) { + if (xregs[reg].guest_reg == DSP_REG_STATIC) { + ERROR_LOG(DSPLLE, "Trying to put 
statically used XReg %d", reg); + return; + } _assert_msg_(DSPLLE, xregs[reg].guest_reg == DSP_REG_USED, "putXReg without get(Free)XReg"); xregs[reg].guest_reg = DSP_REG_NONE; diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitRegCache.h b/Source/Core/Core/Src/DSP/Jit/DSPJitRegCache.h index a230947c11..e9ec61d123 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitRegCache.h +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitRegCache.h @@ -23,12 +23,16 @@ class DSPEmitter; enum DSPJitRegSpecial { - DSP_REG_ACC0_64 =32, - DSP_REG_ACC1_64 =33, - DSP_REG_AX0_32 =34, - DSP_REG_AX1_32 =35, + DSP_REG_AX0_32 =32, + DSP_REG_AX1_32 =33, +#ifdef _M_X64 + DSP_REG_ACC0_64 =34, + DSP_REG_ACC1_64 =35, DSP_REG_PROD_64 =36, DSP_REG_MAX_MEM_BACKED = 36, +#else + DSP_REG_MAX_MEM_BACKED = 33, +#endif DSP_REG_USED =253, DSP_REG_STATIC =254, @@ -50,24 +54,28 @@ private: struct X64CachedReg { int guest_reg; //including DSPJitRegSpecial + bool pushed; }; struct DynamicReg { Gen::OpArg loc; void *mem; size_t size; bool dirty; - }; - -#ifdef _M_X64 - //when there is a way to do this efficiently in x86, uncondition - struct { - Gen::X64Reg host_reg; - int shift; - bool dirty; bool used; - Gen::X64Reg tmp_reg; - } acc[2]; -#endif + int last_use_ctr; + int parentReg; + int shift;//current shift if parentReg == DSP_REG_NONE + //otherwise the shift this part can be found at + Gen::X64Reg host_reg; +/* todo: + + drop sameReg + + add parentReg + + add shift: + - if parentReg != DSP_REG_NONE, this is the shift where this + register is found in the parentReg + - if parentReg == DSP_REG_NONE, this is the current shift _state_ + */ + }; DynamicReg regs[DSP_REG_MAX_MEM_BACKED+1]; X64CachedReg xregs[NUMXREGS]; @@ -75,11 +83,21 @@ private: DSPEmitter &emitter; bool temporary; bool merged; + + int use_ctr; private: //find a free host reg Gen::X64Reg findFreeXReg(); Gen::X64Reg spillXReg(); + Gen::X64Reg findSpillFreeXReg(); void spillXReg(Gen::X64Reg reg); + + void movToHostReg(int reg, Gen::X64Reg host_reg, bool load); 
+ void movToHostReg(int reg, bool load); + void rotateHostReg(int reg, int shift, bool emit); + void movToMemory(int reg); + void flushMemBackedRegs(); + public: DSPJitRegCache(DSPEmitter &_emitter); @@ -147,10 +165,19 @@ public: //prepare state so that another flushed DSPJitRegCache can take over void flushRegs(); - void loadStaticRegs();//load statically allocated regs from memory - void saveStaticRegs();//save statically allocated regs to memory + void loadRegs(bool emit=true);//load statically allocated regs from memory + void saveRegs();//save statically allocated regs to memory + + void pushRegs();//save registers before abi call + void popRegs();//restore registers after abi call + + //returns a register with the same contents as reg that is safe + //to use through saveStaticRegs and for ABI-calls + Gen::X64Reg makeABICallSafe(Gen::X64Reg reg); //gives no SCALE_RIP with abs(offset) >= 0x80000000 + //32/64 bit writes allowed when the register has a _64 or _32 suffix + //only 16 bit writes allowed without any suffix. 
void getReg(int reg, Gen::OpArg &oparg, bool load = true); //done with all usages of OpArg above void putReg(int reg, bool dirty = true); diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitUtil.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitUtil.cpp index da0857e188..b9e166f698 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitUtil.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitUtil.cpp @@ -36,32 +36,32 @@ using namespace Gen; // EAX = g_dsp.r.ar[reg] // EDX = g_dsp.r.wr[reg] -// EDI = temp -// ECX = temp void DSPEmitter::increment_addr_reg(int reg) { OpArg ar_reg; OpArg wr_reg; gpr.getReg(DSP_REG_WR0+reg,wr_reg); MOVZX(32, 16, EDX, wr_reg); - gpr.putReg(DSP_REG_WR0+reg); + gpr.putReg(DSP_REG_WR0+reg, false); gpr.getReg(DSP_REG_AR0+reg,ar_reg); MOVZX(32, 16, EAX, ar_reg); - + X64Reg tmp1; + gpr.getFreeXReg(tmp1); //u32 nar = ar + 1; - MOV(32, R(EDI), R(EAX)); + MOV(32, R(tmp1), R(EAX)); ADD(32, R(EAX), Imm8(1)); // if ((nar ^ ar) > ((wr | 1) << 1)) // nar -= wr + 1; - XOR(32, R(EDI), R(EAX)); - LEA(32, ECX, MComplex(EDX, EDX, 1, 0)); + XOR(32, R(tmp1), R(EAX)); + LEA(32, ECX, MRegSum(EDX, EDX)); OR(32, R(ECX), Imm8(2)); - CMP(32, R(EDI), R(ECX)); + CMP(32, R(tmp1), R(ECX)); FixupBranch nowrap = J_CC(CC_BE); SUB(16, R(AX), R(DX)); SUB(16, R(AX), Imm8(1)); SetJumpTarget(nowrap); + gpr.putXReg(tmp1); // g_dsp.r.ar[reg] = nar; MOV(16, ar_reg, R(AX)); @@ -70,171 +70,173 @@ void DSPEmitter::increment_addr_reg(int reg) // EAX = g_dsp.r.ar[reg] // EDX = g_dsp.r.wr[reg] -// EDI = temp -// ECX = temp void DSPEmitter::decrement_addr_reg(int reg) { OpArg ar_reg; OpArg wr_reg; gpr.getReg(DSP_REG_WR0+reg,wr_reg); MOVZX(32, 16, EDX, wr_reg); - gpr.putReg(DSP_REG_WR0+reg); + gpr.putReg(DSP_REG_WR0+reg, false); gpr.getReg(DSP_REG_AR0+reg,ar_reg); MOVZX(32, 16, EAX, ar_reg); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // u32 nar = ar + wr; // edi = nar - LEA(32, EDI, MComplex(EAX, EDX, 1, 0)); + LEA(32, tmp1, MRegSum(EAX, EDX)); // if (((nar ^ ar) & ((wr | 1) << 1)) > wr) // nar -= wr + 1; - XOR(32, 
R(EAX), R(EDI)); - LEA(32, ECX, MComplex(EDX, EDX, 1, 0)); + XOR(32, R(EAX), R(tmp1)); + LEA(32, ECX, MRegSum(EDX, EDX)); OR(32, R(ECX), Imm8(2)); AND(32, R(EAX), R(ECX)); CMP(32, R(EAX), R(EDX)); FixupBranch nowrap = J_CC(CC_BE); - SUB(16, R(DI), R(DX)); - SUB(16, R(DI), Imm8(1)); - SetJumpTarget(nowrap); + SUB(16, R(tmp1), R(DX)); + SUB(16, R(tmp1), Imm8(1)); + SetJumpTarget(nowrap); // g_dsp.r.ar[reg] = nar; - MOV(16, ar_reg, R(DI)); + MOV(16, ar_reg, R(tmp1)); gpr.putReg(DSP_REG_AR0+reg); + gpr.putXReg(tmp1); } // Increase addr register according to the correspond ix register // EAX = g_dsp.r.ar[reg] // EDX = g_dsp.r.wr[reg] -// ESI = g_dsp.r.ix[reg] -// ECX = temp -// EDI = temp -void DSPEmitter::increase_addr_reg(int reg) -{ +// ECX = g_dsp.r.ix[reg] +void DSPEmitter::increase_addr_reg(int reg, int _ix_reg) +{ OpArg ar_reg; OpArg wr_reg; OpArg ix_reg; gpr.getReg(DSP_REG_WR0+reg,wr_reg); - gpr.getReg(DSP_REG_IX0+reg,ix_reg); MOVZX(32, 16, EDX, wr_reg); - MOVSX(32, 16, ESI, ix_reg); - gpr.putReg(DSP_REG_WR0+reg); - gpr.putReg(DSP_REG_IX0+reg); + gpr.putReg(DSP_REG_WR0+reg, false); + gpr.getReg(DSP_REG_IX0+_ix_reg,ix_reg); + MOVSX(32, 16, ECX, ix_reg); + gpr.putReg(DSP_REG_IX0+_ix_reg, false); gpr.getReg(DSP_REG_AR0+reg,ar_reg); MOVZX(32, 16, EAX, ar_reg); - + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); //u32 nar = ar + ix; //edi = nar - LEA(32, EDI, MComplex(EAX, ESI, 1, 0)); + LEA(32, tmp1, MRegSum(EAX, ECX)); //u32 dar = (nar ^ ar ^ ix) & ((wr | 1) << 1); //eax = dar - XOR(32, R(EAX), R(ESI)); - XOR(32, R(EAX), R(EDI)); - LEA(32, ECX, MComplex(EDX, EDX, 1, 0)); + XOR(32, R(EAX), R(ECX)); + XOR(32, R(EAX), R(tmp1)); + LEA(32, ECX, MRegSum(EDX, EDX)); OR(32, R(ECX), Imm8(2)); AND(32, R(EAX), R(ECX)); //if (ix >= 0) - TEST(32, R(ESI), R(ESI)); + TEST(32, R(ECX), R(ECX)); FixupBranch negative = J_CC(CC_S); //if (dar > wr) CMP(32, R(EAX), R(EDX)); FixupBranch done = J_CC(CC_BE); //nar -= wr + 1; - SUB(16, R(DI), R(DX)); - SUB(16, R(DI), Imm8(1)); + SUB(16, R(tmp1), 
R(DX)); + SUB(16, R(tmp1), Imm8(1)); FixupBranch done2 = J(); //else SetJumpTarget(negative); //if ((((nar + wr + 1) ^ nar) & dar) <= wr) - LEA(32, ECX, MComplex(EDI, EDX, 1, 1)); - XOR(32, R(ECX), R(EDI)); + LEA(32, ECX, MComplex(tmp1, EDX, 1, 1)); + XOR(32, R(ECX), R(tmp1)); AND(32, R(ECX), R(EAX)); CMP(32, R(ECX), R(EDX)); FixupBranch done3 = J_CC(CC_A); //nar += wr + 1; - LEA(32, EDI, MComplex(EDI, EDX, 1, 1)); + LEA(32, tmp1, MComplex(tmp1, EDX, 1, 1)); SetJumpTarget(done); SetJumpTarget(done2); SetJumpTarget(done3); // g_dsp.r.ar[reg] = nar; - MOV(16, ar_reg, R(DI)); + MOV(16, ar_reg, R(tmp1)); gpr.putReg(DSP_REG_AR0+reg); + gpr.putXReg(tmp1); } // Decrease addr register according to the correspond ix register // EAX = g_dsp.r.ar[reg] // EDX = g_dsp.r.wr[reg] -// ESI = g_dsp.r.ix[reg] -// ECX = temp -// EDI = temp +// ECX = g_dsp.r.ix[reg] void DSPEmitter::decrease_addr_reg(int reg) { OpArg ar_reg; OpArg wr_reg; OpArg ix_reg; gpr.getReg(DSP_REG_WR0+reg,wr_reg); - gpr.getReg(DSP_REG_IX0+reg,ix_reg); MOVZX(32, 16, EDX, wr_reg); - MOVSX(32, 16, ESI, ix_reg); - gpr.putReg(DSP_REG_WR0+reg); - gpr.putReg(DSP_REG_IX0+reg); + gpr.putReg(DSP_REG_WR0+reg, false); + gpr.getReg(DSP_REG_IX0+reg,ix_reg); + MOVSX(32, 16, ECX, ix_reg); + gpr.putReg(DSP_REG_IX0+reg, false); gpr.getReg(DSP_REG_AR0+reg,ar_reg); MOVZX(32, 16, EAX, ar_reg); - NOT(32, R(ESI)); //esi = ~ix + NOT(32, R(ECX)); //esi = ~ix + X64Reg tmp1; + gpr.getFreeXReg(tmp1); //u32 nar = ar - ix; (ar + ~ix + 1) - LEA(32, EDI, MComplex(EAX, ESI, 1, 1)); + LEA(32, tmp1, MComplex(EAX, ECX, 1, 1)); //u32 dar = (nar ^ ar ^ ~ix) & ((wr | 1) << 1); //eax = dar - XOR(32, R(EAX), R(ESI)); - XOR(32, R(EAX), R(EDI)); - LEA(32, ECX, MComplex(EDX, EDX, 1, 0)); + XOR(32, R(EAX), R(ECX)); + XOR(32, R(EAX), R(tmp1)); + LEA(32, ECX, MRegSum(EDX, EDX)); OR(32, R(ECX), Imm8(2)); AND(32, R(EAX), R(ECX)); //if ((u32)ix > 0xFFFF8000) ==> (~ix < 0x00007FFF) - CMP(32, R(ESI), Imm32(0x00007FFF)); + CMP(32, R(ECX), Imm32(0x00007FFF)); 
FixupBranch positive = J_CC(CC_AE); //if (dar > wr) CMP(32, R(EAX), R(EDX)); FixupBranch done = J_CC(CC_BE); //nar -= wr + 1; - SUB(16, R(DI), R(DX)); - SUB(16, R(DI), Imm8(1)); + SUB(16, R(tmp1), R(DX)); + SUB(16, R(tmp1), Imm8(1)); FixupBranch done2 = J(); //else SetJumpTarget(positive); //if ((((nar + wr + 1) ^ nar) & dar) <= wr) - LEA(32, ECX, MComplex(EDI, EDX, 1, 1)); - XOR(32, R(ECX), R(EDI)); + LEA(32, ECX, MComplex(tmp1, EDX, 1, 1)); + XOR(32, R(ECX), R(tmp1)); AND(32, R(ECX), R(EAX)); CMP(32, R(ECX), R(EDX)); FixupBranch done3 = J_CC(CC_A); //nar += wr + 1; - LEA(32, EDI, MComplex(EDI, EDX, 1, 1)); + LEA(32, tmp1, MComplex(tmp1, EDX, 1, 1)); SetJumpTarget(done); SetJumpTarget(done2); SetJumpTarget(done3); //return nar - MOV(16, ar_reg, R(DI)); + MOV(16, ar_reg, R(tmp1)); gpr.putReg(DSP_REG_AR0+reg); + gpr.putXReg(tmp1); } // EAX - destination address -// ECX - value -// ESI - Base of dram -void DSPEmitter::dmem_write() +// ECX - Base of dram +void DSPEmitter::dmem_write(X64Reg value) { // if (saddr == 0) CMP(16, R(EAX), Imm16(0x0fff)); @@ -243,45 +245,47 @@ void DSPEmitter::dmem_write() // g_dsp.dram[addr & DSP_DRAM_MASK] = val; AND(16, R(EAX), Imm16(DSP_DRAM_MASK)); #ifdef _M_X64 - MOV(64, R(ESI), ImmPtr(g_dsp.dram)); + MOV(64, R(ECX), ImmPtr(g_dsp.dram)); #else - MOV(32, R(ESI), ImmPtr(g_dsp.dram)); + MOV(32, R(ECX), ImmPtr(g_dsp.dram)); #endif - MOV(16, MComplex(ESI, EAX, 2, 0), R(ECX)); + MOV(16, MComplex(ECX, EAX, 2, 0), R(value)); - FixupBranch end = J(); + FixupBranch end = J(true); // else if (saddr == 0xf) SetJumpTarget(ifx); // Does it mean gdsp_ifx_write needs u32 rather than u16? 
DSPJitRegCache c(gpr); - SaveDSPRegs(); - ABI_CallFunctionRR((void *)gdsp_ifx_write, EAX, ECX); - LoadDSPRegs(); + X64Reg abisafereg = gpr.makeABICallSafe(value); + gpr.pushRegs(); + ABI_CallFunctionRR((void *)gdsp_ifx_write, EAX, abisafereg); + gpr.popRegs(); gpr.flushRegs(c); SetJumpTarget(end); } -// ECX - value -void DSPEmitter::dmem_write_imm(u16 address) +void DSPEmitter::dmem_write_imm(u16 address, X64Reg value) { switch (address >> 12) { case 0x0: // 0xxx DRAM #ifdef _M_IX86 // All32 - MOV(16, M(&g_dsp.dram[address & DSP_DRAM_MASK]), R(ECX)); + MOV(16, M(&g_dsp.dram[address & DSP_DRAM_MASK]), R(value)); #else MOV(64, R(RDX), ImmPtr(g_dsp.dram)); - MOV(16, MDisp(RDX, (address & DSP_DRAM_MASK)*2), R(ECX)); + MOV(16, MDisp(RDX, (address & DSP_DRAM_MASK)*2), R(value)); #endif break; case 0xf: // Fxxx HW regs + { MOV(16, R(EAX), Imm16(address)); - SaveDSPRegs(); - ABI_CallFunctionRR((void *)gdsp_ifx_write, EAX, ECX); - LoadDSPRegs(); + X64Reg abisafereg = gpr.makeABICallSafe(value); + gpr.pushRegs(); + ABI_CallFunctionRR((void *)gdsp_ifx_write, EAX, abisafereg); + gpr.popRegs(); break; - + } default: // Unmapped/non-existing memory ERROR_LOG(DSPLLE, "%04x DSP ERROR: Write to UNKNOWN (%04x) memory", g_dsp.pc, address); @@ -289,81 +293,80 @@ void DSPEmitter::dmem_write_imm(u16 address) } } -// In: ECX - the address to read +// In: (address) - the address to read // Out: EAX - the result of the read (used by caller) -// ESI - Base -void DSPEmitter::imem_read() +// ECX - Base +void DSPEmitter::imem_read(X64Reg address) { // if (addr == 0) - CMP(16, R(ECX), Imm16(0x0fff)); + CMP(16, R(address), Imm16(0x0fff)); FixupBranch irom = J_CC(CC_A); // return g_dsp.iram[addr & DSP_IRAM_MASK]; - AND(16, R(ECX), Imm16(DSP_IRAM_MASK)); + AND(16, R(address), Imm16(DSP_IRAM_MASK)); #ifdef _M_X64 - MOV(64, R(ESI), ImmPtr(g_dsp.iram)); + MOV(64, R(ECX), ImmPtr(g_dsp.iram)); #else - MOV(32, R(ESI), ImmPtr(g_dsp.iram)); + MOV(32, R(ECX), ImmPtr(g_dsp.iram)); #endif - MOV(16, R(EAX), 
MComplex(ESI, ECX, 2, 0)); + MOV(16, R(EAX), MComplex(ECX, address, 2, 0)); FixupBranch end = J(); SetJumpTarget(irom); // else if (addr == 0x8) // return g_dsp.irom[addr & DSP_IROM_MASK]; - AND(16, R(ECX), Imm16(DSP_IROM_MASK)); + AND(16, R(address), Imm16(DSP_IROM_MASK)); #ifdef _M_X64 - MOV(64, R(ESI), ImmPtr(g_dsp.irom)); + MOV(64, R(ECX), ImmPtr(g_dsp.irom)); #else - MOV(32, R(ESI), ImmPtr(g_dsp.irom)); + MOV(32, R(ECX), ImmPtr(g_dsp.irom)); #endif - MOV(16, R(EAX), MComplex(ESI, ECX, 2, 0)); + MOV(16, R(EAX), MComplex(ECX, address, 2, 0)); SetJumpTarget(end); } -// In: ECX - the address to read +// In: (address) - the address to read // Out: EAX - the result of the read (used by caller) -// ESI - Base -void DSPEmitter::dmem_read() +// ECX - Base +void DSPEmitter::dmem_read(X64Reg address) { // if (saddr == 0) - CMP(16, R(ECX), Imm16(0x0fff)); + CMP(16, R(address), Imm16(0x0fff)); FixupBranch dram = J_CC(CC_A); // return g_dsp.dram[addr & DSP_DRAM_MASK]; + AND(32, R(address), Imm32(DSP_DRAM_MASK)); #ifdef _M_X64 - AND(16, R(ECX), Imm16(DSP_DRAM_MASK)); - MOVZX(64, 16, RCX, R(RCX)); - MOV(64, R(ESI), ImmPtr(g_dsp.dram)); + MOVZX(64, 16, address, R(address)); + MOV(64, R(ECX), ImmPtr(g_dsp.dram)); #else - AND(32, R(ECX), Imm32(DSP_DRAM_MASK)); - MOV(32, R(ESI), ImmPtr(g_dsp.dram)); + MOV(32, R(ECX), ImmPtr(g_dsp.dram)); #endif - MOV(16, R(EAX), MComplex(ESI, ECX, 2, 0)); + MOV(16, R(EAX), MComplex(ECX, address, 2, 0)); - FixupBranch end = J(); + FixupBranch end = J(true); SetJumpTarget(dram); // else if (saddr == 0x1) - CMP(16, R(ECX), Imm16(0x1fff)); + CMP(16, R(address), Imm16(0x1fff)); FixupBranch ifx = J_CC(CC_A); // return g_dsp.coef[addr & DSP_COEF_MASK]; + AND(32, R(address), Imm32(DSP_COEF_MASK)); #ifdef _M_X64 - AND(16, R(ECX), Imm16(DSP_COEF_MASK)); - MOVZX(64, 16, RCX, R(RCX)); - MOV(64, R(ESI), ImmPtr(g_dsp.coef)); + MOVZX(64, 16, address, R(address)); + MOV(64, R(ECX), ImmPtr(g_dsp.coef)); #else - AND(32, R(ECX), Imm32(DSP_COEF_MASK)); - MOV(32, 
R(ESI), ImmPtr(g_dsp.coef)); + MOV(32, R(ECX), ImmPtr(g_dsp.coef)); #endif - MOV(16, R(EAX), MComplex(ESI, ECX, 2, 0)); + MOV(16, R(EAX), MComplex(ECX, address, 2, 0)); - FixupBranch end2 = J(); + FixupBranch end2 = J(true); SetJumpTarget(ifx); // else if (saddr == 0xf) // return gdsp_ifx_read(addr); DSPJitRegCache c(gpr); - SaveDSPRegs(); - ABI_CallFunctionR((void *)gdsp_ifx_read, ECX); - LoadDSPRegs(); + X64Reg abisafereg = gpr.makeABICallSafe(address); + gpr.pushRegs(); + ABI_CallFunctionR((void *)gdsp_ifx_read, abisafereg); + gpr.popRegs(); gpr.flushRegs(c); SetJumpTarget(end); SetJumpTarget(end2); @@ -392,11 +395,12 @@ void DSPEmitter::dmem_read_imm(u16 address) break; case 0xf: // Fxxx HW regs - SaveDSPRegs(); + { + gpr.pushRegs(); ABI_CallFunctionC16((void *)gdsp_ifx_read, address); - LoadDSPRegs(); + gpr.popRegs(); break; - + } default: // Unmapped/non-existing memory ERROR_LOG(DSPLLE, "%04x DSP ERROR: Read from UNKNOWN (%04x) memory", g_dsp.pc, address); @@ -408,11 +412,13 @@ void DSPEmitter::get_long_prod(X64Reg long_prod) { #ifdef _M_X64 //s64 val = (s8)(u8)g_dsp.r[DSP_REG_PRODH]; - OpArg reg; - gpr.getReg(DSP_REG_PROD_64, reg); + OpArg prod_reg; + gpr.getReg(DSP_REG_PROD_64, prod_reg); + MOV(64, R(long_prod), prod_reg); + gpr.putReg(DSP_REG_PROD_64, false); + //no use in keeping prod_reg any longer. 
X64Reg tmp; gpr.getFreeXReg(tmp); - MOV(64, R(long_prod), reg); MOV(64, R(tmp), R(long_prod)); SHL(64, R(long_prod), Imm8(64-40));//sign extend SAR(64, R(long_prod), Imm8(64-40)); @@ -420,33 +426,35 @@ void DSPEmitter::get_long_prod(X64Reg long_prod) SHL(64, R(tmp), Imm8(16)); ADD(64, R(long_prod), R(tmp)); gpr.putXReg(tmp); - gpr.putReg(DSP_REG_PROD_64, false); #endif } // Returns s64 in RAX -// Clobbers RSI +// Clobbers RCX void DSPEmitter::get_long_prod_round_prodl(X64Reg long_prod) { #ifdef _M_X64 //s64 prod = dsp_get_long_prod(); get_long_prod(long_prod); + X64Reg tmp; + gpr.getFreeXReg(tmp); //if (prod & 0x10000) prod = (prod + 0x8000) & ~0xffff; TEST(32, R(long_prod), Imm32(0x10000)); FixupBranch jump = J_CC(CC_Z); ADD(64, R(long_prod), Imm32(0x8000)); - MOV(64, R(ESI), Imm64(~0xffff)); - AND(64, R(long_prod), R(RSI)); + MOV(64, R(tmp), Imm64(~0xffff)); + AND(64, R(long_prod), R(tmp)); FixupBranch _ret = J(); //else prod = (prod + 0x7fff) & ~0xffff; SetJumpTarget(jump); ADD(64, R(long_prod), Imm32(0x7fff)); - MOV(64, R(RSI), Imm64(~0xffff)); - AND(64, R(long_prod), R(RSI)); + MOV(64, R(tmp), Imm64(~0xffff)); + AND(64, R(long_prod), R(tmp)); SetJumpTarget(_ret); //return prod; + gpr.putXReg(tmp); #endif } @@ -456,23 +464,23 @@ void DSPEmitter::get_long_prod_round_prodl(X64Reg long_prod) void DSPEmitter::set_long_prod() { #ifdef _M_X64 - OpArg reg; - gpr.getReg(DSP_REG_PROD_64, reg, false); X64Reg tmp; gpr.getFreeXReg(tmp); MOV(64, R(tmp), Imm64(0x000000ffffffffffULL)); AND(64, R(RAX), R(tmp)); - // g_dsp.r[DSP_REG_PRODL] = (u16)val; - MOV(64, reg, R(RAX)); - gpr.putXReg(tmp); + OpArg prod_reg; + gpr.getReg(DSP_REG_PROD_64, prod_reg, false); + // g_dsp.r[DSP_REG_PRODL] = (u16)val; + MOV(64, prod_reg, R(RAX)); + gpr.putReg(DSP_REG_PROD_64, true); #endif } // Returns s64 in RAX -// Clobbers RSI +// Clobbers RCX void DSPEmitter::round_long_acc(X64Reg long_acc) { #ifdef _M_X64 @@ -480,14 +488,14 @@ void DSPEmitter::round_long_acc(X64Reg long_acc) TEST(32, 
R(long_acc), Imm32(0x10000)); FixupBranch jump = J_CC(CC_Z); ADD(64, R(long_acc), Imm32(0x8000)); - MOV(64, R(ESI), Imm64(~0xffff)); - AND(64, R(long_acc), R(RSI)); + MOV(64, R(ECX), Imm64(~0xffff)); + AND(64, R(long_acc), R(RCX)); FixupBranch _ret = J(); //else prod = (prod + 0x7fff) & ~0xffff; SetJumpTarget(jump); ADD(64, R(long_acc), Imm32(0x7fff)); - MOV(64, R(RSI), Imm64(~0xffff)); - AND(64, R(long_acc), R(RSI)); + MOV(64, R(RCX), Imm64(~0xffff)); + AND(64, R(long_acc), R(RCX)); SetJumpTarget(_ret); //return prod; #endif @@ -577,15 +585,5 @@ void DSPEmitter::get_ax_h(int _reg, X64Reg axh) gpr.readReg(_reg+DSP_REG_AXH0, axh, SIGN); } -void DSPEmitter::LoadDSPRegs() -{ - // Load DSP register state here... - gpr.loadStaticRegs(); -} -void DSPEmitter::SaveDSPRegs() -{ - // Save DSP register state here... - gpr.saveStaticRegs(); -}