From d56390407f547ca52e2711339ce12ca63286249d Mon Sep 17 00:00:00 2001 From: pierre Date: Sun, 27 Feb 2011 18:04:35 +0000 Subject: [PATCH] Core/DSP: Access all registers except ST* through the regcache No speed difference, probably because our basic blocks are too small to contain more than one access to any register or used too seldom for multiple accesses to make a difference. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@7260 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Core/Src/DSP/DSPEmitter.cpp | 33 +- Source/Core/Core/Src/DSP/DSPEmitter.h | 25 +- .../Core/Src/DSP/Jit/DSPJitArithmetic.cpp | 257 +++-- Source/Core/Core/Src/DSP/Jit/DSPJitBranch.cpp | 23 +- Source/Core/Core/Src/DSP/Jit/DSPJitCCUtil.cpp | 41 +- Source/Core/Core/Src/DSP/Jit/DSPJitExtOps.cpp | 273 ++++-- .../Core/Core/Src/DSP/Jit/DSPJitLoadStore.cpp | 173 +++- Source/Core/Core/Src/DSP/Jit/DSPJitMisc.cpp | 30 +- .../Core/Src/DSP/Jit/DSPJitMultiplier.cpp | 168 ++-- .../Core/Core/Src/DSP/Jit/DSPJitRegCache.cpp | 891 +++++++++++++----- Source/Core/Core/Src/DSP/Jit/DSPJitRegCache.h | 61 +- Source/Core/Core/Src/DSP/Jit/DSPJitUtil.cpp | 282 +++--- 12 files changed, 1483 insertions(+), 774 deletions(-) diff --git a/Source/Core/Core/Src/DSP/DSPEmitter.cpp b/Source/Core/Core/Src/DSP/DSPEmitter.cpp index caf6f1ed74..8fc2a94923 100644 --- a/Source/Core/Core/Src/DSP/DSPEmitter.cpp +++ b/Source/Core/Core/Src/DSP/DSPEmitter.cpp @@ -87,10 +87,11 @@ void DSPEmitter::checkExceptions(u32 retval) MOV(16, M(&(g_dsp.pc)), Imm16(compilePC)); DSPJitRegCache c(gpr); - SaveDSPRegs(); + gpr.saveRegs(); ABI_CallFunction((void *)&DSPCore_CheckExceptions); MOV(32, R(EAX), Imm32(retval)); JMP(returnDispatcher, true); + gpr.loadRegs(false); gpr.flushRegs(c,false); SetJumpTarget(skipCheck); @@ -107,9 +108,9 @@ void DSPEmitter::Default(UDSPInstruction inst) } // Fall back to interpreter - SaveDSPRegs(); + gpr.pushRegs(); ABI_CallFunctionC16((void*)opTable[inst]->intFunc, inst); - LoadDSPRegs(); + gpr.popRegs(); } 
void DSPEmitter::EmitInstruction(UDSPInstruction inst) @@ -122,9 +123,9 @@ void DSPEmitter::EmitInstruction(UDSPInstruction inst) if ((inst >> 12) == 0x3) { if (! extOpTable[inst & 0x7F]->jitFunc) { // Fall back to interpreter - SaveDSPRegs(); + gpr.pushRegs(); ABI_CallFunctionC16((void*)extOpTable[inst & 0x7F]->intFunc, inst); - LoadDSPRegs(); + gpr.popRegs(); INFO_LOG(DSPLLE, "Instruction not JITed(ext part): %04x\n", inst); ext_is_jit = false; } else { @@ -134,9 +135,9 @@ void DSPEmitter::EmitInstruction(UDSPInstruction inst) } else { if (!extOpTable[inst & 0xFF]->jitFunc) { // Fall back to interpreter - SaveDSPRegs(); + gpr.pushRegs(); ABI_CallFunctionC16((void*)extOpTable[inst & 0xFF]->intFunc, inst); - LoadDSPRegs(); + gpr.popRegs(); INFO_LOG(DSPLLE, "Instruction not JITed(ext part): %04x\n", inst); ext_is_jit = false; } else { @@ -161,9 +162,9 @@ void DSPEmitter::EmitInstruction(UDSPInstruction inst) if (!ext_is_jit) { //need to call the online cleanup function because //the writeBackLog gets populated at runtime - SaveDSPRegs(); + gpr.pushRegs(); ABI_CallFunction((void*)::applyWriteBackLog); - LoadDSPRegs(); + gpr.popRegs(); } else { popExtValueToReg(); } @@ -189,10 +190,10 @@ void DSPEmitter::Compile(u16 start_addr) return; if (g_dsp.exceptions == 0) - return; + return; */ - LoadDSPRegs(); + gpr.loadRegs(); blockLinkEntry = GetCodePtr(); @@ -240,7 +241,7 @@ void DSPEmitter::Compile(u16 start_addr) // end of each block and in this order DSPJitRegCache c(gpr); HandleLoop(); - SaveDSPRegs(); + gpr.saveRegs(); if (!DSPHost_OnThread() && DSPAnalyzer::code_flags[start_addr] & DSPAnalyzer::CODE_IDLE_SKIP) { MOV(16, R(EAX), Imm16(DSP_IDLE_SKIP_CYCLES)); @@ -250,6 +251,7 @@ void DSPEmitter::Compile(u16 start_addr) MOV(16, R(EAX), Imm16(blockSize[start_addr])); } JMP(returnDispatcher, true); + gpr.loadRegs(false); gpr.flushRegs(c,false); SetJumpTarget(rLoopAddressExit); @@ -273,7 +275,7 @@ void DSPEmitter::Compile(u16 start_addr) DSPJitRegCache c(gpr); //don't 
update g_dsp.pc -- the branch insn already did - SaveDSPRegs(); + gpr.saveRegs(); if (!DSPHost_OnThread() && DSPAnalyzer::code_flags[start_addr] & DSPAnalyzer::CODE_IDLE_SKIP) { MOV(16, R(EAX), Imm16(DSP_IDLE_SKIP_CYCLES)); @@ -283,6 +285,7 @@ void DSPEmitter::Compile(u16 start_addr) MOV(16, R(EAX), Imm16(blockSize[start_addr])); } JMP(returnDispatcher, true); + gpr.loadRegs(false); gpr.flushRegs(c,false); SetJumpTarget(rNoBranch); @@ -334,7 +337,7 @@ void DSPEmitter::Compile(u16 start_addr) blockSize[start_addr] = 1; } - SaveDSPRegs(); + gpr.saveRegs(); if (!DSPHost_OnThread() && DSPAnalyzer::code_flags[start_addr] & DSPAnalyzer::CODE_IDLE_SKIP) { MOV(16, R(EAX), Imm16(DSP_IDLE_SKIP_CYCLES)); @@ -342,7 +345,7 @@ void DSPEmitter::Compile(u16 start_addr) else { MOV(16, R(EAX), Imm16(blockSize[start_addr])); - } + } JMP(returnDispatcher, true); } diff --git a/Source/Core/Core/Src/DSP/DSPEmitter.h b/Source/Core/Core/Src/DSP/DSPEmitter.h index 874caef5fe..a9b357b0a4 100644 --- a/Source/Core/Core/Src/DSP/DSPEmitter.h +++ b/Source/Core/Core/Src/DSP/DSPEmitter.h @@ -52,8 +52,10 @@ public: // CC Util void Update_SR_Register64(Gen::X64Reg val = Gen::EAX); - void Update_SR_Register64_Carry(Gen::X64Reg val = Gen::EAX); - void Update_SR_Register64_Carry2(Gen::X64Reg val = Gen::EAX); + void Update_SR_Register64_Carry(Gen::X64Reg val, + Gen::X64Reg carry_ovfl); + void Update_SR_Register64_Carry2(Gen::X64Reg val, + Gen::X64Reg carry_ovfl); void Update_SR_Register16(Gen::X64Reg val = Gen::EAX); void Update_SR_Register16_OverS32(Gen::X64Reg val = Gen::EAX); @@ -65,13 +67,13 @@ public: // Memory helper functions void increment_addr_reg(int reg); void decrement_addr_reg(int reg); - void increase_addr_reg(int reg); + void increase_addr_reg(int reg, int ix_reg); void decrease_addr_reg(int reg); - void imem_read(); - void dmem_read(); + void imem_read(Gen::X64Reg address); + void dmem_read(Gen::X64Reg address); void dmem_read_imm(u16 addr); - void dmem_write(); - void dmem_write_imm(u16 
addr); + void dmem_write(Gen::X64Reg value); + void dmem_write_imm(u16 addr, Gen::X64Reg value); // Ext command helpers void pushExtValueFromReg(u16 dreg, u16 sreg); @@ -250,6 +252,7 @@ public: // CALL this to start the dispatcher const u8 *enterDispatcher; + const u8 *reenterDispatcher; const u8 *stubEntryPoint; const u8 *returnDispatcher; u16 compilePC; @@ -259,10 +262,6 @@ public: std::list *unresolvedJumps; DSPJitRegCache gpr; - - void LoadDSPRegs(); - void SaveDSPRegs(); - private: DSPCompiledCode *blocks; Block blockLinkEntry; @@ -275,12 +274,8 @@ private: // Counts down. // int cycles; - void Update_SR_Register(Gen::X64Reg val = Gen::EAX); - void ToMask(Gen::X64Reg value_reg = Gen::EDI); - void dsp_increment_one(Gen::X64Reg ar = Gen::EAX, Gen::X64Reg wr = Gen::EDX, Gen::X64Reg wr_pow = Gen::EDI, Gen::X64Reg temp_reg = Gen::ESI); - void dsp_decrement_one(Gen::X64Reg ar = Gen::EAX, Gen::X64Reg wr = Gen::EDX, Gen::X64Reg wr_pow = Gen::EDI, Gen::X64Reg temp_reg = Gen::ESI); void get_long_prod(Gen::X64Reg long_prod = Gen::RAX); void get_long_prod_round_prodl(Gen::X64Reg long_prod = Gen::RAX); void set_long_prod(); diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitArithmetic.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitArithmetic.cpp index ff536eb177..c1716d0d41 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitArithmetic.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitArithmetic.cpp @@ -210,16 +210,19 @@ void DSPEmitter::cmp(const UDSPInstruction opc) #ifdef _M_X64 if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc0 = dsp_get_long_acc(0); - get_long_acc(0, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(0, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 acc1 = dsp_get_long_acc(1); get_long_acc(1, RDX); // s64 res = dsp_convert_long_acc(acc0 - acc1); SUB(64, R(RAX), R(RDX)); // Update_SR_Register64(res, isCarry2(acc0, res), 
isOverflow(acc0, -acc1, res)); // CF -> influence on ABS/0xa100 NEG(64, R(RDX)); - Update_SR_Register64_Carry2(); + Update_SR_Register64_Carry2(EAX, tmp1); + gpr.putXReg(tmp1); } #else Default(opc); @@ -227,7 +230,7 @@ void DSPEmitter::cmp(const UDSPInstruction opc) } // CMPAR $acS axR.h -// 1100 0001 xxxx xxxx +// 110r s001 xxxx xxxx // Compares accumulator $acS with accumulator axR.h. // Not described by Duddie's doc - at least not as a separate instruction. // @@ -240,9 +243,11 @@ void DSPEmitter::cmpar(const UDSPInstruction opc) u8 rreg = ((opc >> 12) & 0x1); u8 sreg = (opc >> 11) & 0x1; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 sr = dsp_get_long_acc(sreg); - get_long_acc(sreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(sreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 rr = (s16)g_dsp.r.axh[rreg]; get_ax_h(rreg, RDX); // rr <<= 16; @@ -251,7 +256,8 @@ void DSPEmitter::cmpar(const UDSPInstruction opc) SUB(64, R(RAX), R(RDX)); // Update_SR_Register64(res, isCarry2(sr, res), isOverflow(sr, -rr, res)); NEG(64, R(RDX)); - Update_SR_Register64_Carry2(); + Update_SR_Register64_Carry2(EAX, tmp1); + gpr.putXReg(tmp1); } #else Default(opc); @@ -271,9 +277,11 @@ void DSPEmitter::cmpi(const UDSPInstruction opc) if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { u8 reg = (opc >> 8) & 0x1; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 val = dsp_get_long_acc(reg); - get_long_acc(reg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(reg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 imm = (s64)(s16)dsp_fetch_code() << 16; // Immediate is considered to be at M level in the 40-bit accumulator. 
u16 imm = dsp_imem_read(compilePC+1); MOV(64, R(RDX), Imm64((s64)(s16)imm << 16)); @@ -281,7 +289,8 @@ void DSPEmitter::cmpi(const UDSPInstruction opc) SUB(64, R(RAX), R(RDX)); // Update_SR_Register64(res, isCarry2(val, res), isOverflow(val, -imm, res)); NEG(64, R(RDX)); - Update_SR_Register64_Carry2(); + Update_SR_Register64_Carry2(EAX, tmp1); + gpr.putXReg(tmp1); } #else Default(opc); @@ -302,8 +311,10 @@ void DSPEmitter::cmpis(const UDSPInstruction opc) { u8 areg = (opc >> 8) & 0x1; // s64 acc = dsp_get_long_acc(areg); - get_long_acc(areg, RCX); - MOV(64, R(RAX), R(RCX)); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + get_long_acc(areg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 val = (s8)opc; // val <<= 16; MOV(64, R(RDX), Imm64((s64)(s8)opc << 16)); @@ -311,7 +322,8 @@ void DSPEmitter::cmpis(const UDSPInstruction opc) SUB(64, R(RAX), R(RDX)); // Update_SR_Register64(res, isCarry2(acc, res), isOverflow(acc, -val, res)); NEG(64, R(RDX)); - Update_SR_Register64_Carry2(); + Update_SR_Register64_Carry2(EAX, tmp1); + gpr.putXReg(tmp1); } #else Default(opc); @@ -341,7 +353,7 @@ void DSPEmitter::xorr(const UDSPInstruction opc) // Update_SR_Register16((s16)accm, false, false, isOverS32(dsp_get_long_acc(dreg))); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - get_long_acc(dreg, RSI); + get_long_acc(dreg, RCX); Update_SR_Register16_OverS32(); } #else @@ -370,7 +382,7 @@ void DSPEmitter::andr(const UDSPInstruction opc) // Update_SR_Register16((s16)accm, false, false, isOverS32(dsp_get_long_acc(dreg))); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - get_long_acc(dreg, RSI); + get_long_acc(dreg, RCX); Update_SR_Register16_OverS32(); } #else @@ -399,7 +411,7 @@ void DSPEmitter::orr(const UDSPInstruction opc) // Update_SR_Register16((s16)accm, false, false, 
isOverS32(dsp_get_long_acc(dreg))); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - get_long_acc(dreg, RSI); + get_long_acc(dreg, RCX); Update_SR_Register16_OverS32(); } #else @@ -427,7 +439,7 @@ void DSPEmitter::andc(const UDSPInstruction opc) // Update_SR_Register16((s16)accm, false, false, isOverS32(dsp_get_long_acc(dreg))); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - get_long_acc(dreg, RSI); + get_long_acc(dreg, RCX); Update_SR_Register16_OverS32(); } #else @@ -455,7 +467,7 @@ void DSPEmitter::orc(const UDSPInstruction opc) // Update_SR_Register16((s16)accm, false, false, isOverS32(dsp_get_long_acc(dreg))); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - get_long_acc(dreg, RSI); + get_long_acc(dreg, RCX); Update_SR_Register16_OverS32(); } #else @@ -482,7 +494,7 @@ void DSPEmitter::xorc(const UDSPInstruction opc) // Update_SR_Register16((s16)accm, false, false, isOverS32(dsp_get_long_acc(dreg))); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - get_long_acc(dreg, RSI); + get_long_acc(dreg, RCX); Update_SR_Register16_OverS32(); } #else @@ -508,7 +520,7 @@ void DSPEmitter::notc(const UDSPInstruction opc) // Update_SR_Register16((s16)accm, false, false, isOverS32(dsp_get_long_acc(dreg))); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - get_long_acc(dreg, RSI); + get_long_acc(dreg, RCX); Update_SR_Register16_OverS32(); } #else @@ -536,7 +548,7 @@ void DSPEmitter::xori(const UDSPInstruction opc) // Update_SR_Register16((s16)g_dsp.r.acm[reg], false, false, 
isOverS32(dsp_get_long_acc(reg))); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - get_long_acc(reg, RSI); + get_long_acc(reg, RCX); Update_SR_Register16_OverS32(); } #else @@ -563,7 +575,7 @@ void DSPEmitter::andi(const UDSPInstruction opc) // Update_SR_Register16((s16)g_dsp.r.acm[reg], false, false, isOverS32(dsp_get_long_acc(reg))); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - get_long_acc(reg, RSI); + get_long_acc(reg, RCX); Update_SR_Register16_OverS32(); } #else @@ -590,7 +602,7 @@ void DSPEmitter::ori(const UDSPInstruction opc) // Update_SR_Register16((s16)g_dsp.r.acm[reg], false, false, isOverS32(dsp_get_long_acc(reg))); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - get_long_acc(reg, RSI); + get_long_acc(reg, RCX); Update_SR_Register16_OverS32(); } #else @@ -612,8 +624,10 @@ void DSPEmitter::addr(const UDSPInstruction opc) u8 sreg = ((opc >> 9) & 0x3) + DSP_REG_AXL0; // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 ax = (s16)g_dsp.r[sreg]; dsp_op_read_reg(sreg, RDX, SIGN); // ax <<= 16; @@ -624,15 +638,15 @@ void DSPEmitter::addr(const UDSPInstruction opc) // Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, ax, res)); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry(EAX, tmp1); } else { set_long_acc(dreg, RAX); } - + 
gpr.putXReg(tmp1); #else Default(opc); #endif @@ -649,9 +663,11 @@ void DSPEmitter::addax(const UDSPInstruction opc) u8 dreg = (opc >> 8) & 0x1; u8 sreg = (opc >> 9) & 0x1; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 ax = dsp_get_long_acx(sreg); get_long_acx(sreg, RDX); // s64 res = acc + ax; @@ -661,14 +677,15 @@ void DSPEmitter::addax(const UDSPInstruction opc) // Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, ax, res)); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry(EAX, tmp1); } else { set_long_acc(dreg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -684,9 +701,11 @@ void DSPEmitter::add(const UDSPInstruction opc) #ifdef _M_X64 u8 dreg = (opc >> 8) & 0x1; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc0 = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 acc1 = dsp_get_long_acc(1 - dreg); get_long_acc(1 - dreg, RDX); // s64 res = acc0 + acc1; @@ -696,14 +715,15 @@ void DSPEmitter::add(const UDSPInstruction opc) // Update_SR_Register64(res, isCarry(acc0, res), isOverflow(acc0, acc1, res)); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry(EAX, tmp1); } else { set_long_acc(dreg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -719,9 +739,11 @@ void 
DSPEmitter::addp(const UDSPInstruction opc) #ifdef _M_X64 u8 dreg = (opc >> 8) & 0x1; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 prod = dsp_get_long_prod(); get_long_prod(RDX); // s64 res = acc + prod; @@ -731,14 +753,15 @@ void DSPEmitter::addp(const UDSPInstruction opc) // Update_SR_Register64(res, isCarry2(acc, res), isOverflow(acc, prod, res)); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry2(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry2(EAX, tmp1); } else { set_long_acc(dreg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -756,9 +779,11 @@ void DSPEmitter::addaxl(const UDSPInstruction opc) u8 sreg = (opc >> 9) & 0x1; u8 dreg = (opc >> 8) & 0x1; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // u64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // u16 acx = (u16)dsp_get_ax_l(sreg); get_ax_l(sreg, RDX); // u64 res = acc + acx; @@ -768,14 +793,15 @@ void DSPEmitter::addaxl(const UDSPInstruction opc) // Update_SR_Register64((s64)res, isCarry(acc, res), isOverflow((s64)acc, (s64)acx, (s64)res)); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry(EAX, tmp1); } else { set_long_acc(dreg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -791,9 +817,11 @@ void DSPEmitter::addi(const UDSPInstruction opc) { #ifdef _M_X64 u8 areg = (opc 
>> 8) & 0x1; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc = dsp_get_long_acc(areg); - get_long_acc(areg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(areg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 imm = (s16)dsp_fetch_code(); s16 imm = dsp_imem_read(compilePC+1); //imm <<= 16; @@ -807,14 +835,15 @@ void DSPEmitter::addi(const UDSPInstruction opc) // Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, imm, res)); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - MOV(64, R(RSI), R(RAX)); - set_long_acc(areg, RSI); - Update_SR_Register64_Carry(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(areg, RCX); + Update_SR_Register64_Carry(EAX, tmp1); } else { set_long_acc(areg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -830,9 +859,11 @@ void DSPEmitter::addis(const UDSPInstruction opc) #ifdef _M_X64 u8 dreg = (opc >> 8) & 0x1; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 imm = (s8)(u8)opc; // imm <<= 16; MOV(8, R(RDX), Imm8((u8)opc)); @@ -845,14 +876,15 @@ void DSPEmitter::addis(const UDSPInstruction opc) // Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, imm, res)); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry(EAX, tmp1); } else { set_long_acc(dreg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -868,9 +900,11 @@ void DSPEmitter::incm(const UDSPInstruction opc) #ifdef _M_X64 u8 dreg = (opc >> 8) & 0x1; s64 subtract = 0x10000; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc = dsp_get_long_acc(dreg); - 
get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 res = acc + sub; ADD(64, R(RAX), Imm32((u32)subtract)); // dsp_set_long_acc(dreg, res); @@ -879,15 +913,15 @@ void DSPEmitter::incm(const UDSPInstruction opc) if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { MOV(64, R(RDX), Imm32((u32)subtract)); - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry(EAX, tmp1); } else { set_long_acc(dreg); - Update_SR_Register64_Carry(); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -902,9 +936,11 @@ void DSPEmitter::inc(const UDSPInstruction opc) { #ifdef _M_X64 u8 dreg = (opc >> 8) & 0x1; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 res = acc + 1; ADD(64, R(RAX), Imm8(1)); // dsp_set_long_acc(dreg, res); @@ -913,15 +949,16 @@ void DSPEmitter::inc(const UDSPInstruction opc) if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { MOV(64, R(RDX), Imm64(1)); - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry(EAX, tmp1); } else { set_long_acc(dreg); - Update_SR_Register64_Carry(); + Update_SR_Register64_Carry(EAX, tmp1);//why is this still done? 
} + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -940,9 +977,11 @@ void DSPEmitter::subr(const UDSPInstruction opc) u8 dreg = (opc >> 8) & 0x1; u8 sreg = ((opc >> 9) & 0x3) + DSP_REG_AXL0; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 ax = (s16)g_dsp.r[sreg]; dsp_op_read_reg(sreg, RDX, SIGN); // ax <<= 16; @@ -955,14 +994,15 @@ void DSPEmitter::subr(const UDSPInstruction opc) if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { NEG(64, R(RDX)); - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry2(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry2(EAX, tmp1); } else { set_long_acc(dreg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -979,9 +1019,11 @@ void DSPEmitter::subax(const UDSPInstruction opc) u8 dreg = (opc >> 8) & 0x1; u8 sreg = (opc >> 9) & 0x1; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 acx = dsp_get_long_acx(sreg); get_long_acx(sreg, RDX); // s64 res = acc - acx; @@ -992,14 +1034,15 @@ void DSPEmitter::subax(const UDSPInstruction opc) if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { NEG(64, R(RDX)); - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry2(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry2(EAX, tmp1); } else { set_long_acc(dreg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -1014,9 +1057,11 @@ void DSPEmitter::sub(const UDSPInstruction opc) { #ifdef _M_X64 u8 dreg = (opc >> 8) & 0x1; + X64Reg 
tmp1; + gpr.getFreeXReg(tmp1); // s64 acc1 = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 acc2 = dsp_get_long_acc(1 - dreg); get_long_acc(1 - dreg, RDX); // s64 res = acc1 - acc2; @@ -1027,14 +1072,15 @@ void DSPEmitter::sub(const UDSPInstruction opc) if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { NEG(64, R(RDX)); - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry2(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry2(EAX, tmp1); } else { set_long_acc(dreg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -1049,9 +1095,11 @@ void DSPEmitter::subp(const UDSPInstruction opc) { #ifdef _M_X64 u8 dreg = (opc >> 8) & 0x1; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 prod = dsp_get_long_prod(); get_long_prod(RDX); // s64 res = acc - prod; @@ -1062,14 +1110,15 @@ void DSPEmitter::subp(const UDSPInstruction opc) if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { NEG(64, R(RDX)); - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry2(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry2(EAX, tmp1); } else { set_long_acc(dreg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -1085,9 +1134,11 @@ void DSPEmitter::decm(const UDSPInstruction opc) #ifdef _M_X64 u8 dreg = (opc >> 8) & 0x01; s64 subtract = 0x10000; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // 
s64 res = acc - sub; SUB(64, R(RAX), Imm32((u32)subtract)); // dsp_set_long_acc(dreg, res); @@ -1096,14 +1147,15 @@ void DSPEmitter::decm(const UDSPInstruction opc) if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { MOV(64, R(RDX), Imm64(-subtract)); - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry2(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry2(EAX, tmp1); } else { set_long_acc(dreg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -1118,9 +1170,11 @@ void DSPEmitter::dec(const UDSPInstruction opc) { #ifdef _M_X64 u8 dreg = (opc >> 8) & 0x01; + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, RCX); - MOV(64, R(RAX), R(RCX)); + get_long_acc(dreg, tmp1); + MOV(64, R(RAX), R(tmp1)); // s64 res = acc - 1; SUB(64, R(RAX), Imm32(1)); // dsp_set_long_acc(dreg, res); @@ -1129,14 +1183,15 @@ void DSPEmitter::dec(const UDSPInstruction opc) if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { MOV(64, R(RDX), Imm64(-1)); - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry2(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry2(EAX, tmp1); } else { set_long_acc(dreg); } + gpr.putXReg(tmp1); #else Default(opc); #endif diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitBranch.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitBranch.cpp index 815cff9c1e..6c4d5e0867 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitBranch.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitBranch.cpp @@ -56,8 +56,8 @@ static void ReJitConditional(const UDSPInstruction opc, DSPEmitter& emitter) emitter.TEST(16, R(EAX), Imm16(1)); //LE: problem in here, half the tests fail - skipCode2 = emitter.J_CC(CC_NE); - //skipCode2 = 
emitter.J_CC((CCFlags)(CC_NE - (cond & 1))); + skipCode2 = emitter.J_CC(CC_NE, true); + //skipCode2 = emitter.J_CC((CCFlags)(CC_NE - (cond & 1)), true); emitter.dsp_op_read_reg(DSP_REG_SR, RAX); emitter.TEST(16, R(EAX), Imm16(SR_ARITH_ZERO)); break; @@ -94,7 +94,7 @@ static void ReJitConditional(const UDSPInstruction opc, DSPEmitter& emitter) break; //c2 = emitter.gpr; //emitter.TEST(16, R(EAX), Imm16(SR_OVER_S32 | SR_TOP2BITS)); - //skipCode2 = emitter.J_CC((CCFlags)(CC_E + (cond & 1))); + //skipCode2 = emitter.J_CC((CCFlags)(CC_E + (cond & 1)), true); //emitter.TEST(16, R(EAX), Imm16(SR_ARITH_ZERO)); //break; } @@ -107,7 +107,7 @@ static void ReJitConditional(const UDSPInstruction opc, DSPEmitter& emitter) break; } DSPJitRegCache c1(emitter.gpr); - FixupBranch skipCode = cond == 0xe ? emitter.J_CC(CC_E) : emitter.J_CC((CCFlags)(CC_NE - (cond & 1))); + FixupBranch skipCode = cond == 0xe ? emitter.J_CC(CC_E,true) : emitter.J_CC((CCFlags)(CC_NE - (cond & 1)),true); jitCode(opc,emitter); emitter.gpr.flushRegs(c1); emitter.SetJumpTarget(skipCode); @@ -121,7 +121,8 @@ static void ReJitConditional(const UDSPInstruction opc, DSPEmitter& emitter) static void WriteBranchExit(DSPEmitter& emitter) { - emitter.SaveDSPRegs(); + DSPJitRegCache c(emitter.gpr); + emitter.gpr.saveRegs(); if (DSPAnalyzer::code_flags[emitter.startAddr] & DSPAnalyzer::CODE_IDLE_SKIP) { emitter.MOV(16, R(EAX), Imm16(0x1000)); @@ -131,6 +132,8 @@ static void WriteBranchExit(DSPEmitter& emitter) emitter.MOV(16, R(EAX), Imm16(emitter.blockSize[emitter.startAddr])); } emitter.JMP(emitter.returnDispatcher, true); + emitter.gpr.loadRegs(false); + emitter.gpr.flushRegs(c,false); } static void WriteBlockLink(DSPEmitter& emitter, u16 dest) @@ -142,12 +145,12 @@ static void WriteBlockLink(DSPEmitter& emitter, u16 dest) { emitter.gpr.flushRegs(); // Check if we have enough cycles to execute the next block - emitter.MOV(16, R(ESI), M(&cyclesLeft)); - emitter.CMP(16, R(ESI), 
Imm16(emitter.blockSize[emitter.startAddr] + emitter.blockSize[dest])); + emitter.MOV(16, R(ECX), M(&cyclesLeft)); + emitter.CMP(16, R(ECX), Imm16(emitter.blockSize[emitter.startAddr] + emitter.blockSize[dest])); FixupBranch notEnoughCycles = emitter.J_CC(CC_BE); - emitter.SUB(16, R(ESI), Imm16(emitter.blockSize[emitter.startAddr])); - emitter.MOV(16, M(&cyclesLeft), R(ESI)); + emitter.SUB(16, R(ECX), Imm16(emitter.blockSize[emitter.startAddr])); + emitter.MOV(16, M(&cyclesLeft), R(ECX)); emitter.JMP(emitter.blockLinks[dest], true); emitter.SetJumpTarget(notEnoughCycles); } @@ -339,9 +342,11 @@ void DSPEmitter::HandleLoop() FixupBranch loopUpdated = J(true); SetJumpTarget(loadStack); + DSPJitRegCache c(gpr); dsp_reg_load_stack(0); dsp_reg_load_stack(2); dsp_reg_load_stack(3); + gpr.flushRegs(c); SetJumpTarget(loopUpdated); SetJumpTarget(rLoopAddrG); diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitCCUtil.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitCCUtil.cpp index 620c2a2c43..e382b6114a 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitCCUtil.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitCCUtil.cpp @@ -22,12 +22,12 @@ #include "../DSPIntUtil.h" #include "../DSPEmitter.h" +#include "DSPJitUtil.h" #include "x64Emitter.h" #include "ABI.h" using namespace Gen; // In: RAX: s64 _Value -// In: RCX: 1 = carry, 2 = overflow // Clobbers RDX void DSPEmitter::Update_SR_Register(Gen::X64Reg val) { @@ -72,7 +72,6 @@ void DSPEmitter::Update_SR_Register(Gen::X64Reg val) } // In: RAX: s64 _Value -// In: RCX: 1 = carry, 2 = overflow // Clobbers RDX void DSPEmitter::Update_SR_Register64(Gen::X64Reg val) { @@ -86,18 +85,18 @@ void DSPEmitter::Update_SR_Register64(Gen::X64Reg val) #endif } -// In: RAX: s64 _Value -// In: RCX: 1 = carry, 2 = overflow +// In: (val): s64 _Value +// In: (carry_ovfl): 1 = carry, 2 = overflow // Clobbers RDX -void DSPEmitter::Update_SR_Register64_Carry(Gen::X64Reg val) -{ +void DSPEmitter::Update_SR_Register64_Carry(X64Reg val, X64Reg carry_ovfl) +{ #ifdef _M_X64 
OpArg sr_reg; gpr.getReg(DSP_REG_SR,sr_reg); // g_dsp.r[DSP_REG_SR] &= ~SR_CMP_MASK; AND(16, sr_reg, Imm16(~SR_CMP_MASK)); - CMP(64, R(RCX), R(val)); + CMP(64, R(carry_ovfl), R(val)); // 0x01 // g_dsp.r[DSP_REG_SR] |= SR_CARRY; @@ -110,10 +109,10 @@ void DSPEmitter::Update_SR_Register64_Carry(Gen::X64Reg val) // g_dsp.r[DSP_REG_SR] |= SR_OVERFLOW; // g_dsp.r[DSP_REG_SR] |= SR_OVERFLOW_STICKY; // Overflow = ((acc ^ res) & (ax ^ res)) < 0 - XOR(64, R(RCX), R(val)); + XOR(64, R(carry_ovfl), R(val)); XOR(64, R(RDX), R(val)); - AND(64, R(RCX), R(RDX)); - CMP(64, R(RCX), Imm8(0)); + AND(64, R(carry_ovfl), R(RDX)); + CMP(64, R(carry_ovfl), Imm8(0)); FixupBranch noOverflow = J_CC(CC_GE); OR(16, sr_reg, Imm16(SR_OVERFLOW | SR_OVERFLOW_STICKY)); SetJumpTarget(noOverflow); @@ -123,10 +122,10 @@ void DSPEmitter::Update_SR_Register64_Carry(Gen::X64Reg val) #endif } -// In: RAX: s64 _Value -// In: RCX: 1 = carry, 2 = overflow +// In: (val): s64 _Value +// In: (carry_ovfl): 1 = carry, 2 = overflow // Clobbers RDX -void DSPEmitter::Update_SR_Register64_Carry2(Gen::X64Reg val) +void DSPEmitter::Update_SR_Register64_Carry2(X64Reg val, X64Reg carry_ovfl) { #ifdef _M_X64 OpArg sr_reg; @@ -134,7 +133,7 @@ void DSPEmitter::Update_SR_Register64_Carry2(Gen::X64Reg val) // g_dsp.r[DSP_REG_SR] &= ~SR_CMP_MASK; AND(16, sr_reg, Imm16(~SR_CMP_MASK)); - CMP(64, R(RCX), R(val)); + CMP(64, R(carry_ovfl), R(val)); // 0x01 // g_dsp.r[DSP_REG_SR] |= SR_CARRY; @@ -147,10 +146,10 @@ void DSPEmitter::Update_SR_Register64_Carry2(Gen::X64Reg val) // g_dsp.r[DSP_REG_SR] |= SR_OVERFLOW; // g_dsp.r[DSP_REG_SR] |= SR_OVERFLOW_STICKY; // Overflow = ((acc ^ res) & (ax ^ res)) < 0 - XOR(64, R(RCX), R(val)); + XOR(64, R(carry_ovfl), R(val)); XOR(64, R(RDX), R(val)); - AND(64, R(RCX), R(RDX)); - CMP(64, R(RCX), Imm8(0)); + AND(64, R(carry_ovfl), R(RDX)); + CMP(64, R(carry_ovfl), Imm8(0)); FixupBranch noOverflow = J_CC(CC_GE); OR(16, sr_reg, Imm16(SR_OVERFLOW | SR_OVERFLOW_STICKY)); SetJumpTarget(noOverflow); @@ 
-171,9 +170,8 @@ void DSPEmitter::Update_SR_Register64_Carry2(Gen::X64Reg val) //} // In: RAX: s64 _Value -// In: RCX: 1 = carry, 2 = overflow // Clobbers RDX -void DSPEmitter::Update_SR_Register16(Gen::X64Reg val) +void DSPEmitter::Update_SR_Register16(X64Reg val) { #ifdef _M_X64 OpArg sr_reg; @@ -214,7 +212,6 @@ void DSPEmitter::Update_SR_Register16(Gen::X64Reg val) } // In: RAX: s64 _Value -// In: RCX: 1 = carry, 2 = overflow // Clobbers RDX void DSPEmitter::Update_SR_Register16_OverS32(Gen::X64Reg val) { @@ -225,8 +222,8 @@ void DSPEmitter::Update_SR_Register16_OverS32(Gen::X64Reg val) // // 0x10 // if (_Value != (s32)_Value) g_dsp.r[DSP_REG_SR] |= SR_OVER_S32; - MOVSX(64, 32, RSI, R(val)); - CMP(64, R(RSI), R(val)); + MOVSX(64, 32, RCX, R(val)); + CMP(64, R(RCX), R(val)); FixupBranch noOverS32 = J_CC(CC_E); OR(16, sr_reg, Imm16(SR_OVER_S32)); SetJumpTarget(noOverS32); diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitExtOps.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitExtOps.cpp index 938b9534ec..0d2d28deb3 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitExtOps.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitExtOps.cpp @@ -58,7 +58,7 @@ void DSPEmitter::ir(const UDSPInstruction opc) { void DSPEmitter::nr(const UDSPInstruction opc) { u8 reg = opc & 0x3; - increase_addr_reg(reg); + increase_addr_reg(reg, reg); } // MV $axD.D, $acS.S @@ -81,9 +81,16 @@ void DSPEmitter::s(const UDSPInstruction opc) u8 sreg = ((opc >> 3) & 0x3) + DSP_REG_ACL0; // u16 addr = g_dsp.r[dest]; dsp_op_read_reg(dreg, RAX, ZERO); - dsp_op_read_reg(sreg, RCX, ZERO); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1, ZERO); // u16 val = g_dsp.r[src]; - dmem_write(); + dmem_write(tmp1); + + gpr.putXReg(tmp1); + increment_addr_reg(dreg); } @@ -96,9 +103,16 @@ void DSPEmitter::sn(const UDSPInstruction opc) u8 dreg = opc & 0x3; u8 sreg = ((opc >> 3) & 0x3) + DSP_REG_ACL0; dsp_op_read_reg(dreg, RAX, ZERO); - dsp_op_read_reg(sreg, RCX, ZERO); - dmem_write(); - 
increase_addr_reg(dreg); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1, ZERO); + dmem_write(tmp1); + + gpr.putXReg(tmp1); + + increase_addr_reg(dreg, dreg); } // L $axD.D, @$arS @@ -117,7 +131,7 @@ void DSPEmitter::l(const UDSPInstruction opc) //even if only for one bit, can only //store (up to) two registers in EBX, //so store all of SR - MOV(16, R(EAX), M(&g_dsp.r.sr)); + dsp_op_read_reg(DSP_REG_SR, RAX); SHL(32, R(EAX), Imm8(16)); OR(32, R(EBX), R(EAX)); } @@ -141,12 +155,12 @@ void DSPEmitter::ln(const UDSPInstruction opc) //even if only for one bit, can only //store (up to) two registers in EBX, //so store all of SR - MOV(16, R(EAX), M(&g_dsp.r.sr)); + dsp_op_read_reg(DSP_REG_SR, RAX); SHL(32, R(EAX), Imm8(16)); OR(32, R(EBX), R(EAX)); } - increase_addr_reg(sreg); + increase_addr_reg(sreg, sreg); } // LS $axD.D, $acS.m @@ -159,8 +173,14 @@ void DSPEmitter::ls(const UDSPInstruction opc) u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; dsp_op_read_reg(DSP_REG_AR3, RAX, ZERO); - get_acc_m(sreg, ECX, false); - dmem_write(); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + get_acc_m(sreg, tmp1, false); + dmem_write(tmp1); + + gpr.putXReg(tmp1); pushExtValueFromMem(dreg, DSP_REG_AR0); @@ -180,13 +200,19 @@ void DSPEmitter::lsn(const UDSPInstruction opc) u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; dsp_op_read_reg(DSP_REG_AR3, RAX, ZERO); - get_acc_m(sreg, ECX, false); - dmem_write(); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + get_acc_m(sreg, tmp1, false); + dmem_write(tmp1); + + gpr.putXReg(tmp1); pushExtValueFromMem(dreg, DSP_REG_AR0); increment_addr_reg(DSP_REG_AR3); - increase_addr_reg(DSP_REG_AR0); + increase_addr_reg(DSP_REG_AR0, DSP_REG_AR0); } // LSM $axD.D, $acS.m @@ -200,12 +226,18 @@ void DSPEmitter::lsm(const UDSPInstruction opc) u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; dsp_op_read_reg(DSP_REG_AR3, RAX, ZERO); - get_acc_m(sreg, ECX, false); - dmem_write(); + + X64Reg 
tmp1; + gpr.getFreeXReg(tmp1); + + get_acc_m(sreg, tmp1, false); + dmem_write(tmp1); + + gpr.putXReg(tmp1); pushExtValueFromMem(dreg, DSP_REG_AR0); - increase_addr_reg(DSP_REG_AR3); + increase_addr_reg(DSP_REG_AR3, DSP_REG_AR3); increment_addr_reg(DSP_REG_AR0); } @@ -221,13 +253,19 @@ void DSPEmitter::lsnm(const UDSPInstruction opc) u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; dsp_op_read_reg(DSP_REG_AR3, RAX, ZERO); - get_acc_m(sreg, ECX, false); - dmem_write(); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + get_acc_m(sreg, tmp1, false); + dmem_write(tmp1); + + gpr.putXReg(tmp1); pushExtValueFromMem(dreg, DSP_REG_AR0); - increase_addr_reg(DSP_REG_AR3); - increase_addr_reg(DSP_REG_AR0); + increase_addr_reg(DSP_REG_AR3, DSP_REG_AR3); + increase_addr_reg(DSP_REG_AR0, DSP_REG_AR0); } // SL $acS.m, $axD.D @@ -240,13 +278,19 @@ void DSPEmitter::sl(const UDSPInstruction opc) u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; dsp_op_read_reg(DSP_REG_AR0, RAX, ZERO); - get_acc_m(sreg, ECX, false); - dmem_write(); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + get_acc_m(sreg, tmp1, false); + dmem_write(tmp1); + + gpr.putXReg(tmp1); pushExtValueFromMem(dreg, DSP_REG_AR3); increment_addr_reg(DSP_REG_AR3); - increment_addr_reg(DSP_REG_AR0); + increment_addr_reg(DSP_REG_AR0); } // SLN $acS.m, $axD.D @@ -260,13 +304,19 @@ void DSPEmitter::sln(const UDSPInstruction opc) u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; dsp_op_read_reg(DSP_REG_AR0, RAX, ZERO); - get_acc_m(sreg, ECX, false); - dmem_write(); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + get_acc_m(sreg, tmp1, false); + dmem_write(tmp1); + + gpr.putXReg(tmp1); pushExtValueFromMem(dreg, DSP_REG_AR3); increment_addr_reg(DSP_REG_AR3); - increase_addr_reg(DSP_REG_AR0); + increase_addr_reg(DSP_REG_AR0, DSP_REG_AR0); } // SLM $acS.m, $axD.D @@ -280,12 +330,18 @@ void DSPEmitter::slm(const UDSPInstruction opc) u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; 
dsp_op_read_reg(DSP_REG_AR0, RAX, ZERO); - get_acc_m(sreg, ECX, false); - dmem_write(); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + get_acc_m(sreg, tmp1, false); + dmem_write(tmp1); + + gpr.putXReg(tmp1); pushExtValueFromMem(dreg, DSP_REG_AR3); - increase_addr_reg(DSP_REG_AR3); + increase_addr_reg(DSP_REG_AR3, DSP_REG_AR3); increment_addr_reg(DSP_REG_AR0); } @@ -300,13 +356,19 @@ void DSPEmitter::slnm(const UDSPInstruction opc) u8 sreg = opc & 0x1; u8 dreg = ((opc >> 4) & 0x3) + DSP_REG_AXL0; dsp_op_read_reg(DSP_REG_AR0, RAX, ZERO); - get_acc_m(sreg, ECX, false); - dmem_write(); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + get_acc_m(sreg, tmp1, false); + dmem_write(tmp1); + + gpr.putXReg(tmp1); pushExtValueFromMem(dreg, DSP_REG_AR3); - increase_addr_reg(DSP_REG_AR3); - increase_addr_reg(DSP_REG_AR0); + increase_addr_reg(DSP_REG_AR3, DSP_REG_AR3); + increase_addr_reg(DSP_REG_AR0, DSP_REG_AR0); } // LD $ax0.d, $ax1.r, @$arS @@ -319,6 +381,9 @@ void DSPEmitter::slnm(const UDSPInstruction opc) // points into an invalid memory page (ie 0x2000), then AX0.H keeps its old // value. (not implemented yet) If AR3 points into an invalid memory page, then // AX0.L gets the same value as AX0.H. 
(not implemented yet) + +// LD $axr.h, @$ard +// xxxx xxxx 11dr 0011 void DSPEmitter::ld(const UDSPInstruction opc) { u8 dreg = (opc >> 5) & 0x1; @@ -329,12 +394,14 @@ void DSPEmitter::ld(const UDSPInstruction opc) pushExtValueFromMem((dreg << 1) + DSP_REG_AXL0, sreg); // if (IsSameMemArea(g_dsp.r[sreg], g_dsp.r[DSP_REG_AR3])) { - dsp_op_read_reg(sreg, RSI, NONE); - dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); - SHR(16, R(ESI), Imm8(10)); - SHR(16, R(EDI), Imm8(10)); + X64Reg tmp; + gpr.getFreeXReg(tmp); + dsp_op_read_reg(sreg, RCX, NONE); + dsp_op_read_reg(DSP_REG_AR3, tmp, NONE); + XOR(16, R(ECX), R(tmp)); + gpr.putXReg(tmp); DSPJitRegCache c(gpr); - CMP(16, R(ESI), R(EDI)); + TEST(16, R(ECX), Imm16(0xfc00)); FixupBranch not_equal = J_CC(CC_NE,true); pushExtValueFromMem2((rreg << 1) + DSP_REG_AXL1, sreg); gpr.flushRegs(c); @@ -349,13 +416,15 @@ void DSPEmitter::ld(const UDSPInstruction opc) } else { pushExtValueFromMem(rreg + DSP_REG_AXH0, dreg); + X64Reg tmp; + gpr.getFreeXReg(tmp); //if (IsSameMemArea(g_dsp.r[dreg], g_dsp.r[DSP_REG_AR3])) { - dsp_op_read_reg(dreg, RSI, NONE); - dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); - SHR(16, R(ESI), Imm8(10)); - SHR(16, R(EDI), Imm8(10)); + dsp_op_read_reg(dreg, RCX, NONE); + dsp_op_read_reg(DSP_REG_AR3, tmp, NONE); + XOR(16, R(ECX), R(tmp)); + gpr.putXReg(tmp); DSPJitRegCache c(gpr); - CMP(16, R(ESI), R(EDI)); + TEST(16, R(ECX), Imm16(0xfc00)); FixupBranch not_equal = J_CC(CC_NE, true); pushExtValueFromMem2(rreg + DSP_REG_AXL0, dreg); gpr.flushRegs(c); @@ -382,13 +451,15 @@ void DSPEmitter::ldn(const UDSPInstruction opc) if (sreg != DSP_REG_AR3) { pushExtValueFromMem((dreg << 1) + DSP_REG_AXL0, sreg); + X64Reg tmp; + gpr.getFreeXReg(tmp); //if (IsSameMemArea(g_dsp.r[sreg], g_dsp.r[DSP_REG_AR3])) { - dsp_op_read_reg(sreg, RSI, NONE); - dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); - SHR(16, R(ESI), Imm8(10)); - SHR(16, R(EDI), Imm8(10)); + dsp_op_read_reg(sreg, RCX, NONE); + dsp_op_read_reg(DSP_REG_AR3, tmp, NONE); + XOR(16, R(ECX), 
R(tmp)); + gpr.putXReg(tmp); DSPJitRegCache c(gpr); - CMP(16, R(ESI), R(EDI)); + TEST(16, R(ECX), Imm16(0xfc00)); FixupBranch not_equal = J_CC(CC_NE,true); pushExtValueFromMem2((rreg << 1) + DSP_REG_AXL1, sreg); gpr.flushRegs(c); @@ -398,17 +469,19 @@ void DSPEmitter::ldn(const UDSPInstruction opc) gpr.flushRegs(c); SetJumpTarget(after); - increase_addr_reg(sreg); + increase_addr_reg(sreg, sreg); } else { pushExtValueFromMem(rreg + DSP_REG_AXH0, dreg); + X64Reg tmp; + gpr.getFreeXReg(tmp); //if (IsSameMemArea(g_dsp.r[dreg], g_dsp.r[DSP_REG_AR3])) { - dsp_op_read_reg(dreg, RSI, NONE); - dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); - SHR(16, R(ESI), Imm8(10)); - SHR(16, R(EDI), Imm8(10)); + dsp_op_read_reg(dreg, RCX, NONE); + dsp_op_read_reg(DSP_REG_AR3, tmp, NONE); + XOR(16, R(ECX), R(tmp)); + gpr.putXReg(tmp); DSPJitRegCache c(gpr); - CMP(16, R(ESI), R(EDI)); + TEST(16, R(ECX), Imm16(0xfc00)); FixupBranch not_equal = J_CC(CC_NE,true); pushExtValueFromMem2(rreg + DSP_REG_AXL0, dreg); gpr.flushRegs(c); @@ -418,7 +491,7 @@ void DSPEmitter::ldn(const UDSPInstruction opc) gpr.flushRegs(c); SetJumpTarget(after); - increase_addr_reg(dreg); + increase_addr_reg(dreg, dreg); } increment_addr_reg(DSP_REG_AR3); @@ -435,13 +508,15 @@ void DSPEmitter::ldm(const UDSPInstruction opc) if (sreg != DSP_REG_AR3) { pushExtValueFromMem((dreg << 1) + DSP_REG_AXL0, sreg); + X64Reg tmp; + gpr.getFreeXReg(tmp); //if (IsSameMemArea(g_dsp.r[sreg], g_dsp.r[DSP_REG_AR3])) { - dsp_op_read_reg(sreg, RSI, NONE); - dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); - SHR(16, R(ESI), Imm8(10)); - SHR(16, R(EDI), Imm8(10)); + dsp_op_read_reg(sreg, RCX, NONE); + dsp_op_read_reg(DSP_REG_AR3, tmp, NONE); + XOR(16, R(ECX), R(tmp)); + gpr.putXReg(tmp); DSPJitRegCache c(gpr); - CMP(16, R(ESI), R(EDI)); + TEST(16, R(ECX), Imm16(0xfc00)); FixupBranch not_equal = J_CC(CC_NE,true); pushExtValueFromMem2((rreg << 1) + DSP_REG_AXL1, sreg); gpr.flushRegs(c); @@ -455,13 +530,15 @@ void DSPEmitter::ldm(const UDSPInstruction opc) 
} else { pushExtValueFromMem(rreg + DSP_REG_AXH0, dreg); + X64Reg tmp; + gpr.getFreeXReg(tmp); //if (IsSameMemArea(g_dsp.r[dreg], g_dsp.r[DSP_REG_AR3])) { - dsp_op_read_reg(dreg, RSI, NONE); - dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); - SHR(16, R(ESI), Imm8(10)); - SHR(16, R(EDI), Imm8(10)); + dsp_op_read_reg(dreg, RCX, NONE); + dsp_op_read_reg(DSP_REG_AR3, tmp, NONE); + XOR(16, R(ECX), R(tmp)); + gpr.putXReg(tmp); DSPJitRegCache c(gpr); - CMP(16, R(ESI), R(EDI)); + TEST(16, R(ECX), Imm16(0xfc00)); FixupBranch not_equal = J_CC(CC_NE,true); pushExtValueFromMem2(rreg + DSP_REG_AXL0, dreg); gpr.flushRegs(c); @@ -474,7 +551,7 @@ void DSPEmitter::ldm(const UDSPInstruction opc) increment_addr_reg(dreg); } - increase_addr_reg(DSP_REG_AR3); + increase_addr_reg(DSP_REG_AR3, DSP_REG_AR3); } // LDNM $ax0.d, $ax1.r, @$arS @@ -488,13 +565,15 @@ void DSPEmitter::ldnm(const UDSPInstruction opc) if (sreg != DSP_REG_AR3) { pushExtValueFromMem((dreg << 1) + DSP_REG_AXL0, sreg); + X64Reg tmp; + gpr.getFreeXReg(tmp); //if (IsSameMemArea(g_dsp.r[sreg], g_dsp.r[DSP_REG_AR3])) { - dsp_op_read_reg(sreg, RSI, NONE); - dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); - SHR(16, R(ESI), Imm8(10)); - SHR(16, R(EDI), Imm8(10)); + dsp_op_read_reg(sreg, RCX, NONE); + dsp_op_read_reg(DSP_REG_AR3, tmp, NONE); + XOR(16, R(ECX), R(tmp)); + gpr.putXReg(tmp); DSPJitRegCache c(gpr); - CMP(16, R(ESI), R(EDI)); + TEST(16, R(ECX), Imm16(0xfc00)); FixupBranch not_equal = J_CC(CC_NE,true); pushExtValueFromMem2((rreg << 1) + DSP_REG_AXL1, sreg); gpr.flushRegs(c); @@ -504,17 +583,19 @@ void DSPEmitter::ldnm(const UDSPInstruction opc) gpr.flushRegs(c); SetJumpTarget(after); - increase_addr_reg(sreg); + increase_addr_reg(sreg, sreg); } else { pushExtValueFromMem(rreg + DSP_REG_AXH0, dreg); + X64Reg tmp; + gpr.getFreeXReg(tmp); //if (IsSameMemArea(g_dsp.r[dreg], g_dsp.r[DSP_REG_AR3])) { - dsp_op_read_reg(dreg, RSI, NONE); - dsp_op_read_reg(DSP_REG_AR3, RDI, NONE); - SHR(16, R(ESI), Imm8(10)); - SHR(16, R(EDI), Imm8(10)); 
+ dsp_op_read_reg(dreg, RCX, NONE); + dsp_op_read_reg(DSP_REG_AR3, tmp, NONE); + XOR(16, R(ECX), R(tmp)); + gpr.putXReg(tmp); DSPJitRegCache c(gpr); - CMP(16, R(ESI), R(EDI)); + TEST(16, R(ECX), Imm16(0xfc00)); FixupBranch not_equal = J_CC(CC_NE,true); pushExtValueFromMem2(rreg + DSP_REG_AXL0, dreg); gpr.flushRegs(c); @@ -524,10 +605,10 @@ void DSPEmitter::ldnm(const UDSPInstruction opc) gpr.flushRegs(c); SetJumpTarget(after); - increase_addr_reg(dreg); + increase_addr_reg(dreg, dreg); } - increase_addr_reg(DSP_REG_AR3); + increase_addr_reg(DSP_REG_AR3, DSP_REG_AR3); } @@ -540,8 +621,15 @@ void DSPEmitter::pushExtValueFromReg(u16 dreg, u16 sreg) { void DSPEmitter::pushExtValueFromMem(u16 dreg, u16 sreg) { // u16 addr = g_dsp.r[addr]; - dsp_op_read_reg(sreg, RCX, ZERO); - dmem_read(); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1, ZERO); + dmem_read(tmp1); + + gpr.putXReg(tmp1); + MOVZX(32, 16, EBX, R(EAX)); storeIndex = dreg; @@ -549,8 +637,15 @@ void DSPEmitter::pushExtValueFromMem(u16 dreg, u16 sreg) { void DSPEmitter::pushExtValueFromMem2(u16 dreg, u16 sreg) { // u16 addr = g_dsp.r[addr]; - dsp_op_read_reg(sreg, RCX, ZERO); - dmem_read(); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1, ZERO); + dmem_read(tmp1); + + gpr.putXReg(tmp1); + SHL(32, R(EAX), Imm8(16)); OR(32, R(EBX), R(EAX)); @@ -569,7 +664,7 @@ void DSPEmitter::popExtValueToReg() { dsp_op_write_reg(storeIndex, RBX); if (storeIndex >= DSP_REG_ACM0 && storeIndex2 == -1) { TEST(32, R(EBX), Imm32(SR_40_MODE_BIT << 16)); - FixupBranch not_40bit = J_CC(CC_Z); + FixupBranch not_40bit = J_CC(CC_Z, true); DSPJitRegCache c(gpr); //if (g_dsp.r[DSP_REG_SR] & SR_40_MODE_BIT) //{ @@ -613,16 +708,16 @@ void DSPEmitter::zeroWriteBackLog(const UDSPInstruction opc) if ((opc >> 12) == 0x3) { if (! extOpTable[opc & 0x7F]->jitFunc) { - SaveDSPRegs(); + gpr.pushRegs(); ABI_CallFunction((void*)::zeroWriteBackLog); - LoadDSPRegs(); + gpr.popRegs(); } } else { if (! 
extOpTable[opc & 0xFF]->jitFunc) { - SaveDSPRegs(); + gpr.pushRegs(); ABI_CallFunction((void*)::zeroWriteBackLog); - LoadDSPRegs(); + gpr.popRegs(); } } return; diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitLoadStore.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitLoadStore.cpp index bd4a320bb3..f51ea96099 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitLoadStore.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitLoadStore.cpp @@ -35,11 +35,18 @@ void DSPEmitter::srs(const UDSPInstruction opc) { u8 reg = ((opc >> 8) & 0x7) + 0x18; //u16 addr = (g_dsp.r.cr << 8) | (opc & 0xFF); - dsp_op_read_reg(reg, RCX, ZERO); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(reg, tmp1, ZERO); dsp_op_read_reg(DSP_REG_CR, RAX, ZERO); SHL(16, R(EAX), Imm8(8)); - OR(8, R(EAX), Imm8(opc & 0xFF)); - dmem_write(); + OR(16, R(EAX), Imm16(opc & 0xFF)); + dmem_write(tmp1); + + gpr.putXReg(tmp1); + } // LRS $(0x18+D), @M @@ -50,11 +57,18 @@ void DSPEmitter::srs(const UDSPInstruction opc) void DSPEmitter::lrs(const UDSPInstruction opc) { u8 reg = ((opc >> 8) & 0x7) + 0x18; + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + //u16 addr = (g_dsp.r[DSP_REG_CR] << 8) | (opc & 0xFF); - dsp_op_read_reg(DSP_REG_CR, RCX, ZERO); - SHL(16, R(ECX), Imm8(8)); - OR(8, R(ECX), Imm8(opc & 0xFF)); - dmem_read(); + dsp_op_read_reg(DSP_REG_CR, tmp1, ZERO); + SHL(16, R(tmp1), Imm8(8)); + OR(16, R(tmp1), Imm16(opc & 0xFF)); + dmem_read(tmp1); + + gpr.putXReg(tmp1); + dsp_op_write_reg(reg, RAX); dsp_conditional_extend_accum(reg); } @@ -82,8 +96,14 @@ void DSPEmitter::sr(const UDSPInstruction opc) { u8 reg = opc & DSP_REG_MASK; u16 address = dsp_imem_read(compilePC + 1); - dsp_op_read_reg(reg, ECX); - dmem_write_imm(address); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(reg, tmp1); + dmem_write_imm(address, tmp1); + + gpr.putXReg(tmp1); } // SI @M, #I @@ -95,8 +115,14 @@ void DSPEmitter::si(const UDSPInstruction opc) { u16 address = (s8)opc; u16 imm = dsp_imem_read(compilePC + 1); - MOV(32, R(ECX), 
Imm32((u32)imm)); - dmem_write_imm(address); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + MOV(32, R(tmp1), Imm32((u32)imm)); + dmem_write_imm(address, tmp1); + + gpr.putXReg(tmp1); } // LRR $D, @$S @@ -108,8 +134,14 @@ void DSPEmitter::lrr(const UDSPInstruction opc) u8 sreg = (opc >> 5) & 0x3; u8 dreg = opc & 0x1f; - dsp_op_read_reg(sreg, ECX); - dmem_read(); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1); + dmem_read(tmp1); + + gpr.putXReg(tmp1); + dsp_op_write_reg(dreg, EAX); dsp_conditional_extend_accum(dreg); } @@ -124,8 +156,14 @@ void DSPEmitter::lrrd(const UDSPInstruction opc) u8 sreg = (opc >> 5) & 0x3; u8 dreg = opc & 0x1f; - dsp_op_read_reg(sreg, ECX); - dmem_read(); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1); + dmem_read(tmp1); + + gpr.putXReg(tmp1); + dsp_op_write_reg(dreg, EAX); dsp_conditional_extend_accum(dreg); decrement_addr_reg(sreg); @@ -141,8 +179,14 @@ void DSPEmitter::lrri(const UDSPInstruction opc) u8 sreg = (opc >> 5) & 0x3; u8 dreg = opc & 0x1f; - dsp_op_read_reg(sreg, ECX); - dmem_read(); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1); + dmem_read(tmp1); + + gpr.putXReg(tmp1); + dsp_op_write_reg(dreg, EAX); dsp_conditional_extend_accum(dreg); increment_addr_reg(sreg); @@ -158,11 +202,17 @@ void DSPEmitter::lrrn(const UDSPInstruction opc) u8 sreg = (opc >> 5) & 0x3; u8 dreg = opc & 0x1f; - dsp_op_read_reg(sreg, ECX); - dmem_read(); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1); + dmem_read(tmp1); + + gpr.putXReg(tmp1); + dsp_op_write_reg(dreg, EAX); dsp_conditional_extend_accum(dreg); - increase_addr_reg(sreg); + increase_addr_reg(sreg, sreg); } // SRR @$D, $S @@ -175,9 +225,14 @@ void DSPEmitter::srr(const UDSPInstruction opc) u8 dreg = (opc >> 5) & 0x3; u8 sreg = opc & 0x1f; - dsp_op_read_reg(sreg, ECX); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1); dsp_op_read_reg(dreg, RAX, ZERO); - dmem_write(); + 
dmem_write(tmp1); + + gpr.putXReg(tmp1); } // SRRD @$D, $S @@ -190,9 +245,15 @@ void DSPEmitter::srrd(const UDSPInstruction opc) u8 dreg = (opc >> 5) & 0x3; u8 sreg = opc & 0x1f; - dsp_op_read_reg(sreg, ECX); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1); dsp_op_read_reg(dreg, RAX, ZERO); - dmem_write(); + dmem_write(tmp1); + + gpr.putXReg(tmp1); + decrement_addr_reg(dreg); } @@ -206,9 +267,15 @@ void DSPEmitter::srri(const UDSPInstruction opc) u8 dreg = (opc >> 5) & 0x3; u8 sreg = opc & 0x1f; - dsp_op_read_reg(sreg, ECX); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1); dsp_op_read_reg(dreg, RAX, ZERO); - dmem_write(); + dmem_write(tmp1); + + gpr.putXReg(tmp1); + increment_addr_reg(dreg); } @@ -222,10 +289,16 @@ void DSPEmitter::srrn(const UDSPInstruction opc) u8 dreg = (opc >> 5) & 0x3; u8 sreg = opc & 0x1f; - dsp_op_read_reg(sreg, ECX); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(sreg, tmp1); dsp_op_read_reg(dreg, RAX, ZERO); - dmem_write(); - increase_addr_reg(dreg); + dmem_write(tmp1); + + gpr.putXReg(tmp1); + + increase_addr_reg(dreg, dreg); } // ILRR $acD.m, @$arS @@ -237,8 +310,14 @@ void DSPEmitter::ilrr(const UDSPInstruction opc) u16 reg = opc & 0x3; u16 dreg = (opc >> 8) & 1; - dsp_op_read_reg(reg, RCX, ZERO); - imem_read(); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(reg, tmp1, ZERO); + imem_read(tmp1); + + gpr.putXReg(tmp1); + set_acc_m(dreg, R(RAX)); dsp_conditional_extend_accum(dreg); } @@ -252,8 +331,14 @@ void DSPEmitter::ilrrd(const UDSPInstruction opc) u16 reg = opc & 0x3; u16 dreg = (opc >> 8) & 1; - dsp_op_read_reg(reg, RCX, ZERO); - imem_read(); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(reg, tmp1, ZERO); + imem_read(tmp1); + + gpr.putXReg(tmp1); + set_acc_m(dreg, R(RAX)); dsp_conditional_extend_accum(dreg); decrement_addr_reg(reg); @@ -268,8 +353,14 @@ void DSPEmitter::ilrri(const UDSPInstruction opc) u16 reg = opc & 0x3; u16 dreg = (opc >> 8) & 
1; - dsp_op_read_reg(reg, RCX, ZERO); - imem_read(); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(reg, tmp1, ZERO); + imem_read(tmp1); + + gpr.putXReg(tmp1); + set_acc_m(dreg, R(RAX)); dsp_conditional_extend_accum(dreg); increment_addr_reg(reg); @@ -285,10 +376,16 @@ void DSPEmitter::ilrrn(const UDSPInstruction opc) u16 reg = opc & 0x3; u16 dreg = (opc >> 8) & 1; - dsp_op_read_reg(reg, RCX, ZERO); - imem_read(); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + + dsp_op_read_reg(reg, tmp1, ZERO); + imem_read(tmp1); + + gpr.putXReg(tmp1); + set_acc_m(dreg, R(RAX)); dsp_conditional_extend_accum(dreg); - increase_addr_reg(reg); + increase_addr_reg(reg, reg); } diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitMisc.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitMisc.cpp index a58efb98d4..c733e0367c 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitMisc.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitMisc.cpp @@ -25,7 +25,7 @@ using namespace Gen; //clobbers: //EAX = (s8)g_dsp.reg_stack_ptr[stack_reg] -//CX = g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]] +//expects: void DSPEmitter::dsp_reg_stack_push(int stack_reg) { //g_dsp.reg_stack_ptr[stack_reg]++; @@ -35,30 +35,38 @@ void DSPEmitter::dsp_reg_stack_push(int stack_reg) AND(8, R(AL), Imm8(DSP_STACK_MASK)); MOV(8, M(&g_dsp.reg_stack_ptr[stack_reg]), R(AL)); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); //g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]] = g_dsp.r[DSP_REG_ST0 + stack_reg]; - MOV(16, R(CX), M(&g_dsp.r.st[stack_reg])); + MOV(16, R(tmp1), M(&g_dsp.r.st[stack_reg])); #ifdef _M_IX86 // All32 MOVZX(32, 8, EAX, R(AL)); #else MOVZX(64, 8, RAX, R(AL)); #endif - MOV(16, MComplex(EAX, EAX, 1, (u64)&g_dsp.reg_stack[stack_reg][0]), R(CX)); + MOV(16, MComplex(EAX, EAX, 1, + PtrOffset(&g_dsp.reg_stack[stack_reg][0],0)), R(tmp1)); + gpr.putXReg(tmp1); } //clobbers: //EAX = (s8)g_dsp.reg_stack_ptr[stack_reg] -//CX = g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]] +//expects: void 
DSPEmitter::dsp_reg_stack_pop(int stack_reg) { //g_dsp.r[DSP_REG_ST0 + stack_reg] = g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]]; MOV(8, R(AL), M(&g_dsp.reg_stack_ptr[stack_reg])); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); #ifdef _M_IX86 // All32 MOVZX(32, 8, EAX, R(AL)); #else MOVZX(64, 8, RAX, R(AL)); #endif - MOV(16, R(CX), MComplex(EAX, EAX, 1, (u64)&g_dsp.reg_stack[stack_reg][0])); - MOV(16, M(&g_dsp.r.st[stack_reg]), R(CX)); + MOV(16, R(tmp1), MComplex(EAX, EAX, 1, + PtrOffset(&g_dsp.reg_stack[stack_reg][0],0))); + MOV(16, M(&g_dsp.r.st[stack_reg]), R(tmp1)); + gpr.putXReg(tmp1); //g_dsp.reg_stack_ptr[stack_reg]--; //g_dsp.reg_stack_ptr[stack_reg] &= DSP_STACK_MASK; @@ -165,7 +173,7 @@ void DSPEmitter::dsp_conditional_extend_accum(int reg) //} gpr.flushRegs(c); SetJumpTarget(not_40bit); - gpr.putReg(DSP_REG_SR); + gpr.putReg(DSP_REG_SR, false); } } } @@ -181,7 +189,7 @@ void DSPEmitter::dsp_conditional_extend_accum_imm(int reg, u16 val) gpr.getReg(DSP_REG_SR,sr_reg); DSPJitRegCache c(gpr); TEST(16, sr_reg, Imm16(SR_40_MODE_BIT)); - FixupBranch not_40bit = J_CC(CC_Z); + FixupBranch not_40bit = J_CC(CC_Z, true); //if (g_dsp.r[DSP_REG_SR] & SR_40_MODE_BIT) //{ // Sign extend into whole accum. 
@@ -192,7 +200,7 @@ void DSPEmitter::dsp_conditional_extend_accum_imm(int reg, u16 val) //} gpr.flushRegs(c); SetJumpTarget(not_40bit); - gpr.putReg(DSP_REG_SR); + gpr.putReg(DSP_REG_SR, false); } } } @@ -327,9 +335,9 @@ void DSPEmitter::addarn(const UDSPInstruction opc) // u8 dreg = opc & 0x3; // u8 sreg = (opc >> 2) & 0x3; // g_dsp.r[dreg] = dsp_increase_addr_reg(dreg, (s16)g_dsp.r[DSP_REG_IX0 + sreg]); - + // From looking around it is always called with the matching index register - increase_addr_reg(opc & 0x3); + increase_addr_reg(opc & 0x3, (opc >> 2) & 0x3); } //---- diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitMultiplier.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitMultiplier.cpp index db523c3233..cab2bb194d 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitMultiplier.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitMultiplier.cpp @@ -31,13 +31,12 @@ using namespace Gen; // Returns s64 in RAX -// In: RSI = s16 a, RDI = s16 b +// In: RCX = s16 a, RAX = s16 b void DSPEmitter::multiply() { #ifdef _M_X64 // prod = (s16)a * (s16)b; //signed - MOV(64, R(EAX), R(RDI)); - IMUL(64, R(ESI)); + IMUL(64, R(ECX)); // Conditionally multiply by 2. 
// if ((g_dsp.r.sr & SR_MUL_MODIFY) == 0) @@ -46,9 +45,9 @@ void DSPEmitter::multiply() TEST(16, sr_reg, Imm16(SR_MUL_MODIFY)); FixupBranch noMult2 = J_CC(CC_NZ); // prod <<= 1; - SHL(64, R(EAX), Imm8(1)); + LEA(64, RAX, MRegSum(RAX,RAX)); SetJumpTarget(noMult2); - gpr.putReg(DSP_REG_SR); + gpr.putReg(DSP_REG_SR, false); // return prod; #endif } @@ -60,7 +59,7 @@ void DSPEmitter::multiply_add() // s64 prod = dsp_get_long_prod() + dsp_get_multiply_prod(a, b, sign); multiply(); MOV(64, R(RDX), R(RAX)); - get_long_prod(); + get_long_prod(); ADD(64, R(RAX), R(RDX)); // return prod; } @@ -72,14 +71,14 @@ void DSPEmitter::multiply_sub() // s64 prod = dsp_get_long_prod() - dsp_get_multiply_prod(a, b, sign); multiply(); MOV(64, R(RDX), R(RAX)); - get_long_prod(); + get_long_prod(); SUB(64, R(RAX), R(RDX)); // return prod; } // Only MULX family instructions have unsigned/mixed support. // Returns s64 in EAX -// In: RSI = s16 a, RDI = s16 b +// In: RCX = s16 a, RAX = s16 b // Returns s64 in RAX void DSPEmitter::multiply_mulx(u8 axh0, u8 axh1) { @@ -101,41 +100,48 @@ void DSPEmitter::multiply_mulx(u8 axh0, u8 axh1) TEST(16, sr_reg, Imm16(SR_MUL_UNSIGNED)); FixupBranch unsignedMul = J_CC(CC_NZ); // prod = (s16)a * (s16)b; //signed - MOVSX(64, 16, RAX, R(RDI)); - IMUL(64, R(RSI)); - FixupBranch signedMul = J(); + MOVSX(64, 16, RAX, R(RAX)); + IMUL(64, R(RCX)); + FixupBranch signedMul = J(true); SetJumpTarget(unsignedMul); + DSPJitRegCache c(gpr); + gpr.putReg(DSP_REG_SR, false); if ((axh0==0) && (axh1==0)) { // unsigned support ON if both ax?.l regs are used // prod = (u32)(a * b); - MOVZX(64, 16, RSI, R(RSI)); - MOVZX(64, 16, RAX, R(RDI)); - MUL(64, R(RSI)); + MOVZX(64, 16, RCX, R(RCX)); + MOVZX(64, 16, RAX, R(RAX)); + MUL(64, R(RCX)); } else if ((axh0==0) && (axh1==1)) { // mixed support ON (u16)axl.0 * (s16)axh.1 // prod = a * (s16)b; - MOVZX(64, 16, RAX, R(RSI)); - IMUL(64, R(RDI)); + X64Reg tmp; + gpr.getFreeXReg(tmp); + MOV(64, R(tmp), R(RAX)); + MOVZX(64, 16, RAX, 
R(RCX)); + IMUL(64, R(tmp)); + gpr.putXReg(tmp); } else if ((axh0==1) && (axh1==0)) { // mixed support ON (u16)axl.1 * (s16)axh.0 // prod = (s16)a * b; - MOVZX(64, 16, RAX, R(RDI)); - IMUL(64, R(RSI)); + MOVZX(64, 16, RAX, R(RAX)); + IMUL(64, R(RCX)); } else { // unsigned support OFF if both ax?.h regs are used // prod = (s16)a * (s16)b; //signed - MOVSX(64, 16, RAX, R(RDI)); - IMUL(64, R(RSI)); + MOVSX(64, 16, RAX, R(RAX)); + IMUL(64, R(RCX)); } + gpr.flushRegs(c); SetJumpTarget(signedMul); // Conditionally multiply by 2. @@ -143,9 +149,9 @@ void DSPEmitter::multiply_mulx(u8 axh0, u8 axh1) TEST(16, sr_reg, Imm16(SR_MUL_MODIFY)); FixupBranch noMult2 = J_CC(CC_NZ); // prod <<= 1; - SHL(64, R(RAX), Imm8(1)); + LEA(64, RAX, MRegSum(RAX,RAX)); SetJumpTarget(noMult2); - gpr.putReg(DSP_REG_SR); + gpr.putReg(DSP_REG_SR, false); // return prod; } @@ -169,7 +175,7 @@ void DSPEmitter::clrp(const UDSPInstruction opc) // g_dsp.r[DSP_REG_PRODM2] = 0x0010; //64bit move to memory does not work. use 2 32bits MOV(32, M(&g_dsp.r.prod.val), Imm32(0xfff00000U)); - MOV(32, M((u8*)(&g_dsp.r.prod.val)+4), Imm32(0x001000ffU)); + MOV(32, M((u8*)(&g_dsp.r.prod.val)+4), Imm32(0x001000ffU)); /* byte offset +4 = upper 32 bits of prod.val; the u8* cast keeps +4 from being scaled by sizeof(val) */ #else Default(opc); #endif @@ -285,14 +291,16 @@ void DSPEmitter::addpaxz(const UDSPInstruction opc) u8 sreg = (opc >> 9) & 0x1; // s64 ax = dsp_get_long_acx(sreg); - get_long_acx(sreg, RCX); - MOV(64, R(RDI), R(RCX)); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + get_long_acx(sreg, tmp1); + MOV(64, R(RDX), R(tmp1)); // s64 res = prod + (ax & ~0xffff); - MOV(64, R(RDX), Imm64(~0xffff)); - AND(64, R(RDI), R(RDX)); + MOV(64, R(RAX), Imm64(~0xffff)); + AND(64, R(RDX), R(RAX)); // s64 prod = dsp_get_long_prod_round_prodl(); get_long_prod_round_prodl(); - ADD(64, R(RAX), R(RDI)); + ADD(64, R(RAX), R(RDX)); // s64 oldprod = dsp_get_long_prod(); // dsp_set_long_acc(dreg, res); @@ -301,14 +309,15 @@ void DSPEmitter::addpaxz(const UDSPInstruction opc) if (!(DSPAnalyzer::code_flags[compilePC] & 
DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { get_long_prod(RDX); - MOV(64, R(RSI), R(RAX)); - set_long_acc(dreg, RSI); - Update_SR_Register64_Carry(); + MOV(64, R(RCX), R(RAX)); + set_long_acc(dreg, RCX); + Update_SR_Register64_Carry(EAX, tmp1); } else { set_long_acc(dreg, RAX); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -323,8 +332,8 @@ void DSPEmitter::mulaxh(const UDSPInstruction opc) { #ifdef _M_X64 // s64 prod = dsp_multiply(dsp_get_ax_h(0), dsp_get_ax_h(0)); - dsp_op_read_reg(DSP_REG_AXH0, RSI, SIGN); - MOV(64, R(RDI), R(RSI)); + dsp_op_read_reg(DSP_REG_AXH0, RCX, SIGN); + MOV(64, R(RAX), R(RCX)); multiply(); // dsp_set_long_prod(prod); set_long_prod(); @@ -345,9 +354,9 @@ void DSPEmitter::mul(const UDSPInstruction opc) u8 sreg = (opc >> 11) & 0x1; // u16 axl = dsp_get_ax_l(sreg); - dsp_op_read_reg(DSP_REG_AXL0+sreg, RSI, SIGN); + dsp_op_read_reg(DSP_REG_AXL0+sreg, RCX, SIGN); // u16 axh = dsp_get_ax_h(sreg); - dsp_op_read_reg(DSP_REG_AXH0+sreg, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXH0+sreg, RAX, SIGN); // s64 prod = dsp_multiply(axh, axl); multiply(); // dsp_set_long_prod(prod); @@ -377,9 +386,9 @@ void DSPEmitter::mulac(const UDSPInstruction opc) ADD(64, R(RAX), R(RDX)); PUSH(64, R(RAX)); // u16 axl = dsp_get_ax_l(sreg); - dsp_op_read_reg(DSP_REG_AXL0+sreg, RSI, SIGN); + dsp_op_read_reg(DSP_REG_AXL0+sreg, RCX, SIGN); // u16 axh = dsp_get_ax_h(sreg); - dsp_op_read_reg(DSP_REG_AXH0+sreg, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXH0+sreg, RAX, SIGN); // s64 prod = dsp_multiply(axl, axh); multiply(); // dsp_set_long_prod(prod); @@ -467,9 +476,9 @@ void DSPEmitter::mulx(const UDSPInstruction opc) u8 sreg = ((opc >> 12) & 0x1); // u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0); - dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RSI, SIGN); + dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RCX, SIGN); // u16 val2 = (treg == 0) ? 
dsp_get_ax_l(1) : dsp_get_ax_h(1); - dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RAX, SIGN); // s64 prod = dsp_multiply_mulx(sreg, treg, val1, val2); multiply_mulx(sreg, treg); // dsp_set_long_prod(prod); @@ -494,25 +503,28 @@ void DSPEmitter::mulxac(const UDSPInstruction opc) u8 sreg = (opc >> 12) & 0x1; // s64 acc = dsp_get_long_acc(rreg) + dsp_get_long_prod(); - get_long_acc(rreg, RCX); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + get_long_acc(rreg, tmp1); get_long_prod(); - ADD(64, R(RCX), R(RAX)); + ADD(64, R(tmp1), R(RAX)); // u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0); - dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RSI, SIGN); + dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RCX, SIGN); // u16 val2 = (treg == 0) ? dsp_get_ax_l(1) : dsp_get_ax_h(1); - dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RAX, SIGN); // s64 prod = dsp_multiply_mulx(sreg, treg, val1, val2); multiply_mulx(sreg, treg); // dsp_set_long_prod(prod); set_long_prod(); // dsp_set_long_acc(rreg, acc); - set_long_acc(rreg, RCX); + set_long_acc(rreg, tmp1); // Update_SR_Register64(dsp_get_long_acc(rreg)); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - Update_SR_Register64(RCX); + Update_SR_Register64(tmp1); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -533,23 +545,26 @@ void DSPEmitter::mulxmv(const UDSPInstruction opc) u8 sreg = (opc >> 12) & 0x1; // s64 acc = dsp_get_long_prod(); - get_long_prod(RCX); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + get_long_prod(tmp1); // u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0); - dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RSI, SIGN); + dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RCX, SIGN); // u16 val2 = (treg == 0) ? 
dsp_get_ax_l(1) : dsp_get_ax_h(1); - dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RAX, SIGN); // s64 prod = dsp_multiply_mulx(sreg, treg, val1, val2); multiply_mulx(sreg, treg); // dsp_set_long_prod(prod); set_long_prod(); // dsp_set_long_acc(rreg, acc); - set_long_acc(rreg, RCX); + set_long_acc(rreg, tmp1); // Update_SR_Register64(dsp_get_long_acc(rreg)); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - Update_SR_Register64(RCX); + Update_SR_Register64(tmp1); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -571,23 +586,26 @@ void DSPEmitter::mulxmvz(const UDSPInstruction opc) u8 sreg = (opc >> 12) & 0x1; // s64 acc = dsp_get_long_prod_round_prodl(); - get_long_prod_round_prodl(RCX); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + get_long_prod_round_prodl(tmp1); // u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0); - dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RSI, SIGN); + dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RCX, SIGN); // u16 val2 = (treg == 0) ? 
dsp_get_ax_l(1) : dsp_get_ax_h(1); - dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RAX, SIGN); // s64 prod = dsp_multiply_mulx(sreg, treg, val1, val2); multiply_mulx(sreg, treg); // dsp_set_long_prod(prod); set_long_prod(); // dsp_set_long_acc(rreg, acc); - set_long_acc(rreg, RCX); + set_long_acc(rreg, tmp1); // Update_SR_Register64(dsp_get_long_acc(rreg)); if (!(DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_START_OF_INST) || (DSPAnalyzer::code_flags[compilePC] & DSPAnalyzer::CODE_UPDATE_SR)) { - Update_SR_Register64(RCX); + Update_SR_Register64(tmp1); } + gpr.putXReg(tmp1); #else Default(opc); #endif @@ -606,9 +624,9 @@ void DSPEmitter::mulc(const UDSPInstruction opc) u8 sreg = (opc >> 12) & 0x1; // u16 accm = dsp_get_acc_m(sreg); - get_acc_m(sreg, ESI); + get_acc_m(sreg, ECX); // u16 axh = dsp_get_ax_h(treg); - dsp_op_read_reg(DSP_REG_AXH0+treg, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXH0+treg, RAX, SIGN); // s64 prod = dsp_multiply(accm, axh); multiply(); // dsp_set_long_prod(prod); @@ -639,9 +657,9 @@ void DSPEmitter::mulcac(const UDSPInstruction opc) ADD(64, R(RAX), R(RDX)); PUSH(64, R(RAX)); // u16 accm = dsp_get_acc_m(sreg); - get_acc_m(sreg, ESI); + get_acc_m(sreg, ECX); // u16 axh = dsp_get_ax_h(treg); - dsp_op_read_reg(DSP_REG_AXH0+treg, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXH0+treg, RAX, SIGN); // s64 prod = dsp_multiply(accm, axh); multiply(); // dsp_set_long_prod(prod); @@ -678,9 +696,9 @@ void DSPEmitter::mulcmv(const UDSPInstruction opc) get_long_prod(); PUSH(64, R(RAX)); // u16 accm = dsp_get_acc_m(sreg); - get_acc_m(sreg, ESI); + get_acc_m(sreg, ECX); // u16 axh = dsp_get_ax_h(treg); - dsp_op_read_reg(DSP_REG_AXH0+treg, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXH0+treg, RAX, SIGN); // s64 prod = dsp_multiply(accm, axh); multiply(); // dsp_set_long_prod(prod); @@ -718,9 +736,9 @@ void DSPEmitter::mulcmvz(const UDSPInstruction opc) get_long_prod_round_prodl(); PUSH(64, R(RAX)); // u16 accm = 
dsp_get_acc_m(sreg); - get_acc_m(sreg, ESI); + get_acc_m(sreg, ECX); // u16 axh = dsp_get_ax_h(treg); - dsp_op_read_reg(DSP_REG_AXH0+treg, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXH0+treg, RAX, SIGN); // s64 prod = dsp_multiply(accm, axh); multiply(); // dsp_set_long_prod(prod); @@ -752,9 +770,9 @@ void DSPEmitter::maddx(const UDSPInstruction opc) u8 sreg = (opc >> 9) & 0x1; // u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0); - dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RSI, SIGN); + dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RCX, SIGN); // u16 val2 = (treg == 0) ? dsp_get_ax_l(1) : dsp_get_ax_h(1); - dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RAX, SIGN); // s64 prod = dsp_multiply_add(val1, val2); multiply_add(); // dsp_set_long_prod(prod); @@ -776,9 +794,9 @@ void DSPEmitter::msubx(const UDSPInstruction opc) u8 sreg = (opc >> 9) & 0x1; // u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0); - dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RSI, SIGN); + dsp_op_read_reg(DSP_REG_AXL0 + sreg*2, RCX, SIGN); // u16 val2 = (treg == 0) ? 
dsp_get_ax_l(1) : dsp_get_ax_h(1); - dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXL1 + treg*2, RAX, SIGN); // s64 prod = dsp_multiply_sub(val1, val2); multiply_sub(); // dsp_set_long_prod(prod); @@ -800,9 +818,9 @@ void DSPEmitter::maddc(const UDSPInstruction opc) u8 sreg = (opc >> 9) & 0x1; // u16 accm = dsp_get_acc_m(sreg); - get_acc_m(sreg, ESI); + get_acc_m(sreg, ECX); // u16 axh = dsp_get_ax_h(treg); - dsp_op_read_reg(DSP_REG_AXH0+treg, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXH0+treg, RAX, SIGN); // s64 prod = dsp_multiply_add(accm, axh); multiply_add(); // dsp_set_long_prod(prod); @@ -824,9 +842,9 @@ void DSPEmitter::msubc(const UDSPInstruction opc) u8 sreg = (opc >> 9) & 0x1; // u16 accm = dsp_get_acc_m(sreg); - get_acc_m(sreg, ESI); + get_acc_m(sreg, ECX); // u16 axh = dsp_get_ax_h(treg); - dsp_op_read_reg(DSP_REG_AXH0+treg, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXH0+treg, RAX, SIGN); // s64 prod = dsp_multiply_sub(accm, axh); multiply_sub(); // dsp_set_long_prod(prod); @@ -847,9 +865,9 @@ void DSPEmitter::madd(const UDSPInstruction opc) u8 sreg = (opc >> 8) & 0x1; // u16 axl = dsp_get_ax_l(sreg); - dsp_op_read_reg(DSP_REG_AXL0+sreg, RSI, SIGN); + dsp_op_read_reg(DSP_REG_AXL0+sreg, RCX, SIGN); // u16 axh = dsp_get_ax_h(sreg); - dsp_op_read_reg(DSP_REG_AXH0+sreg, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXH0+sreg, RAX, SIGN); // s64 prod = dsp_multiply_add(axl, axh); multiply_add(); // dsp_set_long_prod(prod); @@ -870,9 +888,9 @@ void DSPEmitter::msub(const UDSPInstruction opc) u8 sreg = (opc >> 8) & 0x1; // u16 axl = dsp_get_ax_l(sreg); - dsp_op_read_reg(DSP_REG_AXL0+sreg, RSI, SIGN); + dsp_op_read_reg(DSP_REG_AXL0+sreg, RCX, SIGN); // u16 axh = dsp_get_ax_h(sreg); - dsp_op_read_reg(DSP_REG_AXH0+sreg, RDI, SIGN); + dsp_op_read_reg(DSP_REG_AXH0+sreg, RAX, SIGN); // s64 prod = dsp_multiply_sub(axl, axh); multiply_sub(); // dsp_set_long_prod(prod); diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitRegCache.cpp 
b/Source/Core/Core/Src/DSP/Jit/DSPJitRegCache.cpp index 11c27ca6d8..9201157618 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitRegCache.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitRegCache.cpp @@ -21,7 +21,7 @@ using namespace Gen; -static u16 *reg_ptr(int reg) { +static void *reg_ptr(int reg) { switch(reg) { case DSP_REG_AR0: case DSP_REG_AR1: @@ -64,79 +64,117 @@ static u16 *reg_ptr(int reg) { case DSP_REG_ACM0: case DSP_REG_ACM1: return &g_dsp.r.ac[reg - DSP_REG_ACM0].m; + case DSP_REG_AX0_32: + case DSP_REG_AX1_32: + return &g_dsp.r.ax[reg - DSP_REG_AX0_32].val; +#ifdef _M_X64 + case DSP_REG_ACC0_64: + case DSP_REG_ACC1_64: + return &g_dsp.r.ac[reg - DSP_REG_ACC0_64].val; + case DSP_REG_PROD_64: + return &g_dsp.r.prod.val; +#endif default: _assert_msg_(DSPLLE, 0, "cannot happen"); return NULL; } } -#define ROTATED_REG_ACCS -//#undef ROTATED_REG_ACCS +#define STATIC_REG_ACCS +//#undef STATIC_REG_ACCS DSPJitRegCache::DSPJitRegCache(DSPEmitter &_emitter) : emitter(_emitter), temporary(false), merged(false) { for(unsigned int i = 0; i < NUMXREGS; i++) { xregs[i].guest_reg = DSP_REG_STATIC; + xregs[i].pushed = false; } - xregs[RSP].guest_reg = DSP_REG_STATIC;//stack pointer + xregs[RAX].guest_reg = DSP_REG_STATIC;// reserved for MUL/DIV + xregs[RDX].guest_reg = DSP_REG_STATIC;// reserved for MUL/DIV + xregs[RCX].guest_reg = DSP_REG_STATIC;// reserved for shifts + xregs[RBX].guest_reg = DSP_REG_STATIC;//extended op backing store + xregs[RSP].guest_reg = DSP_REG_STATIC;//stack pointer + xregs[RBP].guest_reg = DSP_REG_NONE;//definitely usable in dsplle because //all external calls are protected + xregs[RSI].guest_reg = DSP_REG_NONE; + xregs[RDI].guest_reg = DSP_REG_NONE; + #ifdef _M_X64 +#ifdef STATIC_REG_ACCS xregs[R8].guest_reg = DSP_REG_STATIC;//acc0 xregs[R9].guest_reg = DSP_REG_STATIC;//acc1 +#else + xregs[R8].guest_reg = DSP_REG_NONE; + xregs[R9].guest_reg = DSP_REG_NONE; +#endif xregs[R10].guest_reg = DSP_REG_NONE; - xregs[R11].guest_reg = 
DSP_REG_STATIC;//&g_dsp.r - xregs[R12].guest_reg = DSP_REG_STATIC;//used for cycle counting + xregs[R11].guest_reg = DSP_REG_NONE; + xregs[R12].guest_reg = DSP_REG_NONE; xregs[R13].guest_reg = DSP_REG_NONE; xregs[R14].guest_reg = DSP_REG_NONE; xregs[R15].guest_reg = DSP_REG_NONE; #endif -#ifdef _M_X64 - acc[0].host_reg = R8; - acc[0].shift = 0; - acc[0].dirty = false; - acc[0].used = false; - acc[0].tmp_reg = INVALID_REG; - - acc[1].host_reg = R9; - acc[1].shift = 0; - acc[1].dirty = false; - acc[1].used = false; - acc[1].tmp_reg = INVALID_REG; -#endif - for(unsigned int i = 0; i < 32; i++) { + for(unsigned int i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { regs[i].mem = reg_ptr(i); - regs[i].size = 2; - } -#ifdef _M_X64 - regs[DSP_REG_ACC0_64].mem = &g_dsp.r.ac[0].val; - regs[DSP_REG_ACC0_64].size = 8; - regs[DSP_REG_ACC1_64].mem = &g_dsp.r.ac[1].val; - regs[DSP_REG_ACC1_64].size = 8; - regs[DSP_REG_PROD_64].mem = &g_dsp.r.prod.val; - regs[DSP_REG_PROD_64].size = 8; -#endif - regs[DSP_REG_AX0_32].mem = &g_dsp.r.ax[0].val; - regs[DSP_REG_AX0_32].size = 4; - regs[DSP_REG_AX1_32].mem = &g_dsp.r.ax[1].val; - regs[DSP_REG_AX1_32].size = 4; - for(unsigned int i = 0; i < DSP_REG_MAX_MEM_BACKED+1; i++) { + regs[i].size = 0; regs[i].dirty = false; + regs[i].used = false; + regs[i].last_use_ctr = -1; + regs[i].parentReg = DSP_REG_NONE; + regs[i].shift = 0; + regs[i].host_reg = INVALID_REG; + regs[i].loc = M(regs[i].mem); } + + for(unsigned int i = 0; i < 32; i++) + regs[i].size = 2; + //special composite registers +#ifdef _M_X64 +#ifdef STATIC_REG_ACCS + regs[DSP_REG_ACC0_64].host_reg = R8; + regs[DSP_REG_ACC1_64].host_reg = R9; +#endif + for(unsigned int i = 0; i < 2; i++) { + regs[i+DSP_REG_ACC0_64].size = 8; + regs[i+DSP_REG_ACL0].parentReg = i+DSP_REG_ACC0_64; + regs[i+DSP_REG_ACM0].parentReg = i+DSP_REG_ACC0_64; + regs[i+DSP_REG_ACH0].parentReg = i+DSP_REG_ACC0_64; + regs[i+DSP_REG_ACL0].shift = 0; + regs[i+DSP_REG_ACM0].shift = 16; + regs[i+DSP_REG_ACH0].shift = 32; + } + 
regs[DSP_REG_PROD_64].size = 8; + regs[DSP_REG_PRODL].parentReg = DSP_REG_PROD_64; + regs[DSP_REG_PRODM].parentReg = DSP_REG_PROD_64; + regs[DSP_REG_PRODH].parentReg = DSP_REG_PROD_64; + regs[DSP_REG_PRODM2].parentReg = DSP_REG_PROD_64; + regs[DSP_REG_PRODL].shift = 0; + regs[DSP_REG_PRODM].shift = 16; + regs[DSP_REG_PRODH].shift = 32; + regs[DSP_REG_PRODM2].shift = 48; +#endif + + for(unsigned int i = 0; i < 2; i++) { + regs[i+DSP_REG_AX0_32].size = 4; + regs[i+DSP_REG_AXL0].parentReg = i+DSP_REG_AX0_32; + regs[i+DSP_REG_AXH0].parentReg = i+DSP_REG_AX0_32; + regs[i+DSP_REG_AXL0].shift = 0; + regs[i+DSP_REG_AXH0].shift = 16; + } + + use_ctr = 0; } DSPJitRegCache::DSPJitRegCache(const DSPJitRegCache &cache) : emitter(cache.emitter), temporary(true), merged(false) { memcpy(xregs,cache.xregs,sizeof(xregs)); -#ifdef _M_X64 - memcpy(acc,cache.acc,sizeof(acc)); -#endif memcpy(regs,cache.regs,sizeof(regs)); } @@ -146,9 +184,6 @@ DSPJitRegCache& DSPJitRegCache::operator=(const DSPJitRegCache &cache) _assert_msg_(DSPLLE, temporary, "register cache not temporary??"); merged = false; memcpy(xregs,cache.xregs,sizeof(xregs)); -#ifdef _M_X64 - memcpy(acc,cache.acc,sizeof(acc)); -#endif memcpy(regs,cache.regs,sizeof(regs)); return *this; @@ -159,268 +194,578 @@ DSPJitRegCache::~DSPJitRegCache() _assert_msg_(DSPLLE, !temporary || merged, "temporary cache not merged"); } -void DSPJitRegCache::flushRegs(DSPJitRegCache &cache, bool emit) -{ - cache.merged = true; - -#ifdef _M_X64 - for(unsigned int i = 0; i < 2; i++) { - if (acc[i].shift > cache.acc[i].shift) { - if (emit) - emitter.ROL(64, R(acc[i].host_reg), - Imm8(acc[i].shift-cache.acc[i].shift)); - acc[i].shift = cache.acc[i].shift; - } - if (acc[i].shift < cache.acc[i].shift) { - if (emit) - emitter.ROR(64, R(acc[i].host_reg), - Imm8(cache.acc[i].shift-acc[i].shift)); - acc[i].shift = cache.acc[i].shift; - } - } -#endif -} - void DSPJitRegCache::drop() { merged = true; } -void DSPJitRegCache::flushRegs() +void 
DSPJitRegCache::flushRegs(DSPJitRegCache &cache, bool emit) +{ + cache.merged = true; + + unsigned int i; + + //drop all guest register not used by cache + for(i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + regs[i].used = false;//used is restored later + if (regs[i].loc.IsSimpleReg() && + !cache.regs[i].loc.IsSimpleReg()) + movToMemory(i); + } + + //try to move guest regs in the wrong host reg to the correct one + int movcnt; + do { + movcnt = 0; + for(i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + if (cache.regs[i].loc.GetSimpleReg() != + regs[i].loc.GetSimpleReg() && + xregs[cache.regs[i].loc.GetSimpleReg()].guest_reg == + DSP_REG_NONE) { + movToHostReg(i, + cache.regs[i].loc.GetSimpleReg(), + true); + movcnt++; + } + } + } while (movcnt != 0); + + //free all host regs that are not used for the same guest reg + for(i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + if (cache.regs[i].loc.GetSimpleReg() != + regs[i].loc.GetSimpleReg() && + regs[i].loc.IsSimpleReg()) + movToMemory(i); + } + + //load all guest regs that are in memory and should be in host reg + for(i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + if (cache.regs[i].loc.IsSimpleReg()) { + movToHostReg(i, cache.regs[i].loc.GetSimpleReg(), + true); + rotateHostReg(i, cache.regs[i].shift, true); + } else if(cache.regs[i].loc.IsImm()) { + //todo: immediates? 
+ } + regs[i].used = cache.regs[i].used; + regs[i].dirty |= cache.regs[i].dirty; + regs[i].last_use_ctr = cache.regs[i].last_use_ctr; + } + + //consistency checks + for(i = 0; i < NUMXREGS; i++) { + _assert_msg_(DSPLLE, + xregs[i].guest_reg == cache.xregs[i].guest_reg, + "cache and current xreg guest_reg mismatch for %d", i); + } + + for(i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + _assert_msg_(DSPLLE, + regs[i].loc.IsImm() == cache.regs[i].loc.IsImm(), + "cache and current reg loc mismatch for %x", i); + _assert_msg_(DSPLLE, + regs[i].loc.GetSimpleReg() == cache.regs[i].loc.GetSimpleReg(), + "cache and current reg loc mismatch for %x", i); + _assert_msg_(DSPLLE, + regs[i].dirty || !cache.regs[i].dirty, + "cache and current reg dirty mismatch for %x", i); + _assert_msg_(DSPLLE, + regs[i].used == cache.regs[i].used, + "cache and current reg used mismatch for %x", i); + _assert_msg_(DSPLLE, + regs[i].shift == cache.regs[i].shift, + "cache and current reg shift mismatch for %x", i); + } + + use_ctr = cache.use_ctr; +} + +void DSPJitRegCache::flushMemBackedRegs() { //also needs to undo any dynamic changes to static allocated regs //this should have the same effect as //merge(DSPJitRegCache(emitter)); -#ifdef _M_X64 -#ifdef ROTATED_REG_ACCS - for(unsigned int i = 0; i < 2; i++) { - if (acc[i].shift > 0) { - emitter.ROL(64, R(acc[i].host_reg), - Imm8(acc[i].shift)); - acc[i].shift = 0; - } - _assert_msg_(DSPLLE, !acc[i].used, - "accumulator still in use"); - if (acc[i].used) + + unsigned int i; + for(i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + _assert_msg_(DSPLLE, !regs[i].used, + "register %x still in use", i); + if (regs[i].used) emitter.INT3(); + if (regs[i].host_reg != INVALID_REG) { + movToHostReg(i,regs[i].host_reg,true); + rotateHostReg(i, 0, true); + } else if (regs[i].parentReg == DSP_REG_NONE) { + movToMemory(i); + } } +} + +void DSPJitRegCache::flushRegs() +{ + flushMemBackedRegs(); + + for(unsigned int i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + if 
(regs[i].host_reg != INVALID_REG) + movToMemory(i); + } + + for(unsigned int i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + _assert_msg_(DSPLLE, + !regs[i].loc.IsSimpleReg(), + "register %x is still a simple reg", i); + } + + _assert_msg_(DSPLLE, + xregs[RSP].guest_reg == DSP_REG_STATIC, + "wrong xreg state for %d", RSP); + _assert_msg_(DSPLLE, + xregs[RBX].guest_reg == DSP_REG_STATIC, + "wrong xreg state for %d", RBX); + _assert_msg_(DSPLLE, + xregs[RBP].guest_reg == DSP_REG_NONE, + "wrong xreg state for %d", RBP); + _assert_msg_(DSPLLE, + xregs[RSI].guest_reg == DSP_REG_NONE, + "wrong xreg state for %d", RSI); + _assert_msg_(DSPLLE, + xregs[RDI].guest_reg == DSP_REG_NONE, + "wrong xreg state for %d", RDI); +#ifdef _M_X64 +#ifdef STATIC_REG_ACCS + _assert_msg_(DSPLLE, + xregs[R8].guest_reg == DSP_REG_STATIC, + "wrong xreg state for %d", R8); + _assert_msg_(DSPLLE, + xregs[R9].guest_reg == DSP_REG_STATIC, + "wrong xreg state for %d", R9); +#else + _assert_msg_(DSPLLE, + xregs[R8].guest_reg == DSP_REG_NONE, + "wrong xreg state for %d", R8); + _assert_msg_(DSPLLE, + xregs[R9].guest_reg == DSP_REG_NONE, + "wrong xreg state for %d", R9); #endif + _assert_msg_(DSPLLE, + xregs[R10].guest_reg == DSP_REG_NONE, + "wrong xreg state for %d", R10); + _assert_msg_(DSPLLE, + xregs[R11].guest_reg == DSP_REG_NONE, + "wrong xreg state for %d", R11); + _assert_msg_(DSPLLE, + xregs[R12].guest_reg == DSP_REG_NONE, + "wrong xreg state for %d", R12); + _assert_msg_(DSPLLE, + xregs[R13].guest_reg == DSP_REG_NONE, + "wrong xreg state for %d", R13); + _assert_msg_(DSPLLE, + xregs[R14].guest_reg == DSP_REG_NONE, + "wrong xreg state for %d", R14); + _assert_msg_(DSPLLE, + xregs[R15].guest_reg == DSP_REG_NONE, + "wrong xreg state for %d", R15); #endif + + use_ctr = 0; } static u64 ebp_store; -void DSPJitRegCache::loadStaticRegs() +void DSPJitRegCache::loadRegs(bool emit) { + for(unsigned int i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + if (regs[i].host_reg != INVALID_REG) + 
movToHostReg(i,regs[i].host_reg); + } + + if (emit) { #ifdef _M_X64 -#ifdef ROTATED_REG_ACCS - emitter.MOV(64, R(R8), M(&g_dsp.r.ac[0].val)); - emitter.MOV(64, R(R9), M(&g_dsp.r.ac[1].val)); -#endif - emitter.MOV(64, M(&ebp_store), R(RBP)); + emitter.MOV(64, M(&ebp_store), R(RBP)); #else - emitter.MOV(32, M(&ebp_store), R(EBP)); + emitter.MOV(32, M(&ebp_store), R(EBP)); #endif + } } -void DSPJitRegCache::saveStaticRegs() +void DSPJitRegCache::saveRegs() { flushRegs(); + + for(unsigned int i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + if (regs[i].host_reg != INVALID_REG) + movToMemory(i); + } + + for(unsigned int i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + _assert_msg_(DSPLLE, + !regs[i].loc.IsSimpleReg(), + "register %x is still a simple reg", i); + } + #ifdef _M_X64 -#ifdef ROTATED_REG_ACCS - emitter.MOV(64, M(&g_dsp.r.ac[0].val), R(R8)); - emitter.MOV(64, M(&g_dsp.r.ac[1].val), R(R9)); -#endif emitter.MOV(64, R(RBP), M(&ebp_store)); #else emitter.MOV(32, R(EBP), M(&ebp_store)); #endif } +void DSPJitRegCache::pushRegs() { + flushMemBackedRegs(); + + for(unsigned int i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + if (regs[i].host_reg != INVALID_REG) + movToMemory(i); + } + + for(unsigned int i = 0; i < NUMXREGS; i++) { + if (xregs[i].guest_reg == DSP_REG_USED) { + emitter.PUSH((X64Reg)i); + xregs[i].pushed = true; + xregs[i].guest_reg = DSP_REG_NONE; + } + } + + for(unsigned int i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + _assert_msg_(DSPLLE, + !regs[i].loc.IsSimpleReg(), + "register %x is still a simple reg", i); + } + + for(unsigned int i = 0; i < NUMXREGS; i++) { + _assert_msg_(DSPLLE, + xregs[i].guest_reg == DSP_REG_NONE || + xregs[i].guest_reg == DSP_REG_STATIC, + "register %x is still used", i); + } + +#ifdef _M_X64 + emitter.MOV(64, R(RBP), M(&ebp_store)); +#else + emitter.MOV(32, R(EBP), M(&ebp_store)); +#endif +} + +void DSPJitRegCache::popRegs() { +#ifdef _M_X64 + emitter.MOV(64, M(&ebp_store), R(RBP)); +#else + emitter.MOV(32, M(&ebp_store), R(EBP)); 
+#endif + for(int i = NUMXREGS-1; i >= 0; i--) { + if (xregs[i].pushed) { + emitter.POP((X64Reg)i); + xregs[i].pushed = false; + xregs[i].guest_reg = DSP_REG_USED; + } + } + + for(unsigned int i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { + if (regs[i].host_reg != INVALID_REG) + movToHostReg(i,regs[i].host_reg); + } +} + +X64Reg DSPJitRegCache::makeABICallSafe(X64Reg reg) { + if (reg != RBP) { + return reg; + } + + int rbp_guest = xregs[RBP].guest_reg; + xregs[RBP].guest_reg = DSP_REG_USED; + X64Reg safe = findSpillFreeXReg(); + _assert_msg_(DSPLLE, safe != INVALID_REG, "could not find register"); + if (safe == INVALID_REG) + emitter.INT3(); + xregs[RBP].guest_reg = rbp_guest; +#ifdef _M_X64 + emitter.MOV(64,R(safe),R(reg)); +#else + emitter.MOV(32,R(safe),R(reg)); +#endif + return safe; +} + +void DSPJitRegCache::movToHostReg(int reg, X64Reg host_reg, bool load) +{ + _assert_msg_(DSPLLE, reg >= 0 && reg <= DSP_REG_MAX_MEM_BACKED, + "bad register name %x", reg); + _assert_msg_(DSPLLE, regs[reg].parentReg == DSP_REG_NONE, + "register %x is proxy for %x", reg, regs[reg].parentReg); + _assert_msg_(DSPLLE, !regs[reg].used, + "moving to host reg in use guest reg %x!", reg); + X64Reg old_reg = regs[reg].loc.GetSimpleReg(); + if (old_reg == host_reg) + return; + + if (xregs[host_reg].guest_reg != DSP_REG_STATIC) + xregs[host_reg].guest_reg = reg; + + if (load) { + switch(regs[reg].size) { + case 2: + emitter.MOV(16, R(host_reg), regs[reg].loc); break; + case 4: + emitter.MOV(32, R(host_reg), regs[reg].loc); break; +#ifdef _M_X64 + case 8: + emitter.MOV(64, R(host_reg), regs[reg].loc); break; +#endif + default: + _assert_msg_(DSPLLE, 0, "unsupported memory size"); + break; + } + } + regs[reg].loc = R(host_reg); + if (old_reg != INVALID_REG && + xregs[old_reg].guest_reg != DSP_REG_STATIC) + xregs[old_reg].guest_reg = DSP_REG_NONE; +} + +void DSPJitRegCache::movToHostReg(int reg, bool load) +{ + _assert_msg_(DSPLLE, reg >= 0 && reg <= DSP_REG_MAX_MEM_BACKED, + "bad register 
name %x", reg); + _assert_msg_(DSPLLE, regs[reg].parentReg == DSP_REG_NONE, + "register %x is proxy for %x", reg, regs[reg].parentReg); + _assert_msg_(DSPLLE, !regs[reg].used, + "moving to host reg in use guest reg %x!", reg); + + if (regs[reg].loc.IsSimpleReg()) + return; + X64Reg tmp; + if (regs[reg].host_reg != INVALID_REG) + tmp = regs[reg].host_reg; + else + tmp = findSpillFreeXReg(); + if (tmp == INVALID_REG) + return; + movToHostReg(reg, tmp, load); +} + +void DSPJitRegCache::rotateHostReg(int reg, int shift, bool emit) +{ + _assert_msg_(DSPLLE, reg >= 0 && reg <= DSP_REG_MAX_MEM_BACKED, + "bad register name %x", reg); + _assert_msg_(DSPLLE, regs[reg].parentReg == DSP_REG_NONE, + "register %x is proxy for %x", reg, regs[reg].parentReg); + _assert_msg_(DSPLLE, regs[reg].loc.IsSimpleReg(), + "register %x is not a simple reg", reg); + _assert_msg_(DSPLLE, !regs[reg].used, + "rotating in use guest reg %x!", reg); + if (shift > regs[reg].shift && emit) { + switch(regs[reg].size) { + case 2: + emitter.ROR(16, regs[reg].loc, + Imm8(shift - regs[reg].shift)); + break; + case 4: + emitter.ROR(32, regs[reg].loc, + Imm8(shift - regs[reg].shift)); + break; +#ifdef _M_X64 + case 8: + emitter.ROR(64, regs[reg].loc, + Imm8(shift - regs[reg].shift)); + break; +#endif + } + } else if (shift < regs[reg].shift && emit) { + switch(regs[reg].size) { + case 2: + emitter.ROL(16, regs[reg].loc, + Imm8(regs[reg].shift - shift)); + break; + case 4: + emitter.ROL(32, regs[reg].loc, + Imm8(regs[reg].shift - shift)); + break; +#ifdef _M_X64 + case 8: + emitter.ROL(64, regs[reg].loc, + Imm8(regs[reg].shift - shift)); + break; +#endif + } + } + regs[reg].shift = shift; +} + +void DSPJitRegCache::movToMemory(int reg) +{ + _assert_msg_(DSPLLE, reg >= 0 && reg <= DSP_REG_MAX_MEM_BACKED, + "bad register name %x", reg); + _assert_msg_(DSPLLE, regs[reg].parentReg == DSP_REG_NONE, + "register %x is proxy for %x", reg, regs[reg].parentReg); + _assert_msg_(DSPLLE, !regs[reg].used, + "moving to 
memory in use guest reg %x!", reg); + + if (regs[reg].used) + emitter.INT3(); + + if (!regs[reg].loc.IsSimpleReg() && + !regs[reg].loc.IsImm()) + return; + + //but first, check for any needed rotations + if (regs[reg].loc.IsSimpleReg()) + rotateHostReg(reg, 0, true); + else {} //todo: immediates? + + _assert_msg_(DSPLLE, regs[reg].shift == 0, "still shifted??"); + + //move to mem + OpArg tmp = M(regs[reg].mem); + + if (regs[reg].dirty) { + switch(regs[reg].size) { + case 2: + emitter.MOV(16, tmp, regs[reg].loc); break; + case 4: + emitter.MOV(32, tmp, regs[reg].loc); break; +#ifdef _M_X64 + case 8: + emitter.MOV(64, tmp, regs[reg].loc); break; +#endif + default: + _assert_msg_(DSPLLE, 0, "unsupported memory size"); + break; + } + regs[reg].dirty = false; + } + + if (regs[reg].loc.IsSimpleReg()) { + X64Reg hostreg = regs[reg].loc.GetSimpleReg(); + if (xregs[hostreg].guest_reg != DSP_REG_STATIC) + xregs[hostreg].guest_reg = DSP_REG_NONE; + } + + regs[reg].last_use_ctr = -1; + regs[reg].loc = tmp; +} + void DSPJitRegCache::getReg(int reg, OpArg &oparg, bool load) { + int real_reg; + int shift; + if (regs[reg].parentReg != DSP_REG_NONE) { + real_reg = regs[reg].parentReg; + + // always load and rotate since we need the other + // parts of the register + load = true; + + shift = regs[reg].shift; + } else { + real_reg = reg; + shift = 0; + } + + _assert_msg_(DSPLLE, !regs[real_reg].used, + "register %x already in use", real_reg); + + if (regs[real_reg].used) + emitter.INT3(); + // no need to actually emit code for load or rotate if caller doesn't + // use the contents, but see above for a reason to force the load + movToHostReg(real_reg, load); + //todo: actually handle INVALID_REG + _assert_msg_(DSPLLE, regs[real_reg].loc.IsSimpleReg(), + "did not get host reg for %x", reg); + rotateHostReg(real_reg, shift, load); + oparg = regs[real_reg].loc; + regs[real_reg].used = true; + + //do some register specific fixup switch(reg) { #ifdef _M_X64 -#ifdef ROTATED_REG_ACCS - case 
DSP_REG_ACH0: - case DSP_REG_ACH1: - { - _assert_msg_(DSPLLE, !acc[reg-DSP_REG_ACH0].used, - "accumulator already in use"); - if (acc[reg-DSP_REG_ACH0].used) - emitter.INT3(); - oparg = R(acc[reg-DSP_REG_ACH0].host_reg); - if (acc[reg-DSP_REG_ACH0].shift < 32) { - emitter.ROR(64, oparg, Imm8(32-acc[reg-DSP_REG_ACH0].shift)); - acc[reg-DSP_REG_ACH0].shift = 32; - } - - acc[reg-DSP_REG_ACH0].used = true; - } - break; - case DSP_REG_ACM0: - case DSP_REG_ACM1: - { - _assert_msg_(DSPLLE, !acc[reg-DSP_REG_ACM0].used, - "accumulator already in use"); - if (acc[reg-DSP_REG_ACM0].used) - emitter.INT3(); - oparg = R(acc[reg-DSP_REG_ACM0].host_reg); - if (acc[reg-DSP_REG_ACM0].shift < 16) { - emitter.ROR(64, oparg, Imm8(16-acc[reg-DSP_REG_ACM0].shift)); - acc[reg-DSP_REG_ACM0].shift = 16; - } - if (acc[reg-DSP_REG_ACM0].shift > 16) { - emitter.ROL(64, oparg, Imm8(acc[reg-DSP_REG_ACM0].shift-16)); - acc[reg-DSP_REG_ACM0].shift = 16; - } - acc[reg-DSP_REG_ACM0].used = true; - } - break; - case DSP_REG_ACL0: - case DSP_REG_ACL1: - { - _assert_msg_(DSPLLE, !acc[reg-DSP_REG_ACL0].used, - "accumulator already in use"); - if (acc[reg-DSP_REG_ACL0].used) - emitter.INT3(); - oparg = R(acc[reg-DSP_REG_ACL0].host_reg); - if (acc[reg-DSP_REG_ACL0].shift > 0) { - emitter.ROL(64, oparg, Imm8(acc[reg-DSP_REG_ACL0].shift)); - acc[reg-DSP_REG_ACL0].shift = 0; - } - acc[reg-DSP_REG_ACL0].used = true; - } - break; case DSP_REG_ACC0_64: case DSP_REG_ACC1_64: { - if (acc[reg-DSP_REG_ACC0_64].used) - emitter.INT3(); - _assert_msg_(DSPLLE, !acc[reg-DSP_REG_ACC0_64].used, - "accumulator already in use"); - oparg = R(acc[reg-DSP_REG_ACC0_64].host_reg); if (load) { - if (acc[reg-DSP_REG_ACC0_64].shift > 0) { - emitter.ROL(64, oparg, Imm8(acc[reg-DSP_REG_ACC0_64].shift)); - } + //need to do this because interpreter only does 48 bits + //(and putReg does the same) emitter.SHL(64, oparg, Imm8(64-40));//sign extend emitter.SAR(64, oparg, Imm8(64-40)); } - //don't bother to rotate if caller replaces all 
data - acc[reg-DSP_REG_ACC0_64].shift = 0; - acc[reg-DSP_REG_ACC0_64].used = true; } break; -#endif #endif default: - { -/* - getFreeXReg(reg[reg].host_reg); - X64Reg tmp = reg[reg].host_reg; - oparg = R(tmp); - - if (load) { - u16 *regp = reg_ptr(reg); - emitter.MOV(16, oparg, M(regp)); - } -*/ - oparg = regs[reg].loc; //when loading/storing from/to mem, need to consider regs[reg].size - } - break; + break; } } void DSPJitRegCache::putReg(int reg, bool dirty) { + int real_reg = reg; + if (regs[reg].parentReg != DSP_REG_NONE) + real_reg = regs[reg].parentReg; + OpArg oparg = regs[real_reg].loc; switch(reg) { -#ifdef _M_X64 -#ifdef ROTATED_REG_ACCS case DSP_REG_ACH0: case DSP_REG_ACH1: { - if (dirty) { - if (acc[reg-DSP_REG_ACH0].shift > 0) { - emitter.ROL(64, R(acc[reg-DSP_REG_ACH0].host_reg), - Imm8(acc[reg-DSP_REG_ACH0].shift)); - acc[reg-DSP_REG_ACH0].shift = 0; + //no need to extend to full 64bit here until interpreter + //uses that + if (oparg.IsSimpleReg()) { + //register is already shifted correctly + //(if at all) + + // sign extend from the bottom 8 bits. +#ifndef _M_X64 + //cannot use movsx with SPL, BPL, SIL or DIL + //on 32 bit + if (oparg.GetSimpleReg() == RSP || + oparg.GetSimpleReg() == RBP || + oparg.GetSimpleReg() == RSI || + oparg.GetSimpleReg() == RDI) + { + emitter.SHL(16,oparg,Imm8(8)); + emitter.SAR(16,oparg,Imm8(8)); + } + else +#endif + { + emitter.MOVSX(16, 8, + oparg.GetSimpleReg(), + oparg); + } + } else if (oparg.IsImm()) { + //todo: immediates? + } else { + //this works on the memory, so use reg instead + //of real_reg, since it has the right loc + X64Reg tmp; + getFreeXReg(tmp); + // sign extend from the bottom 8 bits. 
+ emitter.MOVSX(16, 8, tmp, regs[reg].loc); + emitter.MOV(16, regs[reg].loc, R(tmp)); + putXReg(tmp); } - emitter.SHL(64, R(acc[reg-DSP_REG_ACH0].host_reg), Imm8(64-40));//sign extend - emitter.SAR(64, R(acc[reg-DSP_REG_ACH0].host_reg), Imm8(64-40)); } - acc[reg-DSP_REG_ACH0].used = false; } break; - case DSP_REG_ACM0: - case DSP_REG_ACM1: - { - acc[reg-DSP_REG_ACM0].used = false; - } - break; - case DSP_REG_ACL0: - case DSP_REG_ACL1: - acc[reg-DSP_REG_ACL0].used = false; - break; +#ifdef _M_X64 case DSP_REG_ACC0_64: case DSP_REG_ACC1_64: { if (dirty) { - OpArg _reg = R(acc[reg-DSP_REG_ACC0_64].host_reg); - - emitter.SHL(64, _reg, Imm8(64-40));//sign extend - emitter.SAR(64, _reg, Imm8(64-40)); + emitter.SHL(64, oparg, Imm8(64-40));//sign extend + emitter.SAR(64, oparg, Imm8(64-40)); } - acc[reg-DSP_REG_ACC0_64].used = false; - } - break; -#else - case DSP_REG_ACH0: - case DSP_REG_ACH1: - { - //need to fix in memory for now. - u16 *regp = reg_ptr(reg); - OpArg mem; - mem = M(regp); - X64Reg tmp; - getFreeXReg(tmp); - // sign extend from the bottom 8 bits. - emitter.MOVSX(16, 8, tmp, mem); - emitter.MOV(16, mem, R(tmp)); - putXReg(tmp); - } - break; -#endif -#else - case DSP_REG_ACH0: - case DSP_REG_ACH1: - { - //need to fix in memory for now. - u16 *regp = reg_ptr(reg); - OpArg mem; - mem = M(regp); - X64Reg tmp; - getFreeXReg(tmp); - // sign extend from the bottom 8 bits. 
- emitter.MOVSX(16, 8, tmp, mem); - emitter.MOV(16, mem, R(tmp)); - putXReg(tmp); } break; #endif default: - { -/* - X64Reg tmp = reg[reg].host_reg; - - if(dirty) { - u16 *regp = reg_ptr(reg); - emitter.MOV(16, M(dregp), R(tmp)); - } -*/ + break; } - break; + regs[real_reg].used = false; + if (regs[real_reg].loc.IsSimpleReg()) { + regs[real_reg].dirty |= dirty; + regs[real_reg].last_use_ctr = use_ctr; + use_ctr++; } } @@ -481,39 +826,101 @@ void DSPJitRegCache::writeReg(int dreg, OpArg arg) putReg(dreg, true); } +//ordered in order of prefered use +//not all of these are actually available +static X64Reg alloc_order[] = { +#ifdef _M_X64 + R8,R9,R10,R11,R12,R13,R14,R15,RSI,RDI,RBX,RCX,RDX,RAX,RBP +#else + ESI,EDI,EBX,ECX,EDX,EAX,EBP +#endif +}; + X64Reg DSPJitRegCache::spillXReg() { - //todo: implement + unsigned int i; + unsigned int max_use_ctr_diff = 0; + X64Reg least_recent_use_reg = INVALID_REG; + for(i = 0; i < sizeof(alloc_order)/sizeof(alloc_order[0]); i++) { + X64Reg reg = alloc_order[i]; + if (xregs[reg].guest_reg <= DSP_REG_MAX_MEM_BACKED && + !regs[xregs[reg].guest_reg].used) { + unsigned int use_ctr_diff = use_ctr - + regs[xregs[reg].guest_reg].last_use_ctr; + if (use_ctr_diff >= max_use_ctr_diff) { + max_use_ctr_diff = use_ctr_diff; + least_recent_use_reg = reg; + } + } + } + + if (least_recent_use_reg != INVALID_REG) { + movToMemory(xregs[least_recent_use_reg].guest_reg); + return least_recent_use_reg; + } + + //just choose one. 
+ for(i = 0; i < sizeof(alloc_order)/sizeof(alloc_order[0]); i++) { + X64Reg reg = alloc_order[i]; + if (xregs[reg].guest_reg <= DSP_REG_MAX_MEM_BACKED && + !regs[xregs[reg].guest_reg].used) { + movToMemory(xregs[reg].guest_reg); + return reg; + } + } + return INVALID_REG; } void DSPJitRegCache::spillXReg(X64Reg reg) { - //todo: implement + if (xregs[reg].guest_reg <= DSP_REG_MAX_MEM_BACKED) { + _assert_msg_(DSPLLE, !regs[xregs[reg].guest_reg].used, + "to be spilled host reg %x(guest reg %x) still in use!", + reg, xregs[reg].guest_reg); + movToMemory(xregs[reg].guest_reg); + } else { + _assert_msg_(DSPLLE, xregs[reg].guest_reg == DSP_REG_NONE, + "to be spilled host reg %x still in use!", + reg); + } } X64Reg DSPJitRegCache::findFreeXReg() { - int i; - for(i = 0; i < NUMXREGS; i++) { - if (xregs[i].guest_reg == DSP_REG_NONE) { - return (X64Reg)i; + unsigned int i; + for(i = 0; i < sizeof(alloc_order)/sizeof(alloc_order[0]); i++) { + if (xregs[alloc_order[i]].guest_reg == DSP_REG_NONE) { + return alloc_order[i]; } } return INVALID_REG; } -void DSPJitRegCache::getFreeXReg(X64Reg ®) +X64Reg DSPJitRegCache::findSpillFreeXReg() { - reg = findFreeXReg(); + X64Reg reg = findFreeXReg(); if (reg == INVALID_REG) reg = spillXReg(); + return reg; +} + +void DSPJitRegCache::getFreeXReg(X64Reg ®) +{ + reg = findSpillFreeXReg(); + _assert_msg_(DSPLLE, reg != INVALID_REG, "could not find register"); + if (reg == INVALID_REG) + emitter.INT3(); xregs[reg].guest_reg = DSP_REG_USED; } void DSPJitRegCache::getXReg(X64Reg reg) { + if (xregs[reg].guest_reg == DSP_REG_STATIC) { + ERROR_LOG(DSPLLE, "Trying to get statically used XReg %d", reg); + return; + } if (xregs[reg].guest_reg != DSP_REG_NONE) spillXReg(reg); _assert_msg_(DSPLLE, xregs[reg].guest_reg != DSP_REG_NONE, "register already in use"); @@ -522,6 +929,10 @@ void DSPJitRegCache::getXReg(X64Reg reg) void DSPJitRegCache::putXReg(X64Reg reg) { + if (xregs[reg].guest_reg == DSP_REG_STATIC) { + ERROR_LOG(DSPLLE, "Trying to put 
statically used XReg %d", reg); + return; + } _assert_msg_(DSPLLE, xregs[reg].guest_reg == DSP_REG_USED, "putXReg without get(Free)XReg"); xregs[reg].guest_reg = DSP_REG_NONE; diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitRegCache.h b/Source/Core/Core/Src/DSP/Jit/DSPJitRegCache.h index a230947c11..e9ec61d123 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitRegCache.h +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitRegCache.h @@ -23,12 +23,16 @@ class DSPEmitter; enum DSPJitRegSpecial { - DSP_REG_ACC0_64 =32, - DSP_REG_ACC1_64 =33, - DSP_REG_AX0_32 =34, - DSP_REG_AX1_32 =35, + DSP_REG_AX0_32 =32, + DSP_REG_AX1_32 =33, +#ifdef _M_X64 + DSP_REG_ACC0_64 =34, + DSP_REG_ACC1_64 =35, DSP_REG_PROD_64 =36, DSP_REG_MAX_MEM_BACKED = 36, +#else + DSP_REG_MAX_MEM_BACKED = 33, +#endif DSP_REG_USED =253, DSP_REG_STATIC =254, @@ -50,24 +54,28 @@ private: struct X64CachedReg { int guest_reg; //including DSPJitRegSpecial + bool pushed; }; struct DynamicReg { Gen::OpArg loc; void *mem; size_t size; bool dirty; - }; - -#ifdef _M_X64 - //when there is a way to do this efficiently in x86, uncondition - struct { - Gen::X64Reg host_reg; - int shift; - bool dirty; bool used; - Gen::X64Reg tmp_reg; - } acc[2]; -#endif + int last_use_ctr; + int parentReg; + int shift;//current shift if parentReg == DSP_REG_NONE + //otherwise the shift this part can be found at + Gen::X64Reg host_reg; +/* todo: + + drop sameReg + + add parentReg + + add shift: + - if parentReg != DSP_REG_NONE, this is the shift where this + register is found in the parentReg + - if parentReg == DSP_REG_NONE, this is the current shift _state_ + */ + }; DynamicReg regs[DSP_REG_MAX_MEM_BACKED+1]; X64CachedReg xregs[NUMXREGS]; @@ -75,11 +83,21 @@ private: DSPEmitter &emitter; bool temporary; bool merged; + + int use_ctr; private: //find a free host reg Gen::X64Reg findFreeXReg(); Gen::X64Reg spillXReg(); + Gen::X64Reg findSpillFreeXReg(); void spillXReg(Gen::X64Reg reg); + + void movToHostReg(int reg, Gen::X64Reg host_reg, bool load); 
+ void movToHostReg(int reg, bool load); + void rotateHostReg(int reg, int shift, bool emit); + void movToMemory(int reg); + void flushMemBackedRegs(); + public: DSPJitRegCache(DSPEmitter &_emitter); @@ -147,10 +165,19 @@ public: //prepare state so that another flushed DSPJitRegCache can take over void flushRegs(); - void loadStaticRegs();//load statically allocated regs from memory - void saveStaticRegs();//save statically allocated regs to memory + void loadRegs(bool emit=true);//load statically allocated regs from memory + void saveRegs();//save statically allocated regs to memory + + void pushRegs();//save registers before abi call + void popRegs();//restore registers after abi call + + //returns a register with the same contents as reg that is safe + //to use through saveStaticRegs and for ABI-calls + Gen::X64Reg makeABICallSafe(Gen::X64Reg reg); //gives no SCALE_RIP with abs(offset) >= 0x80000000 + //32/64 bit writes allowed when the register has a _64 or _32 suffix + //only 16 bit writes allowed without any suffix. 
void getReg(int reg, Gen::OpArg &oparg, bool load = true); //done with all usages of OpArg above void putReg(int reg, bool dirty = true); diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitUtil.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitUtil.cpp index da0857e188..b9e166f698 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitUtil.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitUtil.cpp @@ -36,32 +36,32 @@ using namespace Gen; // EAX = g_dsp.r.ar[reg] // EDX = g_dsp.r.wr[reg] -// EDI = temp -// ECX = temp void DSPEmitter::increment_addr_reg(int reg) { OpArg ar_reg; OpArg wr_reg; gpr.getReg(DSP_REG_WR0+reg,wr_reg); MOVZX(32, 16, EDX, wr_reg); - gpr.putReg(DSP_REG_WR0+reg); + gpr.putReg(DSP_REG_WR0+reg, false); gpr.getReg(DSP_REG_AR0+reg,ar_reg); MOVZX(32, 16, EAX, ar_reg); - + X64Reg tmp1; + gpr.getFreeXReg(tmp1); //u32 nar = ar + 1; - MOV(32, R(EDI), R(EAX)); + MOV(32, R(tmp1), R(EAX)); ADD(32, R(EAX), Imm8(1)); // if ((nar ^ ar) > ((wr | 1) << 1)) // nar -= wr + 1; - XOR(32, R(EDI), R(EAX)); - LEA(32, ECX, MComplex(EDX, EDX, 1, 0)); + XOR(32, R(tmp1), R(EAX)); + LEA(32, ECX, MRegSum(EDX, EDX)); OR(32, R(ECX), Imm8(2)); - CMP(32, R(EDI), R(ECX)); + CMP(32, R(tmp1), R(ECX)); FixupBranch nowrap = J_CC(CC_BE); SUB(16, R(AX), R(DX)); SUB(16, R(AX), Imm8(1)); SetJumpTarget(nowrap); + gpr.putXReg(tmp1); // g_dsp.r.ar[reg] = nar; MOV(16, ar_reg, R(AX)); @@ -70,171 +70,173 @@ void DSPEmitter::increment_addr_reg(int reg) // EAX = g_dsp.r.ar[reg] // EDX = g_dsp.r.wr[reg] -// EDI = temp -// ECX = temp void DSPEmitter::decrement_addr_reg(int reg) { OpArg ar_reg; OpArg wr_reg; gpr.getReg(DSP_REG_WR0+reg,wr_reg); MOVZX(32, 16, EDX, wr_reg); - gpr.putReg(DSP_REG_WR0+reg); + gpr.putReg(DSP_REG_WR0+reg, false); gpr.getReg(DSP_REG_AR0+reg,ar_reg); MOVZX(32, 16, EAX, ar_reg); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); // u32 nar = ar + wr; // edi = nar - LEA(32, EDI, MComplex(EAX, EDX, 1, 0)); + LEA(32, tmp1, MRegSum(EAX, EDX)); // if (((nar ^ ar) & ((wr | 1) << 1)) > wr) // nar -= wr + 1; - XOR(32, 
R(EAX), R(EDI)); - LEA(32, ECX, MComplex(EDX, EDX, 1, 0)); + XOR(32, R(EAX), R(tmp1)); + LEA(32, ECX, MRegSum(EDX, EDX)); OR(32, R(ECX), Imm8(2)); AND(32, R(EAX), R(ECX)); CMP(32, R(EAX), R(EDX)); FixupBranch nowrap = J_CC(CC_BE); - SUB(16, R(DI), R(DX)); - SUB(16, R(DI), Imm8(1)); - SetJumpTarget(nowrap); + SUB(16, R(tmp1), R(DX)); + SUB(16, R(tmp1), Imm8(1)); + SetJumpTarget(nowrap); // g_dsp.r.ar[reg] = nar; - MOV(16, ar_reg, R(DI)); + MOV(16, ar_reg, R(tmp1)); gpr.putReg(DSP_REG_AR0+reg); + gpr.putXReg(tmp1); } // Increase addr register according to the correspond ix register // EAX = g_dsp.r.ar[reg] // EDX = g_dsp.r.wr[reg] -// ESI = g_dsp.r.ix[reg] -// ECX = temp -// EDI = temp -void DSPEmitter::increase_addr_reg(int reg) -{ +// ECX = g_dsp.r.ix[reg] +void DSPEmitter::increase_addr_reg(int reg, int _ix_reg) +{ OpArg ar_reg; OpArg wr_reg; OpArg ix_reg; gpr.getReg(DSP_REG_WR0+reg,wr_reg); - gpr.getReg(DSP_REG_IX0+reg,ix_reg); MOVZX(32, 16, EDX, wr_reg); - MOVSX(32, 16, ESI, ix_reg); - gpr.putReg(DSP_REG_WR0+reg); - gpr.putReg(DSP_REG_IX0+reg); + gpr.putReg(DSP_REG_WR0+reg, false); + gpr.getReg(DSP_REG_IX0+_ix_reg,ix_reg); + MOVSX(32, 16, ECX, ix_reg); + gpr.putReg(DSP_REG_IX0+_ix_reg, false); gpr.getReg(DSP_REG_AR0+reg,ar_reg); MOVZX(32, 16, EAX, ar_reg); - + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); //u32 nar = ar + ix; //edi = nar - LEA(32, EDI, MComplex(EAX, ESI, 1, 0)); + LEA(32, tmp1, MRegSum(EAX, ECX)); //u32 dar = (nar ^ ar ^ ix) & ((wr | 1) << 1); //eax = dar - XOR(32, R(EAX), R(ESI)); - XOR(32, R(EAX), R(EDI)); - LEA(32, ECX, MComplex(EDX, EDX, 1, 0)); + XOR(32, R(EAX), R(ECX)); + XOR(32, R(EAX), R(tmp1)); + LEA(32, ECX, MRegSum(EDX, EDX)); OR(32, R(ECX), Imm8(2)); AND(32, R(EAX), R(ECX)); //if (ix >= 0) - TEST(32, R(ESI), R(ESI)); + TEST(32, R(ECX), R(ECX)); FixupBranch negative = J_CC(CC_S); //if (dar > wr) CMP(32, R(EAX), R(EDX)); FixupBranch done = J_CC(CC_BE); //nar -= wr + 1; - SUB(16, R(DI), R(DX)); - SUB(16, R(DI), Imm8(1)); + SUB(16, R(tmp1), 
R(DX)); + SUB(16, R(tmp1), Imm8(1)); FixupBranch done2 = J(); //else SetJumpTarget(negative); //if ((((nar + wr + 1) ^ nar) & dar) <= wr) - LEA(32, ECX, MComplex(EDI, EDX, 1, 1)); - XOR(32, R(ECX), R(EDI)); + LEA(32, ECX, MComplex(tmp1, EDX, 1, 1)); + XOR(32, R(ECX), R(tmp1)); AND(32, R(ECX), R(EAX)); CMP(32, R(ECX), R(EDX)); FixupBranch done3 = J_CC(CC_A); //nar += wr + 1; - LEA(32, EDI, MComplex(EDI, EDX, 1, 1)); + LEA(32, tmp1, MComplex(tmp1, EDX, 1, 1)); SetJumpTarget(done); SetJumpTarget(done2); SetJumpTarget(done3); // g_dsp.r.ar[reg] = nar; - MOV(16, ar_reg, R(DI)); + MOV(16, ar_reg, R(tmp1)); gpr.putReg(DSP_REG_AR0+reg); + gpr.putXReg(tmp1); } // Decrease addr register according to the correspond ix register // EAX = g_dsp.r.ar[reg] // EDX = g_dsp.r.wr[reg] -// ESI = g_dsp.r.ix[reg] -// ECX = temp -// EDI = temp +// ECX = g_dsp.r.ix[reg] void DSPEmitter::decrease_addr_reg(int reg) { OpArg ar_reg; OpArg wr_reg; OpArg ix_reg; gpr.getReg(DSP_REG_WR0+reg,wr_reg); - gpr.getReg(DSP_REG_IX0+reg,ix_reg); MOVZX(32, 16, EDX, wr_reg); - MOVSX(32, 16, ESI, ix_reg); - gpr.putReg(DSP_REG_WR0+reg); - gpr.putReg(DSP_REG_IX0+reg); + gpr.putReg(DSP_REG_WR0+reg, false); + gpr.getReg(DSP_REG_IX0+reg,ix_reg); + MOVSX(32, 16, ECX, ix_reg); + gpr.putReg(DSP_REG_IX0+reg, false); gpr.getReg(DSP_REG_AR0+reg,ar_reg); MOVZX(32, 16, EAX, ar_reg); - NOT(32, R(ESI)); //esi = ~ix + NOT(32, R(ECX)); //esi = ~ix + X64Reg tmp1; + gpr.getFreeXReg(tmp1); //u32 nar = ar - ix; (ar + ~ix + 1) - LEA(32, EDI, MComplex(EAX, ESI, 1, 1)); + LEA(32, tmp1, MComplex(EAX, ECX, 1, 1)); //u32 dar = (nar ^ ar ^ ~ix) & ((wr | 1) << 1); //eax = dar - XOR(32, R(EAX), R(ESI)); - XOR(32, R(EAX), R(EDI)); - LEA(32, ECX, MComplex(EDX, EDX, 1, 0)); + XOR(32, R(EAX), R(ECX)); + XOR(32, R(EAX), R(tmp1)); + LEA(32, ECX, MRegSum(EDX, EDX)); OR(32, R(ECX), Imm8(2)); AND(32, R(EAX), R(ECX)); //if ((u32)ix > 0xFFFF8000) ==> (~ix < 0x00007FFF) - CMP(32, R(ESI), Imm32(0x00007FFF)); + CMP(32, R(ECX), Imm32(0x00007FFF)); 
FixupBranch positive = J_CC(CC_AE); //if (dar > wr) CMP(32, R(EAX), R(EDX)); FixupBranch done = J_CC(CC_BE); //nar -= wr + 1; - SUB(16, R(DI), R(DX)); - SUB(16, R(DI), Imm8(1)); + SUB(16, R(tmp1), R(DX)); + SUB(16, R(tmp1), Imm8(1)); FixupBranch done2 = J(); //else SetJumpTarget(positive); //if ((((nar + wr + 1) ^ nar) & dar) <= wr) - LEA(32, ECX, MComplex(EDI, EDX, 1, 1)); - XOR(32, R(ECX), R(EDI)); + LEA(32, ECX, MComplex(tmp1, EDX, 1, 1)); + XOR(32, R(ECX), R(tmp1)); AND(32, R(ECX), R(EAX)); CMP(32, R(ECX), R(EDX)); FixupBranch done3 = J_CC(CC_A); //nar += wr + 1; - LEA(32, EDI, MComplex(EDI, EDX, 1, 1)); + LEA(32, tmp1, MComplex(tmp1, EDX, 1, 1)); SetJumpTarget(done); SetJumpTarget(done2); SetJumpTarget(done3); //return nar - MOV(16, ar_reg, R(DI)); + MOV(16, ar_reg, R(tmp1)); gpr.putReg(DSP_REG_AR0+reg); + gpr.putXReg(tmp1); } // EAX - destination address -// ECX - value -// ESI - Base of dram -void DSPEmitter::dmem_write() +// ECX - Base of dram +void DSPEmitter::dmem_write(X64Reg value) { // if (saddr == 0) CMP(16, R(EAX), Imm16(0x0fff)); @@ -243,45 +245,47 @@ void DSPEmitter::dmem_write() // g_dsp.dram[addr & DSP_DRAM_MASK] = val; AND(16, R(EAX), Imm16(DSP_DRAM_MASK)); #ifdef _M_X64 - MOV(64, R(ESI), ImmPtr(g_dsp.dram)); + MOV(64, R(ECX), ImmPtr(g_dsp.dram)); #else - MOV(32, R(ESI), ImmPtr(g_dsp.dram)); + MOV(32, R(ECX), ImmPtr(g_dsp.dram)); #endif - MOV(16, MComplex(ESI, EAX, 2, 0), R(ECX)); + MOV(16, MComplex(ECX, EAX, 2, 0), R(value)); - FixupBranch end = J(); + FixupBranch end = J(true); // else if (saddr == 0xf) SetJumpTarget(ifx); // Does it mean gdsp_ifx_write needs u32 rather than u16? 
DSPJitRegCache c(gpr); - SaveDSPRegs(); - ABI_CallFunctionRR((void *)gdsp_ifx_write, EAX, ECX); - LoadDSPRegs(); + X64Reg abisafereg = gpr.makeABICallSafe(value); + gpr.pushRegs(); + ABI_CallFunctionRR((void *)gdsp_ifx_write, EAX, abisafereg); + gpr.popRegs(); gpr.flushRegs(c); SetJumpTarget(end); } -// ECX - value -void DSPEmitter::dmem_write_imm(u16 address) +void DSPEmitter::dmem_write_imm(u16 address, X64Reg value) { switch (address >> 12) { case 0x0: // 0xxx DRAM #ifdef _M_IX86 // All32 - MOV(16, M(&g_dsp.dram[address & DSP_DRAM_MASK]), R(ECX)); + MOV(16, M(&g_dsp.dram[address & DSP_DRAM_MASK]), R(value)); #else MOV(64, R(RDX), ImmPtr(g_dsp.dram)); - MOV(16, MDisp(RDX, (address & DSP_DRAM_MASK)*2), R(ECX)); + MOV(16, MDisp(RDX, (address & DSP_DRAM_MASK)*2), R(value)); #endif break; case 0xf: // Fxxx HW regs + { MOV(16, R(EAX), Imm16(address)); - SaveDSPRegs(); - ABI_CallFunctionRR((void *)gdsp_ifx_write, EAX, ECX); - LoadDSPRegs(); + X64Reg abisafereg = gpr.makeABICallSafe(value); + gpr.pushRegs(); + ABI_CallFunctionRR((void *)gdsp_ifx_write, EAX, abisafereg); + gpr.popRegs(); break; - + } default: // Unmapped/non-existing memory ERROR_LOG(DSPLLE, "%04x DSP ERROR: Write to UNKNOWN (%04x) memory", g_dsp.pc, address); @@ -289,81 +293,80 @@ void DSPEmitter::dmem_write_imm(u16 address) } } -// In: ECX - the address to read +// In: (address) - the address to read // Out: EAX - the result of the read (used by caller) -// ESI - Base -void DSPEmitter::imem_read() +// ECX - Base +void DSPEmitter::imem_read(X64Reg address) { // if (addr == 0) - CMP(16, R(ECX), Imm16(0x0fff)); + CMP(16, R(address), Imm16(0x0fff)); FixupBranch irom = J_CC(CC_A); // return g_dsp.iram[addr & DSP_IRAM_MASK]; - AND(16, R(ECX), Imm16(DSP_IRAM_MASK)); + AND(16, R(address), Imm16(DSP_IRAM_MASK)); #ifdef _M_X64 - MOV(64, R(ESI), ImmPtr(g_dsp.iram)); + MOV(64, R(ECX), ImmPtr(g_dsp.iram)); #else - MOV(32, R(ESI), ImmPtr(g_dsp.iram)); + MOV(32, R(ECX), ImmPtr(g_dsp.iram)); #endif - MOV(16, R(EAX), 
MComplex(ESI, ECX, 2, 0)); + MOV(16, R(EAX), MComplex(ECX, address, 2, 0)); FixupBranch end = J(); SetJumpTarget(irom); // else if (addr == 0x8) // return g_dsp.irom[addr & DSP_IROM_MASK]; - AND(16, R(ECX), Imm16(DSP_IROM_MASK)); + AND(16, R(address), Imm16(DSP_IROM_MASK)); #ifdef _M_X64 - MOV(64, R(ESI), ImmPtr(g_dsp.irom)); + MOV(64, R(ECX), ImmPtr(g_dsp.irom)); #else - MOV(32, R(ESI), ImmPtr(g_dsp.irom)); + MOV(32, R(ECX), ImmPtr(g_dsp.irom)); #endif - MOV(16, R(EAX), MComplex(ESI, ECX, 2, 0)); + MOV(16, R(EAX), MComplex(ECX, address, 2, 0)); SetJumpTarget(end); } -// In: ECX - the address to read +// In: (address) - the address to read // Out: EAX - the result of the read (used by caller) -// ESI - Base -void DSPEmitter::dmem_read() +// ECX - Base +void DSPEmitter::dmem_read(X64Reg address) { // if (saddr == 0) - CMP(16, R(ECX), Imm16(0x0fff)); + CMP(16, R(address), Imm16(0x0fff)); FixupBranch dram = J_CC(CC_A); // return g_dsp.dram[addr & DSP_DRAM_MASK]; + AND(32, R(address), Imm32(DSP_DRAM_MASK)); #ifdef _M_X64 - AND(16, R(ECX), Imm16(DSP_DRAM_MASK)); - MOVZX(64, 16, RCX, R(RCX)); - MOV(64, R(ESI), ImmPtr(g_dsp.dram)); + MOVZX(64, 16, address, R(address)); + MOV(64, R(ECX), ImmPtr(g_dsp.dram)); #else - AND(32, R(ECX), Imm32(DSP_DRAM_MASK)); - MOV(32, R(ESI), ImmPtr(g_dsp.dram)); + MOV(32, R(ECX), ImmPtr(g_dsp.dram)); #endif - MOV(16, R(EAX), MComplex(ESI, ECX, 2, 0)); + MOV(16, R(EAX), MComplex(ECX, address, 2, 0)); - FixupBranch end = J(); + FixupBranch end = J(true); SetJumpTarget(dram); // else if (saddr == 0x1) - CMP(16, R(ECX), Imm16(0x1fff)); + CMP(16, R(address), Imm16(0x1fff)); FixupBranch ifx = J_CC(CC_A); // return g_dsp.coef[addr & DSP_COEF_MASK]; + AND(32, R(address), Imm32(DSP_COEF_MASK)); #ifdef _M_X64 - AND(16, R(ECX), Imm16(DSP_COEF_MASK)); - MOVZX(64, 16, RCX, R(RCX)); - MOV(64, R(ESI), ImmPtr(g_dsp.coef)); + MOVZX(64, 16, address, R(address)); + MOV(64, R(ECX), ImmPtr(g_dsp.coef)); #else - AND(32, R(ECX), Imm32(DSP_COEF_MASK)); - MOV(32, 
R(ESI), ImmPtr(g_dsp.coef)); + MOV(32, R(ECX), ImmPtr(g_dsp.coef)); #endif - MOV(16, R(EAX), MComplex(ESI, ECX, 2, 0)); + MOV(16, R(EAX), MComplex(ECX, address, 2, 0)); - FixupBranch end2 = J(); + FixupBranch end2 = J(true); SetJumpTarget(ifx); // else if (saddr == 0xf) // return gdsp_ifx_read(addr); DSPJitRegCache c(gpr); - SaveDSPRegs(); - ABI_CallFunctionR((void *)gdsp_ifx_read, ECX); - LoadDSPRegs(); + X64Reg abisafereg = gpr.makeABICallSafe(address); + gpr.pushRegs(); + ABI_CallFunctionR((void *)gdsp_ifx_read, abisafereg); + gpr.popRegs(); gpr.flushRegs(c); SetJumpTarget(end); SetJumpTarget(end2); @@ -392,11 +395,12 @@ void DSPEmitter::dmem_read_imm(u16 address) break; case 0xf: // Fxxx HW regs - SaveDSPRegs(); + { + gpr.pushRegs(); ABI_CallFunctionC16((void *)gdsp_ifx_read, address); - LoadDSPRegs(); + gpr.popRegs(); break; - + } default: // Unmapped/non-existing memory ERROR_LOG(DSPLLE, "%04x DSP ERROR: Read from UNKNOWN (%04x) memory", g_dsp.pc, address); @@ -408,11 +412,13 @@ void DSPEmitter::get_long_prod(X64Reg long_prod) { #ifdef _M_X64 //s64 val = (s8)(u8)g_dsp.r[DSP_REG_PRODH]; - OpArg reg; - gpr.getReg(DSP_REG_PROD_64, reg); + OpArg prod_reg; + gpr.getReg(DSP_REG_PROD_64, prod_reg); + MOV(64, R(long_prod), prod_reg); + gpr.putReg(DSP_REG_PROD_64, false); + //no use in keeping prod_reg any longer. 
X64Reg tmp; gpr.getFreeXReg(tmp); - MOV(64, R(long_prod), reg); MOV(64, R(tmp), R(long_prod)); SHL(64, R(long_prod), Imm8(64-40));//sign extend SAR(64, R(long_prod), Imm8(64-40)); @@ -420,33 +426,35 @@ void DSPEmitter::get_long_prod(X64Reg long_prod) SHL(64, R(tmp), Imm8(16)); ADD(64, R(long_prod), R(tmp)); gpr.putXReg(tmp); - gpr.putReg(DSP_REG_PROD_64, false); #endif } // Returns s64 in RAX -// Clobbers RSI +// Clobbers RCX void DSPEmitter::get_long_prod_round_prodl(X64Reg long_prod) { #ifdef _M_X64 //s64 prod = dsp_get_long_prod(); get_long_prod(long_prod); + X64Reg tmp; + gpr.getFreeXReg(tmp); //if (prod & 0x10000) prod = (prod + 0x8000) & ~0xffff; TEST(32, R(long_prod), Imm32(0x10000)); FixupBranch jump = J_CC(CC_Z); ADD(64, R(long_prod), Imm32(0x8000)); - MOV(64, R(ESI), Imm64(~0xffff)); - AND(64, R(long_prod), R(RSI)); + MOV(64, R(tmp), Imm64(~0xffff)); + AND(64, R(long_prod), R(tmp)); FixupBranch _ret = J(); //else prod = (prod + 0x7fff) & ~0xffff; SetJumpTarget(jump); ADD(64, R(long_prod), Imm32(0x7fff)); - MOV(64, R(RSI), Imm64(~0xffff)); - AND(64, R(long_prod), R(RSI)); + MOV(64, R(tmp), Imm64(~0xffff)); + AND(64, R(long_prod), R(tmp)); SetJumpTarget(_ret); //return prod; + gpr.putXReg(tmp); #endif } @@ -456,23 +464,23 @@ void DSPEmitter::get_long_prod_round_prodl(X64Reg long_prod) void DSPEmitter::set_long_prod() { #ifdef _M_X64 - OpArg reg; - gpr.getReg(DSP_REG_PROD_64, reg, false); X64Reg tmp; gpr.getFreeXReg(tmp); MOV(64, R(tmp), Imm64(0x000000ffffffffffULL)); AND(64, R(RAX), R(tmp)); - // g_dsp.r[DSP_REG_PRODL] = (u16)val; - MOV(64, reg, R(RAX)); - gpr.putXReg(tmp); + OpArg prod_reg; + gpr.getReg(DSP_REG_PROD_64, prod_reg, false); + // g_dsp.r[DSP_REG_PRODL] = (u16)val; + MOV(64, prod_reg, R(RAX)); + gpr.putReg(DSP_REG_PROD_64, true); #endif } // Returns s64 in RAX -// Clobbers RSI +// Clobbers RCX void DSPEmitter::round_long_acc(X64Reg long_acc) { #ifdef _M_X64 @@ -480,14 +488,14 @@ void DSPEmitter::round_long_acc(X64Reg long_acc) TEST(32, 
R(long_acc), Imm32(0x10000)); FixupBranch jump = J_CC(CC_Z); ADD(64, R(long_acc), Imm32(0x8000)); - MOV(64, R(ESI), Imm64(~0xffff)); - AND(64, R(long_acc), R(RSI)); + MOV(64, R(ECX), Imm64(~0xffff)); + AND(64, R(long_acc), R(RCX)); FixupBranch _ret = J(); //else prod = (prod + 0x7fff) & ~0xffff; SetJumpTarget(jump); ADD(64, R(long_acc), Imm32(0x7fff)); - MOV(64, R(RSI), Imm64(~0xffff)); - AND(64, R(long_acc), R(RSI)); + MOV(64, R(RCX), Imm64(~0xffff)); + AND(64, R(long_acc), R(RCX)); SetJumpTarget(_ret); //return prod; #endif @@ -577,15 +585,5 @@ void DSPEmitter::get_ax_h(int _reg, X64Reg axh) gpr.readReg(_reg+DSP_REG_AXH0, axh, SIGN); } -void DSPEmitter::LoadDSPRegs() -{ - // Load DSP register state here... - gpr.loadStaticRegs(); -} -void DSPEmitter::SaveDSPRegs() -{ - // Save DSP register state here... - gpr.saveStaticRegs(); -}