diff --git a/Source/Core/DSPCore/Src/DSPEmitter.cpp b/Source/Core/DSPCore/Src/DSPEmitter.cpp index ea4b8a86e7..02e48b0312 100644 --- a/Source/Core/DSPCore/Src/DSPEmitter.cpp +++ b/Source/Core/DSPCore/Src/DSPEmitter.cpp @@ -299,7 +299,7 @@ void DSPEmitter::Compile(int start_addr) MOVZX(32, 16, EAX, MDisp(R11,DSP_REG_ST2*2)); #endif CMP(32, R(EAX), Imm32(0)); - FixupBranch rLoopAddressExit = J_CC(CC_LE); + FixupBranch rLoopAddressExit = J_CC(CC_LE, true); #ifdef _M_IX86 // All32 MOVZX(32, 16, EAX, M(&(g_dsp.r[DSP_REG_ST3]))); @@ -307,7 +307,7 @@ void DSPEmitter::Compile(int start_addr) MOVZX(32, 16, EAX, MDisp(R11,DSP_REG_ST3*2)); #endif CMP(32, R(EAX), Imm32(0)); - FixupBranch rLoopCounterExit = J_CC(CC_LE); + FixupBranch rLoopCounterExit = J_CC(CC_LE, true); if (!opcode->branch) { @@ -322,7 +322,7 @@ void DSPEmitter::Compile(int start_addr) // These functions branch and therefore only need to be called in the // end of each block and in this order - ABI_CallFunction((void *)&DSPInterpreter::HandleLoop); + HandleLoop(); // ABI_RestoreStack(0); ABI_PopAllCalleeSavedRegsAndAdjustStack(); if (DSPAnalyzer::code_flags[start_addr] & DSPAnalyzer::CODE_IDLE_SKIP) diff --git a/Source/Core/DSPCore/Src/DSPEmitter.h b/Source/Core/DSPCore/Src/DSPEmitter.h index 9991ca4520..2e61764dce 100644 --- a/Source/Core/DSPCore/Src/DSPEmitter.h +++ b/Source/Core/DSPCore/Src/DSPEmitter.h @@ -104,8 +104,8 @@ public: // Command helpers void dsp_reg_stack_push(int stack_reg); void dsp_reg_stack_pop(int stack_reg); - void dsp_reg_store_stack(int stack_reg, Gen::X64Reg host_sreg); - void dsp_reg_load_stack(int stack_reg, Gen::X64Reg host_dreg); + void dsp_reg_store_stack(int stack_reg, Gen::X64Reg host_sreg = Gen::EDX); + void dsp_reg_load_stack(int stack_reg, Gen::X64Reg host_dreg = Gen::EDX); void dsp_reg_store_stack_imm(int stack_reg, u16 val); void dsp_op_write_reg(int reg, Gen::X64Reg host_sreg); void dsp_op_write_reg_imm(int reg, u16 val); @@ -127,10 +127,15 @@ public: void nx(const UDSPInstruction opc); // Branch + void HandleLoop(); void jcc(const UDSPInstruction opc); void jmprcc(const UDSPInstruction opc); void call(const UDSPInstruction opc); void callr(const UDSPInstruction opc); + void loop(const UDSPInstruction opc); + void loopi(const UDSPInstruction opc); + void bloop(const UDSPInstruction opc); + void bloopi(const UDSPInstruction opc); // Load/Store void srs(const UDSPInstruction opc); diff --git a/Source/Core/DSPCore/Src/DSPTables.cpp b/Source/Core/DSPCore/Src/DSPTables.cpp index d2f6f6f175..884d28a5da 100644 --- a/Source/Core/DSPCore/Src/DSPTables.cpp +++ b/Source/Core/DSPCore/Src/DSPTables.cpp @@ -184,10 +184,10 @@ const DSPOPCTemplate opcodes[] = {"ILRRN", 0x021c, 0xfefc, DSPInterpreter::ilrrn, NULL, 1, 2, {{P_ACCM, 1, 0, 8, 0x0100}, {P_PRG, 1, 0, 0, 0x0003}}, false, false, false, false, false}, // LOOPS - {"LOOP", 0x0040, 0xffe0, DSPInterpreter::loop, NULL, 1, 1, {{P_REG, 1, 0, 0, 0x001f}}, false, true, false, true, false}, - {"BLOOP", 0x0060, 0xffe0, DSPInterpreter::bloop, NULL, 2, 2, {{P_REG, 1, 0, 0, 0x001f}, {P_ADDR_I, 2, 1, 0, 0xffff}}, false, true, false, true, false}, - {"LOOPI", 0x1000, 0xff00, DSPInterpreter::loopi, NULL, 1, 1, {{P_IMM, 1, 0, 0, 0x00ff}}, false, true, false, true, false}, - {"BLOOPI", 0x1100, 0xff00, DSPInterpreter::bloopi, NULL, 2, 2, {{P_IMM, 1, 0, 0, 0x00ff}, {P_ADDR_I, 2, 1, 0, 0xffff}}, false, true, false, true, false}, + {"LOOP", 0x0040, 0xffe0, DSPInterpreter::loop, &DSPEmitter::loop, 1, 1, {{P_REG, 1, 0, 0, 0x001f}}, false, true, false, true, false}, + {"BLOOP", 0x0060, 0xffe0, DSPInterpreter::bloop, &DSPEmitter::bloop, 2, 2, {{P_REG, 1, 0, 0, 0x001f}, {P_ADDR_I, 2, 1, 0, 0xffff}}, false, true, false, true, false}, + {"LOOPI", 0x1000, 0xff00, DSPInterpreter::loopi, &DSPEmitter::loopi, 1, 1, {{P_IMM, 1, 0, 0, 0x00ff}}, false, true, false, true, false}, + {"BLOOPI", 0x1100, 0xff00, DSPInterpreter::bloopi, &DSPEmitter::bloopi, 2, 2, {{P_IMM, 1, 0, 0, 0x00ff}, {P_ADDR_I, 2, 1, 0, 0xffff}}, false, true, false, true, false}, // load and store value pointed by indexing reg and increment; LRR/SRR variants {"LRR", 0x1800, 0xff80, DSPInterpreter::lrr, &DSPEmitter::lrr, 1, 2, {{P_REG, 1, 0, 0, 0x001f}, {P_PRG, 1, 0, 5, 0x0060}}, false, false, false, false, false}, diff --git a/Source/Core/DSPCore/Src/DspIntBranch.cpp b/Source/Core/DSPCore/Src/DspIntBranch.cpp index 3a473aa204..bddb3b36cf 100644 --- a/Source/Core/DSPCore/Src/DspIntBranch.cpp +++ b/Source/Core/DSPCore/Src/DspIntBranch.cpp @@ -142,7 +142,7 @@ void halt(const UDSPInstruction opc) // instructions. Whenever there is value on stack $st2 and current PC is equal // value at $st2, then value at stack $st3 is decremented. If value is not zero // then PC is modified with value from call stack $st0. Otherwise values from -// call stack $st0 and both loop stacks $st2 and $st3 are poped and execution +// call stack $st0 and both loop stacks $st2 and $st3 are popped and execution // continues at next opcode. void HandleLoop() { @@ -225,7 +225,7 @@ void loopi(const UDSPInstruction opc) // specified address addrA inclusive, ie. opcode at addrA is the last opcode // included in loop. Counter is pushed on loop stack $st3, end of block address // is pushed on loop stack $st2 and repeat address is pushed on call stack $st0. -// Up to 4 nested loops is allowed. +// Up to 4 nested loops are allowed. void bloop(const UDSPInstruction opc) { u16 reg = opc & 0x1f; @@ -253,7 +253,7 @@ void bloop(const UDSPInstruction opc) // address addrA inclusive, ie. opcode at addrA is the last opcode included in // loop. Counter is pushed on loop stack $st3, end of block address is pushed // on loop stack $st2 and repeat address is pushed on call stack $st0. Up to 4 -// nested loops is allowed. +// nested loops are allowed. void bloopi(const UDSPInstruction opc) { u16 cnt = opc & 0xff; diff --git a/Source/Core/DSPCore/Src/Jit/DSPJitBranch.cpp b/Source/Core/DSPCore/Src/Jit/DSPJitBranch.cpp index 77afef2566..ba7a548cd7 100644 --- a/Source/Core/DSPCore/Src/Jit/DSPJitBranch.cpp +++ b/Source/Core/DSPCore/Src/Jit/DSPJitBranch.cpp @@ -284,3 +284,216 @@ void DSPEmitter::callr(const UDSPInstruction opc) { ReJitConditional(opc, *this); } + +// LOOP handling: Loop stack is used to control execution of repeated blocks of +// instructions. Whenever there is value on stack $st2 and current PC is equal +// value at $st2, then value at stack $st3 is decremented. If value is not zero +// then PC is modified with value from call stack $st0. Otherwise values from +// call stack $st0 and both loop stacks $st2 and $st3 are popped and execution +// continues at next opcode. +void DSPEmitter::HandleLoop() +{ +#ifdef _M_IX86 // All32 + MOVZX(32, 16, EAX, M(&(g_dsp.r[DSP_REG_ST2]))); + MOVZX(32, 16, ECX, M(&(g_dsp.r[DSP_REG_ST3]))); +#else + MOV(64, R(R11), ImmPtr(&g_dsp.r)); + MOVZX(32, 16, EAX, MDisp(R11,DSP_REG_ST2*2)); + MOVZX(32, 16, ECX, MDisp(R11,DSP_REG_ST3*2)); +#endif + + CMP(32, R(RCX), Imm32(0)); + FixupBranch rLoopCntG = J_CC(CC_LE, true); + CMP(16, R(RAX), Imm16(compilePC - 1)); + FixupBranch rLoopAddrG = J_CC(CC_NE, true); + +#ifdef _M_IX86 // All32 + SUB(16, M(&(g_dsp.r[DSP_REG_ST3])), Imm16(1)); + CMP(16, M(&(g_dsp.r[DSP_REG_ST3])), Imm16(0)); +#else + SUB(16, MDisp(R11,DSP_REG_ST3*2), Imm16(1)); + CMP(16, MDisp(R11,DSP_REG_ST3*2), Imm16(0)); +#endif + + FixupBranch loadStack = J_CC(CC_LE, true); +#ifdef _M_IX86 // All32 + MOVZX(32, 16, ECX, M(&(g_dsp.r[DSP_REG_ST0]))); + MOV(16, M(&g_dsp.pc), R(RCX)); +#else + MOVZX(32, 16, RCX, MDisp(R11,DSP_REG_ST0*2)); + MOV(64, R(RAX), ImmPtr(&(g_dsp.pc))); + MOV(16, MatR(RAX), R(RCX)); +#endif + FixupBranch loopUpdated = J(true); + + SetJumpTarget(loadStack); + dsp_reg_load_stack(0); + dsp_reg_load_stack(2); + dsp_reg_load_stack(3); + + SetJumpTarget(loopUpdated); + SetJumpTarget(rLoopAddrG); + SetJumpTarget(rLoopCntG); + +} + +// LOOP $R +// 0000 0000 010r rrrr +// Repeatedly execute following opcode until counter specified by value +// from register $R reaches zero. Each execution decrement counter. Register +// $R remains unchanged. If register $R is set to zero at the beginning of loop +// then looped instruction will not get executed. +// Actually, this instruction simply prepares the loop stacks for the above. +// The looping hardware takes care of the rest. +void DSPEmitter::loop(const UDSPInstruction opc) +{ + u16 reg = opc & 0x1f; +// u16 cnt = g_dsp.r[reg]; +#ifdef _M_IX86 // All32 + MOVZX(32, 16, EDX, M(&(g_dsp.r[reg]))); +#else + MOV(64, R(R11), ImmPtr(&g_dsp.r)); + MOVZX(32, 16, EDX, MDisp(R11,reg*2)); +#endif + u16 loop_pc = compilePC + 1; + + CMP(16, R(EDX), Imm16(0)); + FixupBranch cnt = J_CC(CC_Z, true); + dsp_reg_store_stack(3); + MOV(16, R(RDX), Imm16(compilePC + 1)); + dsp_reg_store_stack(0); + MOV(16, R(RDX), Imm16(loop_pc)); + dsp_reg_store_stack(2); + + SetJumpTarget(cnt); +#ifdef _M_IX86 // All32 + MOV(16, M(&(g_dsp.pc)), Imm16(compilePC + 1)); +#else + MOV(64, R(RAX), ImmPtr(&(g_dsp.pc))); + MOV(16, MDisp(RAX,0), Imm16(compilePC + 1)); +#endif +} + +// LOOPI #I +// 0001 0000 iiii iiii +// Repeatedly execute following opcode until counter specified by +// immediate value I reaches zero. Each execution decrement counter. If +// immediate value I is set to zero at the beginning of loop then looped +// instruction will not get executed. +// Actually, this instruction simply prepares the loop stacks for the above. +// The looping hardware takes care of the rest. +void DSPEmitter::loopi(const UDSPInstruction opc) +{ + u16 cnt = opc & 0xff; + u16 loop_pc = compilePC + 1; + + if (cnt) + { + MOV(16, R(RDX), Imm16(compilePC + 1)); + dsp_reg_store_stack(0); + MOV(16, R(RDX), Imm16(loop_pc)); + dsp_reg_store_stack(2); + MOV(16, R(RDX), Imm16(cnt)); + dsp_reg_store_stack(3); + +#ifdef _M_IX86 // All32 + MOV(16, M(&(g_dsp.pc)), Imm16(compilePC + 1)); +#else + MOV(64, R(RAX), ImmPtr(&(g_dsp.pc))); + MOV(16, MDisp(RAX,0), Imm16(compilePC + 1)); +#endif + } +} + + +// BLOOP $R, addrA +// 0000 0000 011r rrrr +// aaaa aaaa aaaa aaaa +// Repeatedly execute block of code starting at following opcode until +// counter specified by value from register $R reaches zero. Block ends at +// specified address addrA inclusive, ie. opcode at addrA is the last opcode +// included in loop. Counter is pushed on loop stack $st3, end of block address +// is pushed on loop stack $st2 and repeat address is pushed on call stack $st0. +// Up to 4 nested loops are allowed. +void DSPEmitter::bloop(const UDSPInstruction opc) +{ + u16 reg = opc & 0x1f; +// u16 cnt = g_dsp.r[reg]; +#ifdef _M_IX86 // All32 + MOVZX(32, 16, EDX, M(&(g_dsp.r[reg]))); +#else + MOV(64, R(R11), ImmPtr(&g_dsp.r)); + MOVZX(32, 16, EDX, MDisp(R11,reg*2)); +#endif + u16 loop_pc = dsp_imem_read(compilePC + 1); + + CMP(16, R(EDX), Imm16(0)); + FixupBranch cnt = J_CC(CC_Z, true); + dsp_reg_store_stack(3); + MOV(16, R(RDX), Imm16(compilePC + 2)); + dsp_reg_store_stack(0); + MOV(16, R(RDX), Imm16(loop_pc)); + dsp_reg_store_stack(2); +#ifdef _M_IX86 // All32 + MOV(16, M(&(g_dsp.pc)), Imm16(compilePC + 2)); +#else + MOV(64, R(RAX), ImmPtr(&(g_dsp.pc))); + MOV(16, MDisp(RAX,0), Imm16(compilePC + 2)); +#endif + FixupBranch exit = J(); + + SetJumpTarget(cnt); + // g_dsp.pc = loop_pc; + // dsp_skip_inst(); +#ifdef _M_IX86 // All32 + MOV(16, M(&g_dsp.pc), Imm16(loop_pc + opTable[loop_pc]->size)); +#else + MOV(64, R(RAX), ImmPtr(&(g_dsp.pc))); + MOV(16, MatR(RAX), Imm16(loop_pc + opTable[loop_pc]->size)); +#endif + SetJumpTarget(exit); +} + +// BLOOPI #I, addrA +// 0001 0001 iiii iiii +// aaaa aaaa aaaa aaaa +// Repeatedly execute block of code starting at following opcode until +// counter specified by immediate value I reaches zero. Block ends at specified +// address addrA inclusive, ie. opcode at addrA is the last opcode included in +// loop. Counter is pushed on loop stack $st3, end of block address is pushed +// on loop stack $st2 and repeat address is pushed on call stack $st0. Up to 4 +// nested loops are allowed. +void DSPEmitter::bloopi(const UDSPInstruction opc) +{ + u16 cnt = opc & 0xff; +// u16 loop_pc = dsp_fetch_code(); + u16 loop_pc = dsp_imem_read(compilePC + 1); + + if (cnt) + { + MOV(16, R(RDX), Imm16(compilePC + 2)); + dsp_reg_store_stack(0); + MOV(16, R(RDX), Imm16(loop_pc)); + dsp_reg_store_stack(2); + MOV(16, R(RDX), Imm16(cnt)); + dsp_reg_store_stack(3); + +#ifdef _M_IX86 // All32 + MOV(16, M(&(g_dsp.pc)), Imm16(compilePC + 2)); +#else + MOV(64, R(RAX), ImmPtr(&(g_dsp.pc))); + MOV(16, MDisp(RAX,0), Imm16(compilePC + 2)); +#endif + } + else + { +// g_dsp.pc = loop_pc; +// dsp_skip_inst(); +#ifdef _M_IX86 // All32 + MOV(16, M(&g_dsp.pc), Imm16(loop_pc + opTable[loop_pc]->size)); +#else + MOV(64, R(RAX), ImmPtr(&(g_dsp.pc))); + MOV(16, MatR(RAX), Imm16(loop_pc + opTable[loop_pc]->size)); +#endif + } +}