mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-10 16:19:28 +01:00
LLE JIT: Added the loop instructions to the JIT. Added ASM version of HandleLoop. Both x86 and x64 versions have been added.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6659 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
7c5b12c5bc
commit
963ca6f963
@ -299,7 +299,7 @@ void DSPEmitter::Compile(int start_addr)
|
|||||||
MOVZX(32, 16, EAX, MDisp(R11,DSP_REG_ST2*2));
|
MOVZX(32, 16, EAX, MDisp(R11,DSP_REG_ST2*2));
|
||||||
#endif
|
#endif
|
||||||
CMP(32, R(EAX), Imm32(0));
|
CMP(32, R(EAX), Imm32(0));
|
||||||
FixupBranch rLoopAddressExit = J_CC(CC_LE);
|
FixupBranch rLoopAddressExit = J_CC(CC_LE, true);
|
||||||
|
|
||||||
#ifdef _M_IX86 // All32
|
#ifdef _M_IX86 // All32
|
||||||
MOVZX(32, 16, EAX, M(&(g_dsp.r[DSP_REG_ST3])));
|
MOVZX(32, 16, EAX, M(&(g_dsp.r[DSP_REG_ST3])));
|
||||||
@ -307,7 +307,7 @@ void DSPEmitter::Compile(int start_addr)
|
|||||||
MOVZX(32, 16, EAX, MDisp(R11,DSP_REG_ST3*2));
|
MOVZX(32, 16, EAX, MDisp(R11,DSP_REG_ST3*2));
|
||||||
#endif
|
#endif
|
||||||
CMP(32, R(EAX), Imm32(0));
|
CMP(32, R(EAX), Imm32(0));
|
||||||
FixupBranch rLoopCounterExit = J_CC(CC_LE);
|
FixupBranch rLoopCounterExit = J_CC(CC_LE, true);
|
||||||
|
|
||||||
if (!opcode->branch)
|
if (!opcode->branch)
|
||||||
{
|
{
|
||||||
@ -322,7 +322,7 @@ void DSPEmitter::Compile(int start_addr)
|
|||||||
|
|
||||||
// These functions branch and therefore only need to be called in the
|
// These functions branch and therefore only need to be called in the
|
||||||
// end of each block and in this order
|
// end of each block and in this order
|
||||||
ABI_CallFunction((void *)&DSPInterpreter::HandleLoop);
|
HandleLoop();
|
||||||
// ABI_RestoreStack(0);
|
// ABI_RestoreStack(0);
|
||||||
ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
||||||
if (DSPAnalyzer::code_flags[start_addr] & DSPAnalyzer::CODE_IDLE_SKIP)
|
if (DSPAnalyzer::code_flags[start_addr] & DSPAnalyzer::CODE_IDLE_SKIP)
|
||||||
|
@ -104,8 +104,8 @@ public:
|
|||||||
// Command helpers
|
// Command helpers
|
||||||
void dsp_reg_stack_push(int stack_reg);
|
void dsp_reg_stack_push(int stack_reg);
|
||||||
void dsp_reg_stack_pop(int stack_reg);
|
void dsp_reg_stack_pop(int stack_reg);
|
||||||
void dsp_reg_store_stack(int stack_reg, Gen::X64Reg host_sreg);
|
void dsp_reg_store_stack(int stack_reg, Gen::X64Reg host_sreg = Gen::EDX);
|
||||||
void dsp_reg_load_stack(int stack_reg, Gen::X64Reg host_dreg);
|
void dsp_reg_load_stack(int stack_reg, Gen::X64Reg host_dreg = Gen::EDX);
|
||||||
void dsp_reg_store_stack_imm(int stack_reg, u16 val);
|
void dsp_reg_store_stack_imm(int stack_reg, u16 val);
|
||||||
void dsp_op_write_reg(int reg, Gen::X64Reg host_sreg);
|
void dsp_op_write_reg(int reg, Gen::X64Reg host_sreg);
|
||||||
void dsp_op_write_reg_imm(int reg, u16 val);
|
void dsp_op_write_reg_imm(int reg, u16 val);
|
||||||
@ -127,10 +127,15 @@ public:
|
|||||||
void nx(const UDSPInstruction opc);
|
void nx(const UDSPInstruction opc);
|
||||||
|
|
||||||
// Branch
|
// Branch
|
||||||
|
void HandleLoop();
|
||||||
void jcc(const UDSPInstruction opc);
|
void jcc(const UDSPInstruction opc);
|
||||||
void jmprcc(const UDSPInstruction opc);
|
void jmprcc(const UDSPInstruction opc);
|
||||||
void call(const UDSPInstruction opc);
|
void call(const UDSPInstruction opc);
|
||||||
void callr(const UDSPInstruction opc);
|
void callr(const UDSPInstruction opc);
|
||||||
|
void loop(const UDSPInstruction opc);
|
||||||
|
void loopi(const UDSPInstruction opc);
|
||||||
|
void bloop(const UDSPInstruction opc);
|
||||||
|
void bloopi(const UDSPInstruction opc);
|
||||||
|
|
||||||
// Load/Store
|
// Load/Store
|
||||||
void srs(const UDSPInstruction opc);
|
void srs(const UDSPInstruction opc);
|
||||||
|
@ -184,10 +184,10 @@ const DSPOPCTemplate opcodes[] =
|
|||||||
{"ILRRN", 0x021c, 0xfefc, DSPInterpreter::ilrrn, NULL, 1, 2, {{P_ACCM, 1, 0, 8, 0x0100}, {P_PRG, 1, 0, 0, 0x0003}}, false, false, false, false, false},
|
{"ILRRN", 0x021c, 0xfefc, DSPInterpreter::ilrrn, NULL, 1, 2, {{P_ACCM, 1, 0, 8, 0x0100}, {P_PRG, 1, 0, 0, 0x0003}}, false, false, false, false, false},
|
||||||
|
|
||||||
// LOOPS
|
// LOOPS
|
||||||
{"LOOP", 0x0040, 0xffe0, DSPInterpreter::loop, NULL, 1, 1, {{P_REG, 1, 0, 0, 0x001f}}, false, true, false, true, false},
|
{"LOOP", 0x0040, 0xffe0, DSPInterpreter::loop, &DSPEmitter::loop, 1, 1, {{P_REG, 1, 0, 0, 0x001f}}, false, true, false, true, false},
|
||||||
{"BLOOP", 0x0060, 0xffe0, DSPInterpreter::bloop, NULL, 2, 2, {{P_REG, 1, 0, 0, 0x001f}, {P_ADDR_I, 2, 1, 0, 0xffff}}, false, true, false, true, false},
|
{"BLOOP", 0x0060, 0xffe0, DSPInterpreter::bloop, &DSPEmitter::bloop, 2, 2, {{P_REG, 1, 0, 0, 0x001f}, {P_ADDR_I, 2, 1, 0, 0xffff}}, false, true, false, true, false},
|
||||||
{"LOOPI", 0x1000, 0xff00, DSPInterpreter::loopi, NULL, 1, 1, {{P_IMM, 1, 0, 0, 0x00ff}}, false, true, false, true, false},
|
{"LOOPI", 0x1000, 0xff00, DSPInterpreter::loopi, &DSPEmitter::loopi, 1, 1, {{P_IMM, 1, 0, 0, 0x00ff}}, false, true, false, true, false},
|
||||||
{"BLOOPI", 0x1100, 0xff00, DSPInterpreter::bloopi, NULL, 2, 2, {{P_IMM, 1, 0, 0, 0x00ff}, {P_ADDR_I, 2, 1, 0, 0xffff}}, false, true, false, true, false},
|
{"BLOOPI", 0x1100, 0xff00, DSPInterpreter::bloopi, &DSPEmitter::bloopi, 2, 2, {{P_IMM, 1, 0, 0, 0x00ff}, {P_ADDR_I, 2, 1, 0, 0xffff}}, false, true, false, true, false},
|
||||||
|
|
||||||
// load and store value pointed by indexing reg and increment; LRR/SRR variants
|
// load and store value pointed by indexing reg and increment; LRR/SRR variants
|
||||||
{"LRR", 0x1800, 0xff80, DSPInterpreter::lrr, &DSPEmitter::lrr, 1, 2, {{P_REG, 1, 0, 0, 0x001f}, {P_PRG, 1, 0, 5, 0x0060}}, false, false, false, false, false},
|
{"LRR", 0x1800, 0xff80, DSPInterpreter::lrr, &DSPEmitter::lrr, 1, 2, {{P_REG, 1, 0, 0, 0x001f}, {P_PRG, 1, 0, 5, 0x0060}}, false, false, false, false, false},
|
||||||
|
@ -142,7 +142,7 @@ void halt(const UDSPInstruction opc)
|
|||||||
// instructions. Whenever there is value on stack $st2 and current PC is equal
|
// instructions. Whenever there is value on stack $st2 and current PC is equal
|
||||||
// value at $st2, then value at stack $st3 is decremented. If value is not zero
|
// value at $st2, then value at stack $st3 is decremented. If value is not zero
|
||||||
// then PC is modified with value from call stack $st0. Otherwise values from
|
// then PC is modified with value from call stack $st0. Otherwise values from
|
||||||
// call stack $st0 and both loop stacks $st2 and $st3 are poped and execution
|
// call stack $st0 and both loop stacks $st2 and $st3 are popped and execution
|
||||||
// continues at next opcode.
|
// continues at next opcode.
|
||||||
void HandleLoop()
|
void HandleLoop()
|
||||||
{
|
{
|
||||||
@ -225,7 +225,7 @@ void loopi(const UDSPInstruction opc)
|
|||||||
// specified address addrA inclusive, ie. opcode at addrA is the last opcode
|
// specified address addrA inclusive, ie. opcode at addrA is the last opcode
|
||||||
// included in loop. Counter is pushed on loop stack $st3, end of block address
|
// included in loop. Counter is pushed on loop stack $st3, end of block address
|
||||||
// is pushed on loop stack $st2 and repeat address is pushed on call stack $st0.
|
// is pushed on loop stack $st2 and repeat address is pushed on call stack $st0.
|
||||||
// Up to 4 nested loops is allowed.
|
// Up to 4 nested loops are allowed.
|
||||||
void bloop(const UDSPInstruction opc)
|
void bloop(const UDSPInstruction opc)
|
||||||
{
|
{
|
||||||
u16 reg = opc & 0x1f;
|
u16 reg = opc & 0x1f;
|
||||||
@ -253,7 +253,7 @@ void bloop(const UDSPInstruction opc)
|
|||||||
// address addrA inclusive, ie. opcode at addrA is the last opcode included in
|
// address addrA inclusive, ie. opcode at addrA is the last opcode included in
|
||||||
// loop. Counter is pushed on loop stack $st3, end of block address is pushed
|
// loop. Counter is pushed on loop stack $st3, end of block address is pushed
|
||||||
// on loop stack $st2 and repeat address is pushed on call stack $st0. Up to 4
|
// on loop stack $st2 and repeat address is pushed on call stack $st0. Up to 4
|
||||||
// nested loops is allowed.
|
// nested loops are allowed.
|
||||||
void bloopi(const UDSPInstruction opc)
|
void bloopi(const UDSPInstruction opc)
|
||||||
{
|
{
|
||||||
u16 cnt = opc & 0xff;
|
u16 cnt = opc & 0xff;
|
||||||
|
@ -284,3 +284,216 @@ void DSPEmitter::callr(const UDSPInstruction opc)
|
|||||||
{
|
{
|
||||||
ReJitConditional<r_callr>(opc, *this);
|
ReJitConditional<r_callr>(opc, *this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LOOP handling: Loop stack is used to control execution of repeated blocks of
|
||||||
|
// instructions. Whenever there is value on stack $st2 and current PC is equal
|
||||||
|
// value at $st2, then value at stack $st3 is decremented. If value is not zero
|
||||||
|
// then PC is modified with value from call stack $st0. Otherwise values from
|
||||||
|
// call stack $st0 and both loop stacks $st2 and $st3 are popped and execution
|
||||||
|
// continues at next opcode.
|
||||||
|
void DSPEmitter::HandleLoop()
|
||||||
|
{
|
||||||
|
#ifdef _M_IX86 // All32
|
||||||
|
MOVZX(32, 16, EAX, M(&(g_dsp.r[DSP_REG_ST2])));
|
||||||
|
MOVZX(32, 16, ECX, M(&(g_dsp.r[DSP_REG_ST3])));
|
||||||
|
#else
|
||||||
|
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
||||||
|
MOVZX(32, 16, EAX, MDisp(R11,DSP_REG_ST2*2));
|
||||||
|
MOVZX(32, 16, ECX, MDisp(R11,DSP_REG_ST3*2));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
CMP(32, R(RCX), Imm32(0));
|
||||||
|
FixupBranch rLoopCntG = J_CC(CC_LE, true);
|
||||||
|
CMP(16, R(RAX), Imm16(compilePC - 1));
|
||||||
|
FixupBranch rLoopAddrG = J_CC(CC_NE, true);
|
||||||
|
|
||||||
|
#ifdef _M_IX86 // All32
|
||||||
|
SUB(16, M(&(g_dsp.r[DSP_REG_ST3])), Imm16(1));
|
||||||
|
CMP(16, M(&(g_dsp.r[DSP_REG_ST3])), Imm16(0));
|
||||||
|
#else
|
||||||
|
SUB(16, MDisp(R11,DSP_REG_ST3*2), Imm16(1));
|
||||||
|
CMP(16, MDisp(R11,DSP_REG_ST3*2), Imm16(0));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
FixupBranch loadStack = J_CC(CC_LE, true);
|
||||||
|
#ifdef _M_IX86 // All32
|
||||||
|
MOVZX(32, 16, ECX, M(&(g_dsp.r[DSP_REG_ST0])));
|
||||||
|
MOV(16, M(&g_dsp.pc), R(RCX));
|
||||||
|
#else
|
||||||
|
MOVZX(32, 16, RCX, MDisp(R11,DSP_REG_ST0*2));
|
||||||
|
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
|
||||||
|
MOV(16, MatR(RAX), R(RCX));
|
||||||
|
#endif
|
||||||
|
FixupBranch loopUpdated = J(true);
|
||||||
|
|
||||||
|
SetJumpTarget(loadStack);
|
||||||
|
dsp_reg_load_stack(0);
|
||||||
|
dsp_reg_load_stack(2);
|
||||||
|
dsp_reg_load_stack(3);
|
||||||
|
|
||||||
|
SetJumpTarget(loopUpdated);
|
||||||
|
SetJumpTarget(rLoopAddrG);
|
||||||
|
SetJumpTarget(rLoopCntG);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// LOOP $R
|
||||||
|
// 0000 0000 010r rrrr
|
||||||
|
// Repeatedly execute following opcode until counter specified by value
|
||||||
|
// from register $R reaches zero. Each execution decrement counter. Register
|
||||||
|
// $R remains unchanged. If register $R is set to zero at the beginning of loop
|
||||||
|
// then looped instruction will not get executed.
|
||||||
|
// Actually, this instruction simply prepares the loop stacks for the above.
|
||||||
|
// The looping hardware takes care of the rest.
|
||||||
|
void DSPEmitter::loop(const UDSPInstruction opc)
|
||||||
|
{
|
||||||
|
u16 reg = opc & 0x1f;
|
||||||
|
// u16 cnt = g_dsp.r[reg];
|
||||||
|
#ifdef _M_IX86 // All32
|
||||||
|
MOVZX(32, 16, EDX, M(&(g_dsp.r[reg])));
|
||||||
|
#else
|
||||||
|
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
||||||
|
MOVZX(32, 16, EDX, MDisp(R11,reg*2));
|
||||||
|
#endif
|
||||||
|
u16 loop_pc = compilePC + 1;
|
||||||
|
|
||||||
|
CMP(16, R(EDX), Imm16(0));
|
||||||
|
FixupBranch cnt = J_CC(CC_Z, true);
|
||||||
|
dsp_reg_store_stack(3);
|
||||||
|
MOV(16, R(RDX), Imm16(compilePC + 1));
|
||||||
|
dsp_reg_store_stack(0);
|
||||||
|
MOV(16, R(RDX), Imm16(loop_pc));
|
||||||
|
dsp_reg_store_stack(2);
|
||||||
|
|
||||||
|
SetJumpTarget(cnt);
|
||||||
|
#ifdef _M_IX86 // All32
|
||||||
|
MOV(16, M(&(g_dsp.pc)), Imm16(compilePC + 1));
|
||||||
|
#else
|
||||||
|
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
|
||||||
|
MOV(16, MDisp(RAX,0), Imm16(compilePC + 1));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// LOOPI #I
|
||||||
|
// 0001 0000 iiii iiii
|
||||||
|
// Repeatedly execute following opcode until counter specified by
|
||||||
|
// immediate value I reaches zero. Each execution decrement counter. If
|
||||||
|
// immediate value I is set to zero at the beginning of loop then looped
|
||||||
|
// instruction will not get executed.
|
||||||
|
// Actually, this instruction simply prepares the loop stacks for the above.
|
||||||
|
// The looping hardware takes care of the rest.
|
||||||
|
void DSPEmitter::loopi(const UDSPInstruction opc)
|
||||||
|
{
|
||||||
|
u16 cnt = opc & 0xff;
|
||||||
|
u16 loop_pc = compilePC + 1;
|
||||||
|
|
||||||
|
if (cnt)
|
||||||
|
{
|
||||||
|
MOV(16, R(RDX), Imm16(compilePC + 1));
|
||||||
|
dsp_reg_store_stack(0);
|
||||||
|
MOV(16, R(RDX), Imm16(loop_pc));
|
||||||
|
dsp_reg_store_stack(2);
|
||||||
|
MOV(16, R(RDX), Imm16(cnt));
|
||||||
|
dsp_reg_store_stack(3);
|
||||||
|
|
||||||
|
#ifdef _M_IX86 // All32
|
||||||
|
MOV(16, M(&(g_dsp.pc)), Imm16(compilePC + 1));
|
||||||
|
#else
|
||||||
|
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
|
||||||
|
MOV(16, MDisp(RAX,0), Imm16(compilePC + 1));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// BLOOP $R, addrA
|
||||||
|
// 0000 0000 011r rrrr
|
||||||
|
// aaaa aaaa aaaa aaaa
|
||||||
|
// Repeatedly execute block of code starting at following opcode until
|
||||||
|
// counter specified by value from register $R reaches zero. Block ends at
|
||||||
|
// specified address addrA inclusive, ie. opcode at addrA is the last opcode
|
||||||
|
// included in loop. Counter is pushed on loop stack $st3, end of block address
|
||||||
|
// is pushed on loop stack $st2 and repeat address is pushed on call stack $st0.
|
||||||
|
// Up to 4 nested loops are allowed.
|
||||||
|
void DSPEmitter::bloop(const UDSPInstruction opc)
|
||||||
|
{
|
||||||
|
u16 reg = opc & 0x1f;
|
||||||
|
// u16 cnt = g_dsp.r[reg];
|
||||||
|
#ifdef _M_IX86 // All32
|
||||||
|
MOVZX(32, 16, EDX, M(&(g_dsp.r[reg])));
|
||||||
|
#else
|
||||||
|
MOV(64, R(R11), ImmPtr(&g_dsp.r));
|
||||||
|
MOVZX(32, 16, EDX, MDisp(R11,reg*2));
|
||||||
|
#endif
|
||||||
|
u16 loop_pc = dsp_imem_read(compilePC + 1);
|
||||||
|
|
||||||
|
CMP(16, R(EDX), Imm16(0));
|
||||||
|
FixupBranch cnt = J_CC(CC_Z, true);
|
||||||
|
dsp_reg_store_stack(3);
|
||||||
|
MOV(16, R(RDX), Imm16(compilePC + 2));
|
||||||
|
dsp_reg_store_stack(0);
|
||||||
|
MOV(16, R(RDX), Imm16(loop_pc));
|
||||||
|
dsp_reg_store_stack(2);
|
||||||
|
#ifdef _M_IX86 // All32
|
||||||
|
MOV(16, M(&(g_dsp.pc)), Imm16(compilePC + 2));
|
||||||
|
#else
|
||||||
|
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
|
||||||
|
MOV(16, MDisp(RAX,0), Imm16(compilePC + 2));
|
||||||
|
#endif
|
||||||
|
FixupBranch exit = J();
|
||||||
|
|
||||||
|
SetJumpTarget(cnt);
|
||||||
|
// g_dsp.pc = loop_pc;
|
||||||
|
// dsp_skip_inst();
|
||||||
|
#ifdef _M_IX86 // All32
|
||||||
|
MOV(16, M(&g_dsp.pc), Imm16(loop_pc + opTable[loop_pc]->size));
|
||||||
|
#else
|
||||||
|
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
|
||||||
|
MOV(16, MatR(RAX), Imm16(loop_pc + opTable[loop_pc]->size));
|
||||||
|
#endif
|
||||||
|
SetJumpTarget(exit);
|
||||||
|
}
|
||||||
|
|
||||||
|
// BLOOPI #I, addrA
|
||||||
|
// 0001 0001 iiii iiii
|
||||||
|
// aaaa aaaa aaaa aaaa
|
||||||
|
// Repeatedly execute block of code starting at following opcode until
|
||||||
|
// counter specified by immediate value I reaches zero. Block ends at specified
|
||||||
|
// address addrA inclusive, ie. opcode at addrA is the last opcode included in
|
||||||
|
// loop. Counter is pushed on loop stack $st3, end of block address is pushed
|
||||||
|
// on loop stack $st2 and repeat address is pushed on call stack $st0. Up to 4
|
||||||
|
// nested loops are allowed.
|
||||||
|
void DSPEmitter::bloopi(const UDSPInstruction opc)
|
||||||
|
{
|
||||||
|
u16 cnt = opc & 0xff;
|
||||||
|
// u16 loop_pc = dsp_fetch_code();
|
||||||
|
u16 loop_pc = dsp_imem_read(compilePC + 1);
|
||||||
|
|
||||||
|
if (cnt)
|
||||||
|
{
|
||||||
|
MOV(16, R(RDX), Imm16(compilePC + 2));
|
||||||
|
dsp_reg_store_stack(0);
|
||||||
|
MOV(16, R(RDX), Imm16(loop_pc));
|
||||||
|
dsp_reg_store_stack(2);
|
||||||
|
MOV(16, R(RDX), Imm16(cnt));
|
||||||
|
dsp_reg_store_stack(3);
|
||||||
|
|
||||||
|
#ifdef _M_IX86 // All32
|
||||||
|
MOV(16, M(&(g_dsp.pc)), Imm16(compilePC + 2));
|
||||||
|
#else
|
||||||
|
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
|
||||||
|
MOV(16, MDisp(RAX,0), Imm16(compilePC + 2));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// g_dsp.pc = loop_pc;
|
||||||
|
// dsp_skip_inst();
|
||||||
|
#ifdef _M_IX86 // All32
|
||||||
|
MOV(16, M(&g_dsp.pc), Imm16(loop_pc + opTable[loop_pc]->size));
|
||||||
|
#else
|
||||||
|
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
|
||||||
|
MOV(16, MatR(RAX), Imm16(loop_pc + opTable[loop_pc]->size));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user