LLE JIT: Added the loop instructions to the JIT. Added ASM version of HandleLoop. Both x86 and x64 versions have been added.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6659 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
skidau 2010-12-26 12:34:38 +00:00
parent 7c5b12c5bc
commit 963ca6f963
5 changed files with 230 additions and 12 deletions

View File

@ -299,7 +299,7 @@ void DSPEmitter::Compile(int start_addr)
MOVZX(32, 16, EAX, MDisp(R11,DSP_REG_ST2*2)); MOVZX(32, 16, EAX, MDisp(R11,DSP_REG_ST2*2));
#endif #endif
CMP(32, R(EAX), Imm32(0)); CMP(32, R(EAX), Imm32(0));
FixupBranch rLoopAddressExit = J_CC(CC_LE); FixupBranch rLoopAddressExit = J_CC(CC_LE, true);
#ifdef _M_IX86 // All32 #ifdef _M_IX86 // All32
MOVZX(32, 16, EAX, M(&(g_dsp.r[DSP_REG_ST3]))); MOVZX(32, 16, EAX, M(&(g_dsp.r[DSP_REG_ST3])));
@ -307,7 +307,7 @@ void DSPEmitter::Compile(int start_addr)
MOVZX(32, 16, EAX, MDisp(R11,DSP_REG_ST3*2)); MOVZX(32, 16, EAX, MDisp(R11,DSP_REG_ST3*2));
#endif #endif
CMP(32, R(EAX), Imm32(0)); CMP(32, R(EAX), Imm32(0));
FixupBranch rLoopCounterExit = J_CC(CC_LE); FixupBranch rLoopCounterExit = J_CC(CC_LE, true);
if (!opcode->branch) if (!opcode->branch)
{ {
@ -322,7 +322,7 @@ void DSPEmitter::Compile(int start_addr)
// These functions branch and therefore only need to be called in the // These functions branch and therefore only need to be called in the
// end of each block and in this order // end of each block and in this order
ABI_CallFunction((void *)&DSPInterpreter::HandleLoop); HandleLoop();
// ABI_RestoreStack(0); // ABI_RestoreStack(0);
ABI_PopAllCalleeSavedRegsAndAdjustStack(); ABI_PopAllCalleeSavedRegsAndAdjustStack();
if (DSPAnalyzer::code_flags[start_addr] & DSPAnalyzer::CODE_IDLE_SKIP) if (DSPAnalyzer::code_flags[start_addr] & DSPAnalyzer::CODE_IDLE_SKIP)

View File

@ -104,8 +104,8 @@ public:
// Command helpers // Command helpers
void dsp_reg_stack_push(int stack_reg); void dsp_reg_stack_push(int stack_reg);
void dsp_reg_stack_pop(int stack_reg); void dsp_reg_stack_pop(int stack_reg);
void dsp_reg_store_stack(int stack_reg, Gen::X64Reg host_sreg); void dsp_reg_store_stack(int stack_reg, Gen::X64Reg host_sreg = Gen::EDX);
void dsp_reg_load_stack(int stack_reg, Gen::X64Reg host_dreg); void dsp_reg_load_stack(int stack_reg, Gen::X64Reg host_dreg = Gen::EDX);
void dsp_reg_store_stack_imm(int stack_reg, u16 val); void dsp_reg_store_stack_imm(int stack_reg, u16 val);
void dsp_op_write_reg(int reg, Gen::X64Reg host_sreg); void dsp_op_write_reg(int reg, Gen::X64Reg host_sreg);
void dsp_op_write_reg_imm(int reg, u16 val); void dsp_op_write_reg_imm(int reg, u16 val);
@ -127,10 +127,15 @@ public:
void nx(const UDSPInstruction opc); void nx(const UDSPInstruction opc);
// Branch // Branch
void HandleLoop();
void jcc(const UDSPInstruction opc); void jcc(const UDSPInstruction opc);
void jmprcc(const UDSPInstruction opc); void jmprcc(const UDSPInstruction opc);
void call(const UDSPInstruction opc); void call(const UDSPInstruction opc);
void callr(const UDSPInstruction opc); void callr(const UDSPInstruction opc);
void loop(const UDSPInstruction opc);
void loopi(const UDSPInstruction opc);
void bloop(const UDSPInstruction opc);
void bloopi(const UDSPInstruction opc);
// Load/Store // Load/Store
void srs(const UDSPInstruction opc); void srs(const UDSPInstruction opc);

View File

@ -184,10 +184,10 @@ const DSPOPCTemplate opcodes[] =
{"ILRRN", 0x021c, 0xfefc, DSPInterpreter::ilrrn, NULL, 1, 2, {{P_ACCM, 1, 0, 8, 0x0100}, {P_PRG, 1, 0, 0, 0x0003}}, false, false, false, false, false}, {"ILRRN", 0x021c, 0xfefc, DSPInterpreter::ilrrn, NULL, 1, 2, {{P_ACCM, 1, 0, 8, 0x0100}, {P_PRG, 1, 0, 0, 0x0003}}, false, false, false, false, false},
// LOOPS // LOOPS
{"LOOP", 0x0040, 0xffe0, DSPInterpreter::loop, NULL, 1, 1, {{P_REG, 1, 0, 0, 0x001f}}, false, true, false, true, false}, {"LOOP", 0x0040, 0xffe0, DSPInterpreter::loop, &DSPEmitter::loop, 1, 1, {{P_REG, 1, 0, 0, 0x001f}}, false, true, false, true, false},
{"BLOOP", 0x0060, 0xffe0, DSPInterpreter::bloop, NULL, 2, 2, {{P_REG, 1, 0, 0, 0x001f}, {P_ADDR_I, 2, 1, 0, 0xffff}}, false, true, false, true, false}, {"BLOOP", 0x0060, 0xffe0, DSPInterpreter::bloop, &DSPEmitter::bloop, 2, 2, {{P_REG, 1, 0, 0, 0x001f}, {P_ADDR_I, 2, 1, 0, 0xffff}}, false, true, false, true, false},
{"LOOPI", 0x1000, 0xff00, DSPInterpreter::loopi, NULL, 1, 1, {{P_IMM, 1, 0, 0, 0x00ff}}, false, true, false, true, false}, {"LOOPI", 0x1000, 0xff00, DSPInterpreter::loopi, &DSPEmitter::loopi, 1, 1, {{P_IMM, 1, 0, 0, 0x00ff}}, false, true, false, true, false},
{"BLOOPI", 0x1100, 0xff00, DSPInterpreter::bloopi, NULL, 2, 2, {{P_IMM, 1, 0, 0, 0x00ff}, {P_ADDR_I, 2, 1, 0, 0xffff}}, false, true, false, true, false}, {"BLOOPI", 0x1100, 0xff00, DSPInterpreter::bloopi, &DSPEmitter::bloopi, 2, 2, {{P_IMM, 1, 0, 0, 0x00ff}, {P_ADDR_I, 2, 1, 0, 0xffff}}, false, true, false, true, false},
// load and store value pointed by indexing reg and increment; LRR/SRR variants // load and store value pointed by indexing reg and increment; LRR/SRR variants
{"LRR", 0x1800, 0xff80, DSPInterpreter::lrr, &DSPEmitter::lrr, 1, 2, {{P_REG, 1, 0, 0, 0x001f}, {P_PRG, 1, 0, 5, 0x0060}}, false, false, false, false, false}, {"LRR", 0x1800, 0xff80, DSPInterpreter::lrr, &DSPEmitter::lrr, 1, 2, {{P_REG, 1, 0, 0, 0x001f}, {P_PRG, 1, 0, 5, 0x0060}}, false, false, false, false, false},

View File

@ -142,7 +142,7 @@ void halt(const UDSPInstruction opc)
// instructions. Whenever there is value on stack $st2 and current PC is equal // instructions. Whenever there is value on stack $st2 and current PC is equal
// value at $st2, then value at stack $st3 is decremented. If value is not zero // value at $st2, then value at stack $st3 is decremented. If value is not zero
// then PC is modified with value from call stack $st0. Otherwise values from // then PC is modified with value from call stack $st0. Otherwise values from
// call stack $st0 and both loop stacks $st2 and $st3 are poped and execution // call stack $st0 and both loop stacks $st2 and $st3 are popped and execution
// continues at next opcode. // continues at next opcode.
void HandleLoop() void HandleLoop()
{ {
@ -225,7 +225,7 @@ void loopi(const UDSPInstruction opc)
// specified address addrA inclusive, ie. opcode at addrA is the last opcode // specified address addrA inclusive, ie. opcode at addrA is the last opcode
// included in loop. Counter is pushed on loop stack $st3, end of block address // included in loop. Counter is pushed on loop stack $st3, end of block address
// is pushed on loop stack $st2 and repeat address is pushed on call stack $st0. // is pushed on loop stack $st2 and repeat address is pushed on call stack $st0.
// Up to 4 nested loops is allowed. // Up to 4 nested loops are allowed.
void bloop(const UDSPInstruction opc) void bloop(const UDSPInstruction opc)
{ {
u16 reg = opc & 0x1f; u16 reg = opc & 0x1f;
@ -253,7 +253,7 @@ void bloop(const UDSPInstruction opc)
// address addrA inclusive, ie. opcode at addrA is the last opcode included in // address addrA inclusive, ie. opcode at addrA is the last opcode included in
// loop. Counter is pushed on loop stack $st3, end of block address is pushed // loop. Counter is pushed on loop stack $st3, end of block address is pushed
// on loop stack $st2 and repeat address is pushed on call stack $st0. Up to 4 // on loop stack $st2 and repeat address is pushed on call stack $st0. Up to 4
// nested loops is allowed. // nested loops are allowed.
void bloopi(const UDSPInstruction opc) void bloopi(const UDSPInstruction opc)
{ {
u16 cnt = opc & 0xff; u16 cnt = opc & 0xff;

View File

@ -284,3 +284,216 @@ void DSPEmitter::callr(const UDSPInstruction opc)
{ {
ReJitConditional<r_callr>(opc, *this); ReJitConditional<r_callr>(opc, *this);
} }
// LOOP handling: Loop stack is used to control execution of repeated blocks of
// instructions. Whenever there is a value on stack $st2 and the current PC is
// equal to the value at $st2, the value at stack $st3 is decremented. If that
// value is not zero then PC is modified with the value from call stack $st0.
// Otherwise the values from call stack $st0 and both loop stacks $st2 and $st3
// are popped and execution continues at the next opcode.
void DSPEmitter::HandleLoop()
{
	// Emits host code for the loop check that follows an emitted DSP
	// instruction: EAX <- $st2 (loop end address), ECX <- $st3 (loop counter),
	// both zero-extended from 16 bits.
#ifdef _M_IX86 // All32
	MOVZX(32, 16, EAX, M(&(g_dsp.r[DSP_REG_ST2])));
	MOVZX(32, 16, ECX, M(&(g_dsp.r[DSP_REG_ST3])));
#else
	MOV(64, R(R11), ImmPtr(&g_dsp.r));
	MOVZX(32, 16, EAX, MDisp(R11,DSP_REG_ST2*2));
	MOVZX(32, 16, ECX, MDisp(R11,DSP_REG_ST3*2));
#endif

	// Nothing to do when the counter is zero. ECX is zero-extended, so the
	// signed 32-bit "less or equal" test can only be taken for zero.
	CMP(32, R(RCX), Imm32(0));
	FixupBranch rLoopCntG = J_CC(CC_LE, true);

	// Nothing to do unless $st2 equals compilePC - 1.
	// NOTE(review): presumably compilePC already points past the instruction
	// that was just emitted - confirm against Compile().
	CMP(16, R(RAX), Imm16(compilePC - 1));
	FixupBranch rLoopAddrG = J_CC(CC_NE, true);

	// Decrement the counter in place. The counter is an unsigned 16-bit value,
	// so the loop ends only when it reaches exactly zero. SUB already sets ZF,
	// hence no separate CMP. A signed 16-bit "less or equal" test here would
	// end the loop early whenever the remaining count is >= 0x8000.
#ifdef _M_IX86 // All32
	SUB(16, M(&(g_dsp.r[DSP_REG_ST3])), Imm16(1));
#else
	SUB(16, MDisp(R11,DSP_REG_ST3*2), Imm16(1));
#endif
	FixupBranch loadStack = J_CC(CC_Z, true);

	// Counter still non-zero: g_dsp.pc <- $st0 (repeat address).
#ifdef _M_IX86 // All32
	MOVZX(32, 16, ECX, M(&(g_dsp.r[DSP_REG_ST0])));
	MOV(16, M(&g_dsp.pc), R(RCX));
#else
	MOVZX(32, 16, RCX, MDisp(R11,DSP_REG_ST0*2));
	MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
	MOV(16, MatR(RAX), R(RCX));
#endif
	FixupBranch loopUpdated = J(true);

	// Counter reached zero: pop $st0, $st2 and $st3 and fall through.
	SetJumpTarget(loadStack);
	dsp_reg_load_stack(0);
	dsp_reg_load_stack(2);
	dsp_reg_load_stack(3);

	SetJumpTarget(loopUpdated);
	SetJumpTarget(rLoopAddrG);
	SetJumpTarget(rLoopCntG);
}
// LOOP $R
// 0000 0000 010r rrrr
// Repeatedly execute the following opcode until the counter specified by the
// value from register $R reaches zero. Each execution decrements the counter.
// Register $R remains unchanged. If register $R is set to zero at the beginning
// of the loop then the looped instruction will not get executed.
// Actually, this instruction simply prepares the loop stacks for the above.
// The looping hardware takes care of the rest.
void DSPEmitter::loop(const UDSPInstruction opc)
{
u16 reg = opc & 0x1f;
// u16 cnt = g_dsp.r[reg];
#ifdef _M_IX86 // All32
MOVZX(32, 16, EDX, M(&(g_dsp.r[reg])));
#else
MOV(64, R(R11), ImmPtr(&g_dsp.r));
MOVZX(32, 16, EDX, MDisp(R11,reg*2));
#endif
u16 loop_pc = compilePC + 1;
CMP(16, R(EDX), Imm16(0));
FixupBranch cnt = J_CC(CC_Z, true);
dsp_reg_store_stack(3);
MOV(16, R(RDX), Imm16(compilePC + 1));
dsp_reg_store_stack(0);
MOV(16, R(RDX), Imm16(loop_pc));
dsp_reg_store_stack(2);
SetJumpTarget(cnt);
#ifdef _M_IX86 // All32
MOV(16, M(&(g_dsp.pc)), Imm16(compilePC + 1));
#else
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
MOV(16, MDisp(RAX,0), Imm16(compilePC + 1));
#endif
}
// LOOPI #I
// 0001 0000 iiii iiii
// Repeatedly execute the following opcode until the counter specified by
// immediate value I reaches zero. Each execution decrements the counter. If
// immediate value I is set to zero at the beginning of the loop then the looped
// instruction will not get executed.
// Actually, this instruction simply prepares the loop stacks for the above.
// The looping hardware takes care of the rest.
void DSPEmitter::loopi(const UDSPInstruction opc)
{
u16 cnt = opc & 0xff;
u16 loop_pc = compilePC + 1;
if (cnt)
{
MOV(16, R(RDX), Imm16(compilePC + 1));
dsp_reg_store_stack(0);
MOV(16, R(RDX), Imm16(loop_pc));
dsp_reg_store_stack(2);
MOV(16, R(RDX), Imm16(cnt));
dsp_reg_store_stack(3);
#ifdef _M_IX86 // All32
MOV(16, M(&(g_dsp.pc)), Imm16(compilePC + 1));
#else
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
MOV(16, MDisp(RAX,0), Imm16(compilePC + 1));
#endif
}
}
// BLOOP $R, addrA
// 0000 0000 011r rrrr
// aaaa aaaa aaaa aaaa
// Repeatedly execute block of code starting at following opcode until
// counter specified by value from register $R reaches zero. Block ends at
// specified address addrA inclusive, i.e. opcode at addrA is the last opcode
// included in loop. Counter is pushed on loop stack $st3, end of block address
// is pushed on loop stack $st2 and repeat address is pushed on call stack $st0.
// Up to 4 nested loops are allowed.
void DSPEmitter::bloop(const UDSPInstruction opc)
{
u16 reg = opc & 0x1f;
// u16 cnt = g_dsp.r[reg];
#ifdef _M_IX86 // All32
MOVZX(32, 16, EDX, M(&(g_dsp.r[reg])));
#else
MOV(64, R(R11), ImmPtr(&g_dsp.r));
MOVZX(32, 16, EDX, MDisp(R11,reg*2));
#endif
u16 loop_pc = dsp_imem_read(compilePC + 1);
CMP(16, R(EDX), Imm16(0));
FixupBranch cnt = J_CC(CC_Z, true);
dsp_reg_store_stack(3);
MOV(16, R(RDX), Imm16(compilePC + 2));
dsp_reg_store_stack(0);
MOV(16, R(RDX), Imm16(loop_pc));
dsp_reg_store_stack(2);
#ifdef _M_IX86 // All32
MOV(16, M(&(g_dsp.pc)), Imm16(compilePC + 2));
#else
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
MOV(16, MDisp(RAX,0), Imm16(compilePC + 2));
#endif
FixupBranch exit = J();
SetJumpTarget(cnt);
// g_dsp.pc = loop_pc;
// dsp_skip_inst();
#ifdef _M_IX86 // All32
MOV(16, M(&g_dsp.pc), Imm16(loop_pc + opTable[loop_pc]->size));
#else
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
MOV(16, MatR(RAX), Imm16(loop_pc + opTable[loop_pc]->size));
#endif
SetJumpTarget(exit);
}
// BLOOPI #I, addrA
// 0001 0001 iiii iiii
// aaaa aaaa aaaa aaaa
// Repeatedly execute block of code starting at following opcode until
// counter specified by immediate value I reaches zero. Block ends at specified
// address addrA inclusive, i.e. opcode at addrA is the last opcode included in
// loop. Counter is pushed on loop stack $st3, end of block address is pushed
// on loop stack $st2 and repeat address is pushed on call stack $st0. Up to 4
// nested loops are allowed.
void DSPEmitter::bloopi(const UDSPInstruction opc)
{
u16 cnt = opc & 0xff;
// u16 loop_pc = dsp_fetch_code();
u16 loop_pc = dsp_imem_read(compilePC + 1);
if (cnt)
{
MOV(16, R(RDX), Imm16(compilePC + 2));
dsp_reg_store_stack(0);
MOV(16, R(RDX), Imm16(loop_pc));
dsp_reg_store_stack(2);
MOV(16, R(RDX), Imm16(cnt));
dsp_reg_store_stack(3);
#ifdef _M_IX86 // All32
MOV(16, M(&(g_dsp.pc)), Imm16(compilePC + 2));
#else
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
MOV(16, MDisp(RAX,0), Imm16(compilePC + 2));
#endif
}
else
{
// g_dsp.pc = loop_pc;
// dsp_skip_inst();
#ifdef _M_IX86 // All32
MOV(16, M(&g_dsp.pc), Imm16(loop_pc + opTable[loop_pc]->size));
#else
MOV(64, R(RAX), ImmPtr(&(g_dsp.pc)));
MOV(16, MatR(RAX), Imm16(loop_pc + opTable[loop_pc]->size));
#endif
}
}