diff --git a/Source/Plugins/Plugin_DSP_LLE/Src/DSPAnalyzer.cpp b/Source/Plugins/Plugin_DSP_LLE/Src/DSPAnalyzer.cpp index 8376d30ba4..c182fa33c8 100644 --- a/Source/Plugins/Plugin_DSP_LLE/Src/DSPAnalyzer.cpp +++ b/Source/Plugins/Plugin_DSP_LLE/Src/DSPAnalyzer.cpp @@ -36,22 +36,22 @@ u8 code_flags[ISPACE]; // 0xFFFF means ignore. const u16 idle_skip_sigs[NUM_IDLE_SIGS][MAX_IDLE_SIG_SIZE + 1] = { - { 0x26fc, // LRS $30, @DMBH - 0x02c0, 0x8000, // ANDCF $30, #0x8000 - 0x029d, 0xFFFF, // JLZ 0x027a - 0, 0 }, // RET - { 0x27fc, // LRS $31, @DMBH - 0x03c0, 0x8000, // ANDCF $31, #0x8000 - 0x029d, 0xFFFF, // JLZ 0x027a - 0, 0 }, // RET - { 0x26fe, // LRS $30, @CMBH - 0x02c0, 0x8000, // ANDCF $30, #0x8000 - 0x029c, 0xFFFF, // JLNZ 0x0280 - 0, 0 }, // RET - { 0x27fe, // LRS $31, @CMBH - 0x03c0, 0x8000, // ANDCF $31, #0x8000 - 0x029c, 0xFFFF, // JLNZ 0x0280 - 0, 0 }, // RET + { 0x26fc, // LRS $30, @DMBH + 0x02c0, 0x8000, // ANDCF $30, #0x8000 + 0x029d, 0xFFFF, // JLZ 0x027a + 0, 0 }, // RET + { 0x27fc, // LRS $31, @DMBH + 0x03c0, 0x8000, // ANDCF $31, #0x8000 + 0x029d, 0xFFFF, // JLZ 0x027a + 0, 0 }, // RET + { 0x26fe, // LRS $30, @CMBH + 0x02c0, 0x8000, // ANDCF $30, #0x8000 + 0x029c, 0xFFFF, // JLNZ 0x0280 + 0, 0 }, // RET + { 0x27fe, // LRS $31, @CMBH + 0x03c0, 0x8000, // ANDCF $31, #0x8000 + 0x029c, 0xFFFF, // JLNZ 0x0280 + 0, 0 }, // RET }; void Reset() @@ -64,8 +64,8 @@ void AnalyzeRange(int start_addr, int end_addr) // First we run an extremely simplified version of a disassembler to find // where all instructions start. - // This may not be 100% accurate in case of jump tables, but should be good - // enough as a start. + // This may not be 100% accurate in case of jump tables! + // It could get desynced, which would be bad. We'll see if that's an issue. int addr = start_addr; while (addr < end_addr) { @@ -78,6 +78,16 @@ void AnalyzeRange(int start_addr, int end_addr) } code_flags[addr] |= CODE_START_OF_INST; addr += opcode->size; + + // Look for loops. + if ((inst.hex & 0xffe0) == 0x0060 || (inst.hex & 0xff00) == 0x1100) { + // BLOOP, BLOOPI + u16 loop_end = dsp_imem_read(addr + 1); + code_flags[loop_end] |= CODE_LOOP_END; + } else if ((inst.hex & 0xffe0) == 0x0040 || (inst.hex & 0xff00) == 0x1000) { + // LOOP, LOOPI + code_flags[addr + 1] |= CODE_LOOP_END; + } } // Next, we'll scan for potential idle skips. diff --git a/Source/Plugins/Plugin_DSP_LLE/Src/DSPAnalyzer.h b/Source/Plugins/Plugin_DSP_LLE/Src/DSPAnalyzer.h index c7e4c7dfa9..b187eef2a7 100644 --- a/Source/Plugins/Plugin_DSP_LLE/Src/DSPAnalyzer.h +++ b/Source/Plugins/Plugin_DSP_LLE/Src/DSPAnalyzer.h @@ -23,10 +23,16 @@ namespace DSPAnalyzer { #define ISPACE 65536 + + +// Useful things to detect: +// * Loop endpoints - so that we can avoid checking for loops every cycle. + enum { CODE_START_OF_INST = 1, CODE_IDLE_SKIP = 2, + CODE_LOOP_END = 4, }; // Easy to query array covering the whole of instruction memory. diff --git a/Source/Plugins/Plugin_DSP_LLE/Src/disassemble.cpp b/Source/Plugins/Plugin_DSP_LLE/Src/disassemble.cpp index 44d514bdad..52746e246f 100644 --- a/Source/Plugins/Plugin_DSP_LLE/Src/disassemble.cpp +++ b/Source/Plugins/Plugin_DSP_LLE/Src/disassemble.cpp @@ -268,7 +268,7 @@ u16 gd_dis_get_opcode_size(gd_globals_t* gdg) if ((gdg->pc & 0x7fff) >= 0x1000) return 1; - u32 op1 = Common::swap16(gdg->binbuf[gdg->pc & 0x0fff]); + u32 op1 = gdg->binbuf[gdg->pc & 0x0fff]; for (u32 j = 0; j < opcodes_size; j++) { @@ -393,7 +393,7 @@ char* gd_dis_opcode(gd_globals_t* gdg) if ((opc->size & ~P_EXT) == 2) { - op2 = Common::swap16(gdg->binbuf[pc + 1]); + op2 = gdg->binbuf[pc + 1]; if (gdg->show_hex) sprintf(buf, "%04x %04x ", op1, op2); diff --git a/Source/Plugins/Plugin_DSP_LLE/Src/gdsp_aram.cpp b/Source/Plugins/Plugin_DSP_LLE/Src/gdsp_aram.cpp index 61faab6543..ccca9b4954 100644 --- a/Source/Plugins/Plugin_DSP_LLE/Src/gdsp_aram.cpp +++ b/Source/Plugins/Plugin_DSP_LLE/Src/gdsp_aram.cpp @@ -19,12 +19,10 @@ #include "gdsp_interface.h" #include "gdsp_interpreter.h" -extern u16 dsp_swap16(u16 x); - // The hardware adpcm decoder :) -s16 ADPCM_Step(u32& _rSamplePos, u32 _BaseAddress) +s16 ADPCM_Step(u32& _rSamplePos) { - s16* pCoefTable = (s16*)&gdsp_ifx_regs[DSP_COEF_A1_0]; + const s16 *pCoefTable = (const s16 *)&gdsp_ifx_regs[DSP_COEF_A1_0]; if (((_rSamplePos) & 15) == 0) { @@ -66,8 +64,7 @@ s16 ADPCM_Step(u32& _rSamplePos, u32 _BaseAddress) u16 dsp_read_aram() { - // u32 BaseAddress = (gdsp_ifx_regs[DSP_ACSAH] << 16) | gdsp_ifx_regs[DSP_ACSAL]; - u32 EndAddress = (gdsp_ifx_regs[DSP_ACEAH] << 16) | gdsp_ifx_regs[DSP_ACEAL]; + const u32 EndAddress = (gdsp_ifx_regs[DSP_ACEAH] << 16) | gdsp_ifx_regs[DSP_ACEAL]; u32 Address = (gdsp_ifx_regs[DSP_ACCAH] << 16) | gdsp_ifx_regs[DSP_ACCAL]; u16 val; @@ -75,11 +72,11 @@ u16 dsp_read_aram() // lets the "hardware" decode switch (gdsp_ifx_regs[DSP_FORMAT]) { - case 0x00: - val = ADPCM_Step(Address, EndAddress); + case 0x00: // ADPCM audio + val = ADPCM_Step(Address); break; - case 0x0A: + case 0x0A: // 16-bit PCM audio val = (g_dspInitialize.pARAM_Read_U8(Address) << 8) | g_dspInitialize.pARAM_Read_U8(Address + 1); gdsp_ifx_regs[DSP_YN2] = gdsp_ifx_regs[DSP_YN1]; @@ -99,15 +96,18 @@ u16 dsp_read_aram() // check for loop - if (Address > EndAddress) + if (Address >= EndAddress) { + // Set address back to start address. Address = (gdsp_ifx_regs[DSP_ACSAH] << 16) | gdsp_ifx_regs[DSP_ACSAL]; + + // Do we really need both? gdsp_generate_exception(3); gdsp_generate_exception(5); // Somehow, YN1 and YN2 must be initialized with their "loop" values, so yeah, // it seems likely that we should raise an exception to let the DSP program do that, - // at least if DSP_FORMAT == 0x0A. + // at least if DSP_FORMAT == 0x0A. } gdsp_ifx_regs[DSP_ACCAH] = Address >> 16; diff --git a/Source/Plugins/Plugin_DSP_LLE/Src/gdsp_interface.cpp b/Source/Plugins/Plugin_DSP_LLE/Src/gdsp_interface.cpp index 67d8391a62..3a12dcae9e 100644 --- a/Source/Plugins/Plugin_DSP_LLE/Src/gdsp_interface.cpp +++ b/Source/Plugins/Plugin_DSP_LLE/Src/gdsp_interface.cpp @@ -103,11 +103,10 @@ u16 gdsp_mbox_read_h(u8 mbx) u16 gdsp_mbox_read_l(u8 mbx) { - u16 val; if (g_dspInitialize.bOnThread) g_CriticalSection.Enter(); - val = gdsp_mbox[mbx][1]; + u16 val = gdsp_mbox[mbx][1]; gdsp_mbox[mbx][0] &= ~0x8000; DEBUG_LOG(DSPLLE, "- DSP reads mail from mbx %i: %08x (pc=0x%04x)", mbx, gdsp_mbox_peek(mbx), g_dsp.pc); @@ -198,7 +197,7 @@ u16 gdsp_ifx_read(u16 addr) break; } - return(val); + return val; } @@ -208,9 +207,9 @@ void gdsp_idma_in(u16 dsp_addr, u32 addr, u32 size) u8* dst = ((u8*)g_dsp.iram); for (u32 i = 0; i < size; i += 2) - { + { // TODO : this may be different on Wii. - *(u16*)&dst[dsp_addr + i] = Common::swap16(*(u16*)&g_dsp.cpu_ram[(addr + i) & 0x0fffffff]); + *(u16*)&dst[dsp_addr + i] = Common::swap16(*(const u16*)&g_dsp.cpu_ram[(addr + i) & 0x0fffffff]); } WriteProtectMemory(g_dsp.iram, DSP_IRAM_BYTE_SIZE, false); @@ -241,7 +240,7 @@ void gdsp_ddma_in(u16 dsp_addr, u32 addr, u32 size) for (u32 i = 0; i < size; i += 2) { - *(u16*)&dst[dsp_addr + i] = Common::swap16(*(u16*)&g_dsp.cpu_ram[(addr + i) & 0x7FFFFFFF]); + *(u16*)&dst[dsp_addr + i] = Common::swap16(*(const u16*)&g_dsp.cpu_ram[(addr + i) & 0x7FFFFFFF]); } INFO_LOG(DSPLLE, "*** ddma_in RAM (0x%08x) -> DRAM_DSP (0x%04x) : size (0x%08x)\n", addr, dsp_addr / 2, size); @@ -256,11 +255,11 @@ void gdsp_ddma_out(u16 dsp_addr, u32 addr, u32 size) return; } - u8* src = ((u8*)g_dsp.dram); + const u8* src = ((const u8*)g_dsp.dram); for (u32 i = 0; i < size; i += 2) { - *(u16*)&g_dsp.cpu_ram[(addr + i) & 0x7FFFFFFF] = Common::swap16(*(u16*)&src[dsp_addr + i]); + *(u16*)&g_dsp.cpu_ram[(addr + i) & 0x7FFFFFFF] = Common::swap16(*(const u16*)&src[dsp_addr + i]); } INFO_LOG(DSPLLE, "*** ddma_out DRAM_DSP (0x%04x) -> RAM (0x%08x) : size (0x%08x)\n", dsp_addr / 2, addr, size); diff --git a/Source/Plugins/Plugin_DSP_LLE/Src/gdsp_interpreter.cpp b/Source/Plugins/Plugin_DSP_LLE/Src/gdsp_interpreter.cpp index cfb68f47e4..2aa91cc184 100644 --- a/Source/Plugins/Plugin_DSP_LLE/Src/gdsp_interpreter.cpp +++ b/Source/Plugins/Plugin_DSP_LLE/Src/gdsp_interpreter.cpp @@ -217,28 +217,47 @@ u16 gdsp_read_cr() return g_dsp.cr; } -void gdsp_step() +void gdsp_check_external_int() { - g_dsp.step_counter++; - -#if PROFILE - g_dsp.err_pc = g_dsp.pc; - - ProfilerAddDelta(g_dsp.err_pc, 1); - if (g_dsp.step_counter == 1) + // check if there is an external interrupt + if (cr_external_int) { - ProfilerInit(); + if (dsp_SR_is_flag_set(FLAG_ENABLE_INTERUPT) && (g_dsp.exception_in_progress_hack == false)) + { + // level 7 is the interrupt exception + gdsp_generate_exception(7); + g_dsp.cr &= ~0x0002; + UpdateCachedCR(); + } } +} - if ((g_dsp.step_counter & 0xFFFFF) == 0) +void gdsp_check_exceptions() +{ + // check exceptions + if ((g_dsp.exceptions != 0) && (!g_dsp.exception_in_progress_hack)) { - ProfilerDump(g_dsp.step_counter); + for (int i = 0; i < 8; i++) + { + if (g_dsp.exceptions & (1 << i)) + { + _assert_msg_(MASTER_LOG, !g_dsp.exception_in_progress_hack, "assert while exception"); + + dsp_reg_store_stack(DSP_STACK_C, g_dsp.pc); + dsp_reg_store_stack(DSP_STACK_D, g_dsp.r[DSP_REG_SR]); + + g_dsp.pc = i * 2; + g_dsp.exceptions &= ~(1 << i); + + g_dsp.exception_in_progress_hack = true; + break; + } + } } -#endif - - u16 opc = dsp_fetch_code(); - ExecuteInstruction(UDSPInstruction(opc)); +} +void gdsp_handle_loop() +{ // Handle looping hardware. u16& rLoopCounter = g_dsp.r[DSP_REG_ST3]; if (rLoopCounter > 0) @@ -263,39 +282,33 @@ void gdsp_step() } } } +} - // check if there is an external interrupt - if (cr_external_int) +void gdsp_step() +{ + gdsp_check_exceptions(); + + g_dsp.step_counter++; + +#if PROFILE + g_dsp.err_pc = g_dsp.pc; + + ProfilerAddDelta(g_dsp.err_pc, 1); + if (g_dsp.step_counter == 1) { - if (dsp_SR_is_flag_set(FLAG_ENABLE_INTERUPT) && (g_dsp.exception_in_progress_hack == false)) - { - // level 7 is the interrupt exception - gdsp_generate_exception(7); - g_dsp.cr &= ~0x0002; - UpdateCachedCR(); - } + ProfilerInit(); } - // check exceptions - if ((g_dsp.exceptions != 0) && (!g_dsp.exception_in_progress_hack)) + if ((g_dsp.step_counter & 0xFFFFF) == 0) { - for (int i = 0; i < 8; i++) - { - if (g_dsp.exceptions & (1 << i)) - { - _assert_msg_(MASTER_LOG, !g_dsp.exception_in_progress_hack, "assert while exception"); - - dsp_reg_store_stack(DSP_STACK_C, g_dsp.pc); - dsp_reg_store_stack(DSP_STACK_D, g_dsp.r[DSP_REG_SR]); - - g_dsp.pc = i * 2; - g_dsp.exceptions &= ~(1 << i); - - g_dsp.exception_in_progress_hack = true; - break; - } - } + ProfilerDump(g_dsp.step_counter); } +#endif + + u16 opc = dsp_fetch_code(); + ExecuteInstruction(UDSPInstruction(opc)); + + gdsp_handle_loop(); } // Used by thread mode. @@ -307,8 +320,10 @@ void gdsp_run() // Are we running? if (*g_dspInitialize.pEmulatorState) break; - - gdsp_step(); + + gdsp_check_external_int(); + for (int i = 0; i < 500; i++) + gdsp_step(); if (!gdsp_running) break; @@ -319,6 +334,8 @@ void gdsp_run() // Used by non-thread mode. void gdsp_run_cycles(int cycles) { + gdsp_check_external_int(); + // First, let's run a few cycles with no idle skipping so that things can progress a bit. for (int i = 0; i < 8; i++) { @@ -327,6 +344,7 @@ void gdsp_run_cycles(int cycles) gdsp_step(); cycles--; } + // Next, let's run a few cycles with idle skipping, so that we can skip loops. for (int i = 0; i < 8; i++) { @@ -337,6 +355,7 @@ void gdsp_run_cycles(int cycles) gdsp_step(); cycles--; } + // Now, run the rest of the block without idle skipping. It might trip into a // idle loop and if so we waste some time here. Might be beneficial to slice even further. while (cycles > 0)