From 05cdbccc3882ecd5778371a95801951519eed5d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Lam?= Date: Tue, 19 Sep 2017 19:14:08 +0200 Subject: [PATCH 01/11] DSPSpy/Base: Clean up trailing whitespace --- Source/DSPSpy/tests/dsp_base.inc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Source/DSPSpy/tests/dsp_base.inc b/Source/DSPSpy/tests/dsp_base.inc index 29c992b1c3..55cff4af8e 100644 --- a/Source/DSPSpy/tests/dsp_base.inc +++ b/Source/DSPSpy/tests/dsp_base.inc @@ -106,7 +106,7 @@ MEM_LO: equ 0x0f7F jmp start_of_test ; This is where we jump when we're done testing, see above. -; We just fall into a loop, playing dead until someone resets the DSP. +; We just fall into a loop, playing dead until someone resets the DSP. end_of_test: nop jmp end_of_test @@ -156,7 +156,7 @@ irq: si @DIRQ, #0x0001 halt ; Through some magic this allows us to properly ack the exception in dspspy ;rti ; allow dumping of ucodes which cause exceptions...probably not safe at all - + ; DMA:s the current state of the registers back to the PowerPC. To do this, ; it must write the contents of all regs to DRAM. ; Unfortunately, this loop uses ar0 so it's best to use AR1 and friends for testing @@ -216,13 +216,13 @@ send_back: dma_copy: mrr $ax0.l, $ac1.m -; Wait for the CPU to send us a mail. +; Wait for the CPU to send us a mail. call 0x807e si @DMBH, #0x8888 si @DMBL, #0xfeeb si @DIRQ, #0x0001 - -; wait for the CPU to recieve our response before we execute the next op + +; wait for the CPU to recieve our response before we execute the next op call 0x8078 andi $ac0.m, #0x7fff lrs $ac1.m, @CMBL @@ -261,14 +261,14 @@ dma_copy: lrri $ac0.m, @$ar0 lrri $ac1.m, @$ar0 lr $ar0, @REGS_BASE - + ret ; from send_back - + ; If you are in set40 mode, use this instead of send_back if you want to stay ; in set40 mode. send_back_40: set16 - call send_back + call send_back set40 ret From e569d3bc4a8d011eb7d93034fc386f9174b397b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Lam?= Date: Tue, 19 Sep 2017 19:16:53 +0200 Subject: [PATCH 02/11] DSPSpy/Base: Handle ACCOV exceptions This allows dspspy to show that an ACCOV happened, and to resume accelerator reads after an ACCOV (by refreshing the YN2 register). --- Source/DSPSpy/tests/dsp_base.inc | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Source/DSPSpy/tests/dsp_base.inc b/Source/DSPSpy/tests/dsp_base.inc index 55cff4af8e..7fac883311 100644 --- a/Source/DSPSpy/tests/dsp_base.inc +++ b/Source/DSPSpy/tests/dsp_base.inc @@ -138,8 +138,18 @@ irq4: lri $ac0.m, #0x0004 jmp irq irq5: - lri $ac0.m, #0x0005 - jmp irq + lrs $ac0.m, @DMBH + andcf $ac0.m, #0x8000 + jlz irq5 + si @DMBH, #0x8005 + si @DMBL, #0x0000 + si @DIRQ, #0x0001 + lri $ac0.m, #0xbbbb + sr @0xffda, $ac0.m ; pred scale + sr @0xffdb, $ac0.m ; yn1 + lr $ix2, @ARAM + sr @0xffdc, $ac0.m ; yn2 + rti irq6: lri $ac0.m, #0x0006 jmp irq From 99e36cd9d98802ce0eba5d28dbfb99a71fd05dcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Lam?= Date: Tue, 19 Sep 2017 19:13:39 +0200 Subject: [PATCH 03/11] DSPSpy: Print ACCOV mails --- Source/DSPSpy/main_spy.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Source/DSPSpy/main_spy.cpp b/Source/DSPSpy/main_spy.cpp index 10516dc252..5be2598404 100644 --- a/Source/DSPSpy/main_spy.cpp +++ b/Source/DSPSpy/main_spy.cpp @@ -390,6 +390,10 @@ void handle_dsp_mail(void) while (real_dsp.CheckMailTo()) ; } + else if (mail == 0x80050000) + { + CON_PrintRow(4, 25, "ACCOV at step %i", dsp_steps); + } // ROM dumping mails else if (mail == 0x8888c0de) From 7c01127ac6b37728be435a82ae6d7a2939cf81c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Lam?= Date: Tue, 19 Sep 2017 19:10:11 +0200 Subject: [PATCH 04/11] DSPSpy: Add a test for accelerator loop This adds a test ucode that can be used to check the accelerator loop behaviour with various start/end addresses. It's actually more of a test template than a ready to use test. --- Source/DSPSpy/tests/accelerator_loop_test.ds | 64 ++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 Source/DSPSpy/tests/accelerator_loop_test.ds diff --git a/Source/DSPSpy/tests/accelerator_loop_test.ds b/Source/DSPSpy/tests/accelerator_loop_test.ds new file mode 100644 index 0000000000..20e72919f5 --- /dev/null +++ b/Source/DSPSpy/tests/accelerator_loop_test.ds @@ -0,0 +1,64 @@ +incdir "tests" +include "dsp_base.inc" + +; Test parameters +lri $AC0.M, #0x0000 ; start +lri $AC0.L, #0x0000 ; start +lri $AC1.M, #0x0000 ; end +lri $AC1.L, #0x0011 ; end + +; Reset some registers +lri $AC0.H, #0xffff +sr @0xffda, $AC0.H ; pred scale +sr @0xffdb, $AC0.H ; yn1 +sr @0xffdc, $AC0.H ; yn2 + +; Set the sample format +lri $AC0.H, #0x0 +sr @0xffd1, $AC0.H +; Set the starting and current address +srs @ACSAH, $AC0.M +srs @ACCAH, $AC0.M +srs @ACSAL, $AC0.L +srs @ACCAL, $AC0.L +; Set the ending address +srs @ACEAH, $AC1.M +srs @ACEAL, $AC1.L + +call load_hw_reg_to_regs +call send_back ; check the accelerator regs before a read + +bloopi #40, end_of_loop + lr $IX3, @ARAM + call load_hw_reg_to_regs + call send_back ; after a read +end_of_loop: + nop + +jmp end_of_test + +load_hw_reg_to_regs: + lr $AR0, @0xffd1 ; format + lr $AR1, @0xffd2 ; unknown + lr $AR2, @0xffda ; pred scale + lr $AR3, @0xffdb ; yn1 + lr $IX0, @0xffdc ; yn2 + lr $IX1, @0xffdf ; unknown accelerator register + + lri $AC0.H, #0 + lrs $AC0.M, @ACSAH + lrs $AC0.L, @ACSAL + + lri $AC1.H, #0 + lrs $AC1.M, @ACEAH + lrs $AC1.L, @ACEAL + + lrs $AX0.H, @ACCAH + lrs $AX0.L, @ACCAL + lrs $AX1.H, @ACCAH + lrs $AX1.L, @ACCAL + + lrs $AX1.H, @ACCAH + lrs $AX1.L, @ACCAL + + ret From 017bfcda2be6683af57bedc972e6da2d37534784 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Lam?= Date: Thu, 21 Sep 2017 22:09:54 +0200 Subject: [PATCH 05/11] DSP: Fix gdsp_ifx_write to take a u16 value And change the JIT to clear the upper 16 bits when calling the write function to work around bugs in some compilers like clang. --- Source/Core/Core/DSP/DSPHWInterface.cpp | 2 +- Source/Core/Core/DSP/DSPHWInterface.h | 2 +- Source/Core/Core/DSP/Jit/DSPJitUtil.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Source/Core/Core/DSP/DSPHWInterface.cpp b/Source/Core/Core/DSP/DSPHWInterface.cpp index c965233a8e..0e773b8b4b 100644 --- a/Source/Core/Core/DSP/DSPHWInterface.cpp +++ b/Source/Core/Core/DSP/DSPHWInterface.cpp @@ -98,7 +98,7 @@ u16 gdsp_mbox_read_l(Mailbox mbx) return (u16)value; } -void gdsp_ifx_write(u32 addr, u32 val) +void gdsp_ifx_write(u32 addr, u16 val) { g_dsp_cap->LogIFXWrite(addr, val); diff --git a/Source/Core/Core/DSP/DSPHWInterface.h b/Source/Core/Core/DSP/DSPHWInterface.h index 329851fb9a..8c68e1c834 100644 --- a/Source/Core/Core/DSP/DSPHWInterface.h +++ b/Source/Core/Core/DSP/DSPHWInterface.h @@ -22,6 +22,6 @@ u16 gdsp_mbox_read_h(Mailbox mbx); u16 gdsp_mbox_read_l(Mailbox mbx); void gdsp_ifx_init(); -void gdsp_ifx_write(u32 addr, u32 val); +void gdsp_ifx_write(u32 addr, u16 val); u16 gdsp_ifx_read(u16 addr); } // namespace DSP diff --git a/Source/Core/Core/DSP/Jit/DSPJitUtil.cpp b/Source/Core/Core/DSP/Jit/DSPJitUtil.cpp index 842362c1fb..476637badb 100644 --- a/Source/Core/Core/DSP/Jit/DSPJitUtil.cpp +++ b/Source/Core/Core/DSP/Jit/DSPJitUtil.cpp @@ -530,9 +530,9 @@ void DSPEmitter::dmem_write(X64Reg value) FixupBranch end = J(true); // else if (saddr == 0xf) SetJumpTarget(ifx); - // Does it mean gdsp_ifx_write needs u32 rather than u16? DSPJitRegCache c(m_gpr); X64Reg abisafereg = m_gpr.MakeABICallSafe(value); + MOVZX(32, 16, abisafereg, R(abisafereg)); m_gpr.PushRegs(); ABI_CallFunctionRR(gdsp_ifx_write, EAX, abisafereg); m_gpr.PopRegs(); From 003dba5275c12a883104be2e670aeedfeb92f526 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Lam?= Date: Wed, 20 Sep 2017 10:52:57 +0200 Subject: [PATCH 06/11] DSP: Convert accelerator to a C++ class Slightly cleaner, allows DSP accelerator behaviour to be added to both HLE and LLE pretty easily, and makes the accelerator easier to unit test. I chose to include all accelerator state as private members, and to expose state that is accessible via registers with getters/setters. It's more verbose, yes, but it makes it very clear what is part of the accelerator state and what isn't (e.g. coefs). This works quite well for registers, since the accelerator can do whatever it wants internally. For example, the start/end/current addresses are masked -- having a getter/setter makes it easier to enforce the mask. --- Source/Core/Core/DSP/DSPAccelerator.cpp | 161 +++++++++++--------- Source/Core/Core/DSP/DSPAccelerator.h | 49 +++++- Source/Core/Core/DSP/DSPCore.cpp | 12 ++ Source/Core/Core/DSP/DSPCore.h | 4 + Source/Core/Core/DSP/DSPHWInterface.cpp | 75 ++++++--- Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h | 96 ++++++------ Source/Core/Core/HW/DSPLLE/DSPLLE.cpp | 2 + 7 files changed, 256 insertions(+), 143 deletions(-) diff --git a/Source/Core/Core/DSP/DSPAccelerator.cpp b/Source/Core/Core/DSP/DSPAccelerator.cpp index 1a8cbf2671..c82b62530a 100644 --- a/Source/Core/Core/DSP/DSPAccelerator.cpp +++ b/Source/Core/Core/DSP/DSPAccelerator.cpp @@ -4,76 +4,61 @@ #include "Core/DSP/DSPAccelerator.h" +#include "Common/ChunkFile.h" #include "Common/CommonTypes.h" #include "Common/Logging/Log.h" #include "Common/MathUtil.h" -#include "Core/DSP/DSPCore.h" -#include "Core/DSP/DSPHWInterface.h" -#include "Core/DSP/DSPHost.h" - namespace DSP { -u16 dsp_read_aram_d3() +u16 Accelerator::ReadD3() { - // Zelda ucode reads ARAM through 0xffd3. - const u32 EndAddress = (g_dsp.ifx_regs[DSP_ACEAH] << 16) | g_dsp.ifx_regs[DSP_ACEAL]; - u32 Address = (g_dsp.ifx_regs[DSP_ACCAH] << 16) | g_dsp.ifx_regs[DSP_ACCAL]; u16 val = 0; - switch (g_dsp.ifx_regs[DSP_FORMAT]) + switch (m_sample_format) { case 0x5: // u8 reads - val = Host::ReadHostMemory(Address); - Address++; + val = ReadMemory(m_current_address); + m_current_address++; break; case 0x6: // u16 reads - val = (Host::ReadHostMemory(Address * 2) << 8) | Host::ReadHostMemory(Address * 2 + 1); - Address++; + val = (ReadMemory(m_current_address * 2) << 8) | ReadMemory(m_current_address * 2 + 1); + m_current_address++; break; default: - ERROR_LOG(DSPLLE, "dsp_read_aram_d3() - unknown format 0x%x", g_dsp.ifx_regs[DSP_FORMAT]); + ERROR_LOG(DSPLLE, "dsp_read_aram_d3() - unknown format 0x%x", m_sample_format); break; } - if (Address >= EndAddress) + if (m_current_address >= m_end_address) { // Set address back to start address. (never seen this here!) - Address = (g_dsp.ifx_regs[DSP_ACSAH] << 16) | g_dsp.ifx_regs[DSP_ACSAL]; + m_current_address = m_start_address; } - - g_dsp.ifx_regs[DSP_ACCAH] = Address >> 16; - g_dsp.ifx_regs[DSP_ACCAL] = Address & 0xffff; return val; } -void dsp_write_aram_d3(u16 value) +void Accelerator::WriteD3(u16 value) { // Zelda ucode writes a bunch of zeros to ARAM through d3 during // initialization. Don't know if it ever does it later, too. // Pikmin 2 Wii writes non-stop to 0x10008000-0x1000801f (non-zero values too) // Zelda TP Wii writes non-stop to 0x10000000-0x1000001f (non-zero values too) - u32 Address = (g_dsp.ifx_regs[DSP_ACCAH] << 16) | g_dsp.ifx_regs[DSP_ACCAL]; - switch (g_dsp.ifx_regs[DSP_FORMAT]) + switch (m_sample_format) { case 0xA: // u16 writes - Host::WriteHostMemory(value >> 8, Address * 2); - Host::WriteHostMemory(value & 0xFF, Address * 2 + 1); - Address++; + WriteMemory(m_current_address * 2, value >> 8); + WriteMemory(m_current_address * 2 + 1, value & 0xFF); + m_current_address++; break; default: - ERROR_LOG(DSPLLE, "dsp_write_aram_d3() - unknown format 0x%x", g_dsp.ifx_regs[DSP_FORMAT]); + ERROR_LOG(DSPLLE, "dsp_write_aram_d3() - unknown format 0x%x", m_sample_format); break; } - - g_dsp.ifx_regs[DSP_ACCAH] = Address >> 16; - g_dsp.ifx_regs[DSP_ACCAL] = Address & 0xffff; } -u16 ReadAccelerator(u32 start_address, u32 end_address, u32* current_address, u16 sample_format, - s16* yn1, s16* yn2, u16* pred_scale, s16* coefs, - std::function end_exception) +u16 Accelerator::Read(s16* coefs) { u16 val; u8 step_size_bytes = 0; @@ -84,18 +69,18 @@ u16 ReadAccelerator(u32 start_address, u32 end_address, u32* current_address, u1 // extension and do/do not use ADPCM. It also remains to be figured out // whether there's a difference between the usual accelerator "read // address" and 0xd3. - switch (sample_format) + switch (m_sample_format) { case 0x00: // ADPCM audio { // ADPCM decoding, not much to explain here. - if ((*current_address & 15) == 0) + if ((m_current_address & 15) == 0) { - *pred_scale = Host::ReadHostMemory((*current_address & ~15) >> 1); - *current_address += 2; + m_pred_scale = ReadMemory((m_current_address & ~15) >> 1); + m_current_address += 2; } - switch (end_address & 15) + switch (m_end_address & 15) { case 0: // Tom and Jerry step_size_bytes = 1; @@ -108,45 +93,44 @@ u16 ReadAccelerator(u32 start_address, u32 end_address, u32* current_address, u1 break; } - int scale = 1 << (*pred_scale & 0xF); - int coef_idx = (*pred_scale >> 4) & 0x7; + int scale = 1 << (m_pred_scale & 0xF); + int coef_idx = (m_pred_scale >> 4) & 0x7; s32 coef1 = coefs[coef_idx * 2 + 0]; s32 coef2 = coefs[coef_idx * 2 + 1]; - int temp = (*current_address & 1) ? (Host::ReadHostMemory(*current_address >> 1) & 0xF) : - (Host::ReadHostMemory(*current_address >> 1) >> 4); + int temp = (m_current_address & 1) ? (ReadMemory(m_current_address >> 1) & 0xF) : + (ReadMemory(m_current_address >> 1) >> 4); if (temp >= 8) temp -= 16; - s32 val32 = (scale * temp) + ((0x400 + coef1 * *yn1 + coef2 * *yn2) >> 11); + s32 val32 = (scale * temp) + ((0x400 + coef1 * m_yn1 + coef2 * m_yn2) >> 11); val = static_cast(MathUtil::Clamp(val32, -0x7FFF, 0x7FFF)); - *yn2 = *yn1; - *yn1 = val; - *current_address += 1; + m_yn2 = m_yn1; + m_yn1 = val; + m_current_address += 1; break; } case 0x0A: // 16-bit PCM audio - val = (Host::ReadHostMemory(*current_address * 2) << 8) | - Host::ReadHostMemory(*current_address * 2 + 1); - *yn2 = *yn1; - *yn1 = val; + val = (ReadMemory(m_current_address * 2) << 8) | ReadMemory(m_current_address * 2 + 1); + m_yn2 = m_yn1; + m_yn1 = val; step_size_bytes = 2; - *current_address += 1; + m_current_address += 1; break; case 0x19: // 8-bit PCM audio - val = Host::ReadHostMemory(*current_address) << 8; - *yn2 = *yn1; - *yn1 = val; + val = ReadMemory(m_current_address) << 8; + m_yn2 = m_yn1; + m_yn1 = val; step_size_bytes = 2; - *current_address += 1; + m_current_address += 1; break; default: - ERROR_LOG(DSPLLE, "dsp_read_accelerator() - unknown format 0x%x", g_dsp.ifx_regs[DSP_FORMAT]); + ERROR_LOG(DSPLLE, "dsp_read_accelerator() - unknown format 0x%x", m_sample_format); step_size_bytes = 2; - *current_address += 1; + m_current_address += 1; val = 0; break; } @@ -160,30 +144,63 @@ u16 ReadAccelerator(u32 start_address, u32 end_address, u32* current_address, u1 // Somehow, YN1 and YN2 must be initialized with their "loop" values, // so yeah, it seems likely that we should raise an exception to let // the DSP program do that, at least if DSP_FORMAT == 0x0A. - if (*current_address == (end_address + step_size_bytes - 1)) + if (m_current_address == (m_end_address + step_size_bytes - 1)) { // Set address back to start address. - *current_address = start_address; - end_exception(); + m_current_address = m_start_address; + OnEndException(); } + + SetCurrentAddress(m_current_address); return val; } -u16 dsp_read_accelerator() +void Accelerator::DoState(PointerWrap& p) { - const u32 start_address = (g_dsp.ifx_regs[DSP_ACSAH] << 16) | g_dsp.ifx_regs[DSP_ACSAL]; - const u32 end_address = (g_dsp.ifx_regs[DSP_ACEAH] << 16) | g_dsp.ifx_regs[DSP_ACEAL]; - u32 current_address = (g_dsp.ifx_regs[DSP_ACCAH] << 16) | g_dsp.ifx_regs[DSP_ACCAL]; + p.Do(m_start_address); + p.Do(m_end_address); + p.Do(m_current_address); + p.Do(m_sample_format); + p.Do(m_yn1); + p.Do(m_yn2); + p.Do(m_pred_scale); +} - auto end_address_reached = [] { DSPCore_SetException(EXP_ACCOV); }; - const u16 val = ReadAccelerator( - start_address, end_address, ¤t_address, g_dsp.ifx_regs[DSP_FORMAT], - reinterpret_cast(&g_dsp.ifx_regs[DSP_YN1]), - reinterpret_cast(&g_dsp.ifx_regs[DSP_YN2]), &g_dsp.ifx_regs[DSP_PRED_SCALE], - reinterpret_cast(&g_dsp.ifx_regs[DSP_COEF_A1_0]), end_address_reached); +constexpr u32 START_END_ADDRESS_MASK = 0x3fffffff; +constexpr u32 CURRENT_ADDRESS_MASK = 0xbfffffff; - gdsp_ifx_write(DSP_ACCAH, current_address >> 16); - gdsp_ifx_write(DSP_ACCAL, current_address & 0xffff); - return val; +void Accelerator::SetStartAddress(u32 address) +{ + m_start_address = address & START_END_ADDRESS_MASK; +} + +void Accelerator::SetEndAddress(u32 address) +{ + m_end_address = address & START_END_ADDRESS_MASK; +} + +void Accelerator::SetCurrentAddress(u32 address) +{ + m_current_address = address & CURRENT_ADDRESS_MASK; +} + +void Accelerator::SetSampleFormat(u16 format) +{ + m_sample_format = format; +} + +void Accelerator::SetYn1(s16 yn1) +{ + m_yn1 = yn1; +} + +void Accelerator::SetYn2(s16 yn2) +{ + m_yn2 = yn2; +} + +void Accelerator::SetPredScale(u16 pred_scale) +{ + m_pred_scale = pred_scale; } } // namespace DSP diff --git a/Source/Core/Core/DSP/DSPAccelerator.h b/Source/Core/Core/DSP/DSPAccelerator.h index 6cb7428b88..1c12da3417 100644 --- a/Source/Core/Core/DSP/DSPAccelerator.h +++ b/Source/Core/Core/DSP/DSPAccelerator.h @@ -4,18 +4,51 @@ #pragma once -#include - #include "Common/CommonTypes.h" +class PointerWrap; + namespace DSP { -u16 ReadAccelerator(u32 start_address, u32 end_address, u32* current_address, u16 sample_format, - s16* yn1, s16* yn2, u16* pred_scale, s16* coefs, - std::function end_exception); +class Accelerator +{ +public: + virtual ~Accelerator() = default; -u16 dsp_read_accelerator(); + u16 Read(s16* coefs); + // Zelda ucode reads ARAM through 0xffd3. + u16 ReadD3(); + void WriteD3(u16 value); -u16 dsp_read_aram_d3(); -void dsp_write_aram_d3(u16 value); + u32 GetStartAddress() const { return m_start_address; } + u32 GetEndAddress() const { return m_end_address; } + u32 GetCurrentAddress() const { return m_current_address; } + u16 GetSampleFormat() const { return m_sample_format; } + s16 GetYn1() const { return m_yn1; } + s16 GetYn2() const { return m_yn2; } + u16 GetPredScale() const { return m_pred_scale; } + void SetStartAddress(u32 address); + void SetEndAddress(u32 address); + void SetCurrentAddress(u32 address); + void SetSampleFormat(u16 format); + void SetYn1(s16 yn1); + void SetYn2(s16 yn2); + void SetPredScale(u16 pred_scale); + + void DoState(PointerWrap& p); + +protected: + virtual void OnEndException() = 0; + virtual u8 ReadMemory(u32 address) = 0; + virtual void WriteMemory(u32 address, u8 value) = 0; + + // DSP accelerator registers. + u32 m_start_address = 0; + u32 m_end_address = 0; + u32 m_current_address = 0; + u16 m_sample_format = 0; + s16 m_yn1 = 0; + s16 m_yn2 = 0; + u16 m_pred_scale = 0; +}; } // namespace DSP diff --git a/Source/Core/Core/DSP/DSPCore.cpp b/Source/Core/Core/DSP/DSPCore.cpp index aec6b9dd50..3c92a4be54 100644 --- a/Source/Core/Core/DSP/DSPCore.cpp +++ b/Source/Core/Core/DSP/DSPCore.cpp @@ -15,12 +15,14 @@ #include "Common/MemoryUtil.h" #include "Common/MsgHandler.h" +#include "Core/DSP/DSPAccelerator.h" #include "Core/DSP/DSPAnalyzer.h" #include "Core/DSP/DSPHWInterface.h" #include "Core/DSP/DSPHost.h" #include "Core/DSP/Interpreter/DSPIntUtil.h" #include "Core/DSP/Interpreter/DSPInterpreter.h" #include "Core/DSP/Jit/DSPEmitter.h" +#include "Core/HW/DSP.h" namespace DSP { @@ -111,11 +113,21 @@ static void DSPCore_FreeMemoryPages() g_dsp.irom = g_dsp.iram = g_dsp.dram = g_dsp.coef = nullptr; } +class LLEAccelerator final : public Accelerator +{ +protected: + u8 ReadMemory(u32 address) override { return Host::ReadHostMemory(address); } + void WriteMemory(u32 address, u8 value) override { Host::WriteHostMemory(value, address); } + void OnEndException() override { DSPCore_SetException(EXP_ACCOV); } +}; + bool DSPCore_Init(const DSPInitOptions& opts) { g_dsp.step_counter = 0; g_init_hax = false; + g_dsp.accelerator = std::make_unique(); + g_dsp.irom = static_cast(Common::AllocateMemoryPages(DSP_IROM_BYTE_SIZE)); g_dsp.iram = static_cast(Common::AllocateMemoryPages(DSP_IRAM_BYTE_SIZE)); g_dsp.dram = static_cast(Common::AllocateMemoryPages(DSP_DRAM_BYTE_SIZE)); diff --git a/Source/Core/Core/DSP/DSPCore.h b/Source/Core/Core/DSP/DSPCore.h index 5e4798ccdd..953dd210ad 100644 --- a/Source/Core/Core/DSP/DSPCore.h +++ b/Source/Core/Core/DSP/DSPCore.h @@ -16,6 +16,8 @@ namespace DSP { +class Accelerator; + namespace JIT { namespace x86 @@ -299,6 +301,8 @@ struct SDSP // Accelerator / DMA / other hardware registers. Not GPRs. std::array ifx_regs; + std::unique_ptr accelerator; + // When state saving, all of the above can just be memcpy'd into the save state. // The below needs special handling. u16* iram; diff --git a/Source/Core/Core/DSP/DSPHWInterface.cpp b/Source/Core/Core/DSP/DSPHWInterface.cpp index 0e773b8b4b..09a700117c 100644 --- a/Source/Core/Core/DSP/DSPHWInterface.cpp +++ b/Source/Core/Core/DSP/DSPHWInterface.cpp @@ -136,10 +136,6 @@ void gdsp_ifx_write(u32 addr, u16 val) g_dsp.ifx_regs[DSP_DSBL] = 0; break; - case DSP_ACDATA1: // Accelerator write (Zelda type) - "UnkZelda" - dsp_write_aram_d3(val); - break; - case DSP_GAIN: if (val) { @@ -151,21 +147,45 @@ void gdsp_ifx_write(u32 addr, u16 val) case DSP_DSCR: g_dsp.ifx_regs[addr & 0xFF] = val; break; - /* - case DSP_ACCAL: - dsp_step_accelerator(); - break; - */ - // Masking occurs for the start and end addresses as soon as the registers are written to. case DSP_ACSAH: - case DSP_ACEAH: - g_dsp.ifx_regs[addr & 0xff] = val & 0x3fff; + g_dsp.accelerator->SetStartAddress(val << 16 | + static_cast(g_dsp.accelerator->GetStartAddress())); + break; + case DSP_ACSAL: + g_dsp.accelerator->SetStartAddress( + static_cast(g_dsp.accelerator->GetStartAddress() >> 16) << 16 | val); + break; + case DSP_ACEAH: + g_dsp.accelerator->SetEndAddress(val << 16 | + static_cast(g_dsp.accelerator->GetEndAddress())); + break; + case DSP_ACEAL: + g_dsp.accelerator->SetEndAddress( + static_cast(g_dsp.accelerator->GetEndAddress() >> 16) << 16 | val); break; - - // This also happens for the current address, but with a different mask. case DSP_ACCAH: - g_dsp.ifx_regs[addr & 0xff] = val & 0xbfff; + g_dsp.accelerator->SetCurrentAddress(val << 16 | + static_cast(g_dsp.accelerator->GetCurrentAddress())); + break; + case DSP_ACCAL: + g_dsp.accelerator->SetCurrentAddress( + static_cast(g_dsp.accelerator->GetCurrentAddress() >> 16) << 16 | val); + break; + case DSP_FORMAT: + g_dsp.accelerator->SetSampleFormat(val); + break; + case DSP_YN1: + g_dsp.accelerator->SetYn1(val); + break; + case DSP_YN2: + g_dsp.accelerator->SetYn2(val); + break; + case DSP_PRED_SCALE: + g_dsp.accelerator->SetPredScale(val); + break; + case DSP_ACDATA1: // Accelerator write (Zelda type) - "UnkZelda" + g_dsp.accelerator->WriteD3(val); break; default: @@ -208,11 +228,30 @@ static u16 _gdsp_ifx_read(u16 addr) case DSP_DSCR: return g_dsp.ifx_regs[addr & 0xFF]; + case DSP_ACSAH: + return static_cast(g_dsp.accelerator->GetStartAddress() >> 16); + case DSP_ACSAL: + return static_cast(g_dsp.accelerator->GetStartAddress()); + case DSP_ACEAH: + return static_cast(g_dsp.accelerator->GetEndAddress() >> 16); + case DSP_ACEAL: + return static_cast(g_dsp.accelerator->GetEndAddress()); + case DSP_ACCAH: + return static_cast(g_dsp.accelerator->GetCurrentAddress() >> 16); + case DSP_ACCAL: + return static_cast(g_dsp.accelerator->GetCurrentAddress()); + case DSP_FORMAT: + return g_dsp.accelerator->GetSampleFormat(); + case DSP_YN1: + return g_dsp.accelerator->GetYn1(); + case DSP_YN2: + return g_dsp.accelerator->GetYn2(); + case DSP_PRED_SCALE: + return g_dsp.accelerator->GetPredScale(); case DSP_ACCELERATOR: // ADPCM Accelerator reads - return dsp_read_accelerator(); - + return g_dsp.accelerator->Read(reinterpret_cast(&g_dsp.ifx_regs[DSP_COEF_A1_0])); case DSP_ACDATA1: // Accelerator reads (Zelda type) - "UnkZelda" - return dsp_read_aram_d3(); + return g_dsp.accelerator->ReadD3(); default: if ((addr & 0xff) >= 0xa0) diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h index 059ae46ee9..29e43dd49f 100644 --- a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h +++ b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h @@ -13,6 +13,7 @@ #endif #include +#include #include "Common/CommonTypes.h" #include "Common/MathUtil.h" @@ -163,57 +164,33 @@ void DumpPB(const PB_TYPE& pb) #endif // Simulated accelerator state. -static u32 acc_loop_addr, acc_end_addr; -static u32* acc_cur_addr; static PB_TYPE* acc_pb; static bool acc_end_reached; -// Sets up the simulated accelerator. -void AcceleratorSetup(PB_TYPE* pb, u32* cur_addr) +class HLEAccelerator final : public Accelerator { - acc_pb = pb; - // Masking occurs for the start and end addresses as soon as the registers are written to. - acc_loop_addr = HILO_TO_32(pb->audio_addr.loop_addr) & 0x3fffffff; - acc_end_addr = HILO_TO_32(pb->audio_addr.end_addr) & 0x3fffffff; - acc_cur_addr = cur_addr; - // It also happens for the current address, but with a different mask. - *acc_cur_addr &= 0xbfffffff; - acc_end_reached = false; -} - -// Reads a sample from the accelerator. Also handles looping and -// disabling streams that reached the end (this is done by an exception raised -// by the accelerator on real hardware). -u16 AcceleratorGetSample() -{ - // See below for explanations about acc_end_reached. - if (acc_end_reached) - return 0; - - auto end_address_reached = [] { - // loop back to loop_addr. - *acc_cur_addr = acc_loop_addr; - +protected: + void OnEndException() override + { if (acc_pb->audio_addr.looping) { // Set the ADPCM info to continue processing at loop_addr. - // - // For some reason, yn1 and yn2 aren't set if the voice is not of - // stream type. This is what the AX UCode does and I don't really - // know why. - acc_pb->adpcm.pred_scale = acc_pb->adpcm_loop_info.pred_scale; + SetPredScale(acc_pb->adpcm_loop_info.pred_scale); if (!acc_pb->is_stream) { - acc_pb->adpcm.yn1 = acc_pb->adpcm_loop_info.yn1; - acc_pb->adpcm.yn2 = acc_pb->adpcm_loop_info.yn2; + SetYn1(acc_pb->adpcm_loop_info.yn1); + SetYn2(acc_pb->adpcm_loop_info.yn2); } -#ifdef AX_GC else { + // Refresh YN1 and YN2. This indirectly causes the accelerator to resume reads. + SetYn1(GetYn1()); + SetYn2(GetYn2()); +#ifdef AX_GC // If we're streaming, increment the loop counter. acc_pb->loop_counter++; - } #endif + } } else { @@ -229,11 +206,38 @@ u16 AcceleratorGetSample() acc_end_reached = true; #endif } - }; + } - return ReadAccelerator(acc_loop_addr, acc_end_addr, acc_cur_addr, - acc_pb->audio_addr.sample_format, &acc_pb->adpcm.yn1, &acc_pb->adpcm.yn2, - &acc_pb->adpcm.pred_scale, acc_pb->adpcm.coefs, end_address_reached); + u8 ReadMemory(u32 address) override { return ReadARAM(address); } + void WriteMemory(u32 address, u8 value) override { WriteARAM(value, address); } +}; + +static std::unique_ptr s_accelerator = std::make_unique(); + +// Sets up the simulated accelerator. +void AcceleratorSetup(PB_TYPE* pb) +{ + acc_pb = pb; + s_accelerator->SetStartAddress(HILO_TO_32(pb->audio_addr.loop_addr)); + s_accelerator->SetEndAddress(HILO_TO_32(pb->audio_addr.end_addr)); + s_accelerator->SetCurrentAddress(HILO_TO_32(pb->audio_addr.cur_addr)); + s_accelerator->SetSampleFormat(pb->audio_addr.sample_format); + s_accelerator->SetYn1(pb->adpcm.yn1); + s_accelerator->SetYn2(pb->adpcm.yn2); + s_accelerator->SetPredScale(pb->adpcm.pred_scale); + acc_end_reached = false; +} + +// Reads a sample from the accelerator. Also handles looping and +// disabling streams that reached the end (this is done by an exception raised +// by the accelerator on real hardware). +u16 AcceleratorGetSample() +{ + // See below for explanations about acc_end_reached. + if (acc_end_reached) + return 0; + + return s_accelerator->Read(acc_pb->adpcm.coefs); } // Reads samples from the input callback, resamples them to samples at @@ -375,8 +379,7 @@ u32 ResampleAudio(std::function input_callback, s16* output, u32 count // if required. void GetInputSamples(PB_TYPE& pb, s16* samples, u16 count, const s16* coeffs) { - u32 cur_addr = HILO_TO_32(pb.audio_addr.cur_addr); - AcceleratorSetup(&pb, &cur_addr); + AcceleratorSetup(&pb); if (coeffs) coeffs += pb.coef_select * 0x200; @@ -385,9 +388,12 @@ void GetInputSamples(PB_TYPE& pb, s16* samples, u16 count, const s16* coeffs) pb.src.cur_addr_frac, HILO_TO_32(pb.src.ratio), pb.src_type, coeffs); pb.src.cur_addr_frac = (curr_pos & 0xFFFF); - // Update current position in the PB. - pb.audio_addr.cur_addr_hi = static_cast(cur_addr >> 16) & 0xbfff; - pb.audio_addr.cur_addr_lo = static_cast(cur_addr); + // Update current position, YN1, YN2 and pred scale in the PB. + pb.audio_addr.cur_addr_hi = static_cast(s_accelerator->GetCurrentAddress() >> 16); + pb.audio_addr.cur_addr_lo = static_cast(s_accelerator->GetCurrentAddress()); + pb.adpcm.yn1 = s_accelerator->GetYn1(); + pb.adpcm.yn2 = s_accelerator->GetYn2(); + pb.adpcm.pred_scale = s_accelerator->GetPredScale(); } // Add samples to an output buffer, with optional volume ramping. diff --git a/Source/Core/Core/HW/DSPLLE/DSPLLE.cpp b/Source/Core/Core/HW/DSPLLE/DSPLLE.cpp index cbbb4de6eb..1a55395525 100644 --- a/Source/Core/Core/HW/DSPLLE/DSPLLE.cpp +++ b/Source/Core/Core/HW/DSPLLE/DSPLLE.cpp @@ -17,6 +17,7 @@ #include "Common/Thread.h" #include "Core/ConfigManager.h" #include "Core/Core.h" +#include "Core/DSP/DSPAccelerator.h" #include "Core/DSP/DSPCaptureLogger.h" #include "Core/DSP/DSPCore.h" #include "Core/DSP/DSPHWInterface.h" @@ -71,6 +72,7 @@ void DSPLLE::DoState(PointerWrap& p) p.Do(g_dsp.step_counter); p.DoArray(g_dsp.ifx_regs); + g_dsp.accelerator->DoState(p); p.Do(g_dsp.mbox[0]); p.Do(g_dsp.mbox[1]); Common::UnWriteProtectMemory(g_dsp.iram, DSP_IRAM_BYTE_SIZE, false); From 8310a672b01763c5c783f6afb638bcd23b07005d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Lam?= Date: Mon, 18 Sep 2017 23:40:06 +0200 Subject: [PATCH 07/11] DSP: Fix the predscale update logic When the current address is xxxxxxxf, after doing the standard ADPCM decoding and incrementing the current address as usual to get the next address, the DSP will update the predscale register by reading 2 bytes from memory, and add two to get the next address. This means xxxxxx10 cannot be a current address, as the DSP goes from 0f to 12 directly. A more serious issue with the old code is that if the start address is 16-byte aligned, some samples will always be skipped, even when that should not be the case. An easy way to test whether this behaviour is correct is to check the current address register and the predscale after each read. Old code: ... ACCA=00000002, predscale= ACCA=00000003, predscale= ... ACCA=0000000f, predscale= ACCA=00000010, predscale= ACCA=00000013, predscale= ACCA=00000014, predscale= ... New code (and console): ... ACCA=00000002, predscale= ACCA=00000003, predscale= ... ACCA=0000000f, predscale= ACCA=00000012, predscale= ACCA=00000013, predscale= ... --- Source/Core/Core/DSP/DSPAccelerator.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Source/Core/Core/DSP/DSPAccelerator.cpp b/Source/Core/Core/DSP/DSPAccelerator.cpp index c82b62530a..a6a2695364 100644 --- a/Source/Core/Core/DSP/DSPAccelerator.cpp +++ b/Source/Core/Core/DSP/DSPAccelerator.cpp @@ -73,13 +73,6 @@ u16 Accelerator::Read(s16* coefs) { case 0x00: // ADPCM audio { - // ADPCM decoding, not much to explain here. - if ((m_current_address & 15) == 0) - { - m_pred_scale = ReadMemory((m_current_address & ~15) >> 1); - m_current_address += 2; - } - switch (m_end_address & 15) { case 0: // Tom and Jerry @@ -111,6 +104,13 @@ u16 Accelerator::Read(s16* coefs) m_yn2 = m_yn1; m_yn1 = val; m_current_address += 1; + + if ((m_current_address & 15) == 0) + { + m_pred_scale = ReadMemory((m_current_address & ~15) >> 1); + m_current_address += 2; + step_size_bytes += 2; + } break; } case 0x0A: // 16-bit PCM audio From bd03f2e46e17df701ed136f700e6508a03ea6a9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Lam?= Date: Tue, 19 Sep 2017 18:16:50 +0200 Subject: [PATCH 08/11] DSP: Fix ACCOV not suspending accelerator reads When an ACCOV is triggered, the accelerator stops reading back anything and updating the current address until the YN2 register is set. This is kept track of internally by the DSP; this state is not exposed via any register. However, we need to emulate this behaviour correctly because some ucodes rely on it (notably AX GC); failure to emulate it will result in reading past the end and start address for non-looped voices. --- Source/Core/Core/DSP/DSPAccelerator.cpp | 6 ++++++ Source/Core/Core/DSP/DSPAccelerator.h | 5 +++++ Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h | 8 ++++---- Source/Core/Core/State.cpp | 2 +- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/Source/Core/Core/DSP/DSPAccelerator.cpp b/Source/Core/Core/DSP/DSPAccelerator.cpp index a6a2695364..6e40d7f930 100644 --- a/Source/Core/Core/DSP/DSPAccelerator.cpp +++ b/Source/Core/Core/DSP/DSPAccelerator.cpp @@ -60,6 +60,9 @@ void Accelerator::WriteD3(u16 value) u16 Accelerator::Read(s16* coefs) { + if (m_reads_stopped) + return 0x0000; + u16 val; u8 step_size_bytes = 0; @@ -148,6 +151,7 @@ u16 Accelerator::Read(s16* coefs) { // Set address back to start address. m_current_address = m_start_address; + m_reads_stopped = true; OnEndException(); } @@ -164,6 +168,7 @@ void Accelerator::DoState(PointerWrap& p) p.Do(m_yn1); p.Do(m_yn2); p.Do(m_pred_scale); + p.Do(m_reads_stopped); } constexpr u32 START_END_ADDRESS_MASK = 0x3fffffff; @@ -197,6 +202,7 @@ void Accelerator::SetYn1(s16 yn1) void Accelerator::SetYn2(s16 yn2) { m_yn2 = yn2; + m_reads_stopped = false; } void Accelerator::SetPredScale(u16 pred_scale) diff --git a/Source/Core/Core/DSP/DSPAccelerator.h b/Source/Core/Core/DSP/DSPAccelerator.h index 1c12da3417..3ee0e9f2b0 100644 --- a/Source/Core/Core/DSP/DSPAccelerator.h +++ b/Source/Core/Core/DSP/DSPAccelerator.h @@ -50,5 +50,10 @@ protected: s16 m_yn1 = 0; s16 m_yn2 = 0; u16 m_pred_scale = 0; + + // When an ACCOV is triggered, the accelerator stops reading back anything + // and updating the current address register, unless the YN2 register is written to. + // This is kept track of internally; this state is not exposed via any register. + bool m_reads_stopped = false; }; } // namespace DSP diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h index 29e43dd49f..f061848fac 100644 --- a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h +++ b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h @@ -199,10 +199,10 @@ protected: #ifdef AX_WII // One of the few meaningful differences between AXGC and AXWii: - // while AXGC handles non looping voices ending by having 0000 - // samples at the loop address, AXWii has the 0000 samples - // internally in DRAM and use an internal pointer to it (loop addr - // does not contain 0000 samples on AXWii!). + // while AXGC handles non looping voices ending by relying on the + // accelerator to stop reads once the loop address is reached, + // AXWii has the 0000 samples internally in DRAM and use an internal + // pointer to it (loop addr does not contain 0000 samples on AXWii!). acc_end_reached = true; #endif } diff --git a/Source/Core/Core/State.cpp b/Source/Core/Core/State.cpp index 4c22756e53..b17daba6ca 100644 --- a/Source/Core/Core/State.cpp +++ b/Source/Core/Core/State.cpp @@ -74,7 +74,7 @@ static Common::Event g_compressAndDumpStateSyncEvent; static std::thread g_save_thread; // Don't forget to increase this after doing changes on the savestate system -static const u32 STATE_VERSION = 89; // Last changed in PR 5890 +static const u32 STATE_VERSION = 90; // Last changed in PR 6077 // Maps savestate versions to Dolphin versions. // Versions after 42 don't need to be added to this list, From de6e80736445110aa3f1e896fbfd75ac1cadc090 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Lam?= Date: Tue, 19 Sep 2017 18:51:02 +0200 Subject: [PATCH 09/11] DSP: Handle two accelerator loop edge cases properly There are two special cases that the DSP accelerator handles in a special way: when the end address is of the form xxxxxxx0 or xxxxxxx1. For these two cases, the normal overflow handling doesn't apply. Instead, the overflow check is different, the ACCOV exception never fires at all, the predscale register is not updated, reads are not suspended, and if the end address is 16-byte aligned, the DSP loops back to start_address + 1 instead of the regular start_address. --- Source/Core/Core/DSP/DSPAccelerator.cpp | 29 +++++++++++++------------ 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/Source/Core/Core/DSP/DSPAccelerator.cpp b/Source/Core/Core/DSP/DSPAccelerator.cpp index 6e40d7f930..1692e192fe 100644 --- a/Source/Core/Core/DSP/DSPAccelerator.cpp +++ b/Source/Core/Core/DSP/DSPAccelerator.cpp @@ -76,19 +76,6 @@ u16 Accelerator::Read(s16* coefs) { case 0x00: // ADPCM audio { - switch (m_end_address & 15) - { - case 0: // Tom and Jerry - step_size_bytes = 1; - break; - case 1: // Blazing Angels - step_size_bytes = 0; - break; - default: - step_size_bytes = 2; - break; - } - int scale = 1 << (m_pred_scale & 0xF); int coef_idx = (m_pred_scale >> 4) & 0x7; @@ -103,12 +90,26 @@ u16 Accelerator::Read(s16* coefs) s32 val32 = (scale * temp) + ((0x400 + coef1 * m_yn1 + coef2 * m_yn2) >> 11); val = static_cast(MathUtil::Clamp(val32, -0x7FFF, 0x7FFF)); + step_size_bytes = 2; m_yn2 = m_yn1; m_yn1 = val; m_current_address += 1; - if ((m_current_address & 15) == 0) + // These two cases are handled in a special way, separate from normal overflow handling: + // the ACCOV exception does not fire at all, the predscale register is not updated, + // and if the end address is 16-byte aligned, the DSP loops to start_address + 1 + // instead of start_address. + if ((m_end_address & 0xf) == 0x0 && m_current_address == m_end_address) + { + m_current_address = m_start_address + 1; + } + else if ((m_end_address & 0xf) == 0x1 && m_current_address == m_end_address - 1) + { + m_current_address = m_start_address; + } + // If any of these special cases were hit, the DSP does not update the predscale register. + else if ((m_current_address & 15) == 0) { m_pred_scale = ReadMemory((m_current_address & ~15) >> 1); m_current_address += 2; From 5dae20ea9d2dfefcc507b3c6da41a31371803340 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Lam?= Date: Tue, 19 Sep 2017 21:12:06 +0200 Subject: [PATCH 10/11] UnitTests: Add DSP accelerator tests Includes DSP accelerator tests for basic behaviour, and everything that was fixed by the PR. --- Source/UnitTests/Core/CMakeLists.txt | 3 +- .../UnitTests/Core/DSP/DSPAcceleratorTest.cpp | 187 ++++++++++++++++++ 2 files changed, 189 insertions(+), 1 deletion(-) create mode 100644 Source/UnitTests/Core/DSP/DSPAcceleratorTest.cpp diff --git a/Source/UnitTests/Core/CMakeLists.txt b/Source/UnitTests/Core/CMakeLists.txt index 4df4ee52ff..ba8c228e88 100644 --- a/Source/UnitTests/Core/CMakeLists.txt +++ b/Source/UnitTests/Core/CMakeLists.txt @@ -2,11 +2,12 @@ add_dolphin_test(MMIOTest MMIOTest.cpp) add_dolphin_test(PageFaultTest PageFaultTest.cpp) add_dolphin_test(CoreTimingTest CoreTimingTest.cpp) +add_dolphin_test(DSPAcceleratorTest DSP/DSPAcceleratorTest.cpp) add_dolphin_test(DSPAssemblyTest DSP/DSPAssemblyTest.cpp DSP/DSPTestBinary.cpp DSP/DSPTestText.cpp DSP/HermesBinary.cpp -) +) add_dolphin_test(ESFormatsTest IOS/ES/FormatsTest.cpp IOS/ES/TestBinaryData.cpp) diff --git a/Source/UnitTests/Core/DSP/DSPAcceleratorTest.cpp b/Source/UnitTests/Core/DSP/DSPAcceleratorTest.cpp new file mode 100644 index 0000000000..5cebc103a8 --- /dev/null +++ b/Source/UnitTests/Core/DSP/DSPAcceleratorTest.cpp @@ -0,0 +1,187 @@ +// Copyright 2017 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include + +#include + +#include "Common/CommonTypes.h" +#include "Core/DSP/DSPAccelerator.h" + +// Simulated DSP accelerator. +class TestAccelerator : public DSP::Accelerator +{ +public: + // For convenience. + u16 TestRead() + { + std::array coefs{}; + m_accov_raised = false; + return Read(coefs.data()); + } + + bool EndExceptionRaised() const { return m_accov_raised; } +protected: + void OnEndException() override + { + EXPECT_TRUE(m_reads_stopped); + m_accov_raised = true; + } + u8 ReadMemory(u32 address) override { return 0; } + void WriteMemory(u32 address, u8 value) override {} + bool m_accov_raised = false; +}; + +TEST(DSPAccelerator, Initialization) +{ + TestAccelerator accelerator; + accelerator.SetCurrentAddress(0x00000000); + accelerator.SetStartAddress(0x00000000); + accelerator.SetEndAddress(0x00001000); + EXPECT_EQ(accelerator.GetStartAddress(), 0x00000000u); + EXPECT_EQ(accelerator.GetCurrentAddress(), 0x00000000u); + EXPECT_EQ(accelerator.GetEndAddress(), 0x00001000u); +} + +TEST(DSPAccelerator, SimpleReads) +{ + TestAccelerator accelerator; + accelerator.SetCurrentAddress(0x00000000); + accelerator.SetStartAddress(0x00000000); + accelerator.SetEndAddress(0x00001000); + + for (size_t i = 1; i <= 0xf; ++i) + { + accelerator.TestRead(); + EXPECT_FALSE(accelerator.EndExceptionRaised()); + EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + i); + } +} + +TEST(DSPAccelerator, AddressMasking) +{ + TestAccelerator accelerator; + + accelerator.SetCurrentAddress(0x48000000); + accelerator.SetStartAddress(0x48000000); + accelerator.SetEndAddress(0x48001000); + EXPECT_EQ(accelerator.GetStartAddress(), 0x08000000u); + EXPECT_EQ(accelerator.GetCurrentAddress(), 0x08000000u); + EXPECT_EQ(accelerator.GetEndAddress(), 0x08001000u); + + accelerator.SetCurrentAddress(0xffffffff); + accelerator.SetStartAddress(0xffffffff); + accelerator.SetEndAddress(0xffffffff); + EXPECT_EQ(accelerator.GetStartAddress(), 0x3fffffffu); + EXPECT_EQ(accelerator.GetCurrentAddress(), 0xbfffffffu); + EXPECT_EQ(accelerator.GetEndAddress(), 0x3fffffffu); +} + +TEST(DSPAccelerator, PredScaleRegisterMasking) +{ + TestAccelerator accelerator; + + accelerator.SetPredScale(0xbbbb); + EXPECT_EQ(accelerator.GetPredScale(), 0x3bu); + accelerator.SetPredScale(0xcccc); + EXPECT_EQ(accelerator.GetPredScale(), 0x4cu); + accelerator.SetPredScale(0xffff); + EXPECT_EQ(accelerator.GetPredScale(), 0x7fu); +} + +TEST(DSPAccelerator, OverflowBehaviour) +{ + TestAccelerator accelerator; + accelerator.SetCurrentAddress(0x00000000); + accelerator.SetStartAddress(0x00000000); + accelerator.SetEndAddress(0x0000000f); + + for (size_t i = 1; i <= 0xf; ++i) + { + accelerator.TestRead(); + EXPECT_FALSE(accelerator.EndExceptionRaised()); + EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + i); + } + + accelerator.TestRead(); + EXPECT_TRUE(accelerator.EndExceptionRaised()); + EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress()); + + // Since an ACCOV has fired, reads are stopped (until the YN2 register is reset), + // so the current address shouldn't be updated for this read. + accelerator.TestRead(); + EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress()); + + // Simulate a write to YN2, which internally resets the "reads stopped" flag. + // After resetting it, reads should work once again. + accelerator.SetYn2(0); + for (size_t i = 1; i <= 0xf; ++i) + { + accelerator.TestRead(); + EXPECT_FALSE(accelerator.EndExceptionRaised()); + EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + i); + } +} + +TEST(DSPAccelerator, OverflowFor16ByteAlignedAddresses) +{ + TestAccelerator accelerator; + accelerator.SetCurrentAddress(0x00000000); + accelerator.SetStartAddress(0x00000000); + accelerator.SetEndAddress(0x00000010); + + for (size_t i = 1; i <= 0xf; ++i) + { + accelerator.TestRead(); + EXPECT_FALSE(accelerator.EndExceptionRaised()); + EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + i); + } + + accelerator.TestRead(); + EXPECT_FALSE(accelerator.EndExceptionRaised()); + EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + 1); + + accelerator.TestRead(); + EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + 2); +} + +TEST(DSPAccelerator, OverflowForXXXXXXX1Addresses) +{ + TestAccelerator accelerator; + accelerator.SetCurrentAddress(0x00000000); + accelerator.SetStartAddress(0x00000000); + accelerator.SetEndAddress(0x00000011); + + for (size_t i = 1; i <= 0xf; ++i) + { + accelerator.TestRead(); + EXPECT_FALSE(accelerator.EndExceptionRaised()); + EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + i); + } + + accelerator.TestRead(); + EXPECT_FALSE(accelerator.EndExceptionRaised()); + EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress()); + + accelerator.TestRead(); + EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + 1); +} + +TEST(DSPAccelerator, CurrentAddressSkips) +{ + TestAccelerator accelerator; + accelerator.SetCurrentAddress(0x00000000); + accelerator.SetStartAddress(0x00000000); + accelerator.SetEndAddress(0x00001000); + + for (size_t j = 1; j <= 0xf; ++j) + accelerator.TestRead(); + EXPECT_EQ(accelerator.GetCurrentAddress(), 0x0000000fu); + + accelerator.TestRead(); + EXPECT_EQ(accelerator.GetCurrentAddress(), 0x00000012u); + + accelerator.TestRead(); + EXPECT_EQ(accelerator.GetCurrentAddress(), 0x00000013u); +} From 648477692001fa2809c6509e7e1b5da03e741efe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9o=20Lam?= Date: Sun, 24 Sep 2017 11:19:45 +0200 Subject: [PATCH 11/11] DSP: Fix a missing mask for the predscale register --- Source/Core/Core/DSP/DSPAccelerator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Core/Core/DSP/DSPAccelerator.cpp b/Source/Core/Core/DSP/DSPAccelerator.cpp index 1692e192fe..f6822ef2a8 100644 --- a/Source/Core/Core/DSP/DSPAccelerator.cpp +++ b/Source/Core/Core/DSP/DSPAccelerator.cpp @@ -208,6 +208,6 @@ void Accelerator::SetYn2(s16 yn2) void Accelerator::SetPredScale(u16 pred_scale) { - m_pred_scale = pred_scale; + m_pred_scale = pred_scale & 0x7f; } } // namespace DSP