From 3d6ff60a963f163b7443211e0f90fb53108d04c3 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sun, 15 Aug 2021 22:29:21 -0700 Subject: [PATCH 1/4] DSPSpy: Handle modified wr0 and cr registers correctly --- Source/DSPSpy/tests/dsp_base.inc | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Source/DSPSpy/tests/dsp_base.inc b/Source/DSPSpy/tests/dsp_base.inc index c890390314..46441fd674 100644 --- a/Source/DSPSpy/tests/dsp_base.inc +++ b/Source/DSPSpy/tests/dsp_base.inc @@ -173,6 +173,9 @@ send_back: ; first, store $sr so we can modify it sr @(REGS_BASE + 19), $sr set16 + ; Now store $wr0, as it must be 0xffff for srri to work as we expect + sr @(REGS_BASE + 8), $wr0 + lri $wr0, #0xffff ; store registers to reg table sr @REGS_BASE, $ar0 lri $ar0, #(REGS_BASE + 1) @@ -183,7 +186,8 @@ send_back: srri @$ar0, $ix1 srri @$ar0, $ix2 srri @$ar0, $ix3 - srri @$ar0, $wr0 + ; skip $wr0 since we already stored and modified it + iar $ar0 srri @$ar0, $wr1 srri @$ar0, $wr2 srri @$ar0, $wr3 @@ -210,6 +214,9 @@ send_back: srri @$ar0, $ac1.m ; Regs are stored. Prepare DMA. +; $cr must be 0x00ff because the ROM uses lrs and srs with the assumption that +; they will modify hardware registers. + lri $cr, #0x00ff lri $ax0.l, #0x0000 lri $ax1.l, #1 ;(DSP_CR_IMEM | DSP_CR_TO_CPU) lri $ax0.h, #0x200 @@ -246,7 +253,8 @@ dma_copy: lrri $ix1, @$ar0 lrri $ix2, @$ar0 lrri $ix3, @$ar0 - lrri $wr0, @$ar0 + ; leave $wr for later + iar $ar0 lrri $wr1, @$ar0 lrri $wr2, @$ar0 lrri $wr3, @$ar0 @@ -272,6 +280,7 @@ dma_copy: lrri $ac0.m, @$ar0 lrri $ac1.m, @$ar0 lr $ar0, @REGS_BASE + lr $wr0, @(REGS_BASE+8) lr $sr, @(REGS_BASE+19) ret ; from send_back From 858d0675b9d940fbd2062c31aabac34dadd853e8 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sat, 14 Aug 2021 17:26:40 -0700 Subject: [PATCH 2/4] DSPLLE: Handle cr, sr, and prod.h masking --- Source/Core/Core/DSP/DSPCore.h | 2 +- .../Core/DSP/Interpreter/DSPInterpreter.cpp | 9 +-- .../Core/Core/DSP/Jit/x64/DSPJitRegCache.cpp | 53 +++++++++++++++ Source/DSPSpy/tests/reg_mask_test.ds | 65 +++++++++++++++++++ 4 files changed, 124 insertions(+), 5 deletions(-) create mode 100644 Source/DSPSpy/tests/reg_mask_test.ds diff --git a/Source/Core/Core/DSP/DSPCore.h b/Source/Core/Core/DSP/DSPCore.h index 8162dde619..506e3995d4 100644 --- a/Source/Core/Core/DSP/DSPCore.h +++ b/Source/Core/Core/DSP/DSPCore.h @@ -200,7 +200,7 @@ enum : u16 SR_LOGIC_ZERO = 0x0040, SR_OVERFLOW_STICKY = 0x0080, // Set at the same time as 0x2 (under same conditions) - but not cleared the same - SR_100 = 0x0100, // Unknown + SR_100 = 0x0100, // Unknown, always reads back as 0 SR_INT_ENABLE = 0x0200, // Not 100% sure but duddie says so. This should replace the hack, if so. SR_400 = 0x0400, // Unknown SR_EXT_INT_ENABLE = 0x0800, // Appears in zelda - seems to disable external interrupts diff --git a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp index 5d8fd7e32b..3d1e7216ce 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp +++ b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp @@ -687,7 +687,7 @@ void Interpreter::OpWriteRegister(int reg_, u16 val) switch (reg) { - // 8-bit sign extended registers. Should look at prod.h too... + // 8-bit sign extended registers. case DSP_REG_ACH0: case DSP_REG_ACH1: // sign extend from the bottom 8 bits. @@ -720,10 +720,10 @@ void Interpreter::OpWriteRegister(int reg_, u16 val) state.r.wr[reg - DSP_REG_WR0] = val; break; case DSP_REG_CR: - state.r.cr = val; + state.r.cr = val & 0x00ff; break; case DSP_REG_SR: - state.r.sr = val; + state.r.sr = val & ~SR_100; break; case DSP_REG_PRODL: state.r.prod.l = val; @@ -732,7 +732,8 @@ void Interpreter::OpWriteRegister(int reg_, u16 val) state.r.prod.m = val; break; case DSP_REG_PRODH: - state.r.prod.h = val; + // Unlike ac0.h and ac1.h, prod.h is not sign-extended + state.r.prod.h = val & 0x00ff; break; case DSP_REG_PRODM2: state.r.prod.m2 = val; diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitRegCache.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitRegCache.cpp index d6ffde8d00..1d3b2768de 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPJitRegCache.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitRegCache.cpp @@ -751,6 +751,7 @@ void DSPJitRegCache::PutReg(int reg, bool dirty) else if (oparg.IsImm()) { // TODO: Immediates? + ASSERT(false); } else { @@ -772,6 +773,58 @@ void DSPJitRegCache::PutReg(int reg, bool dirty) m_emitter.SAR(64, oparg, Imm8(64 - 40)); } break; + case DSP_REG_CR: + case DSP_REG_PRODH: + if (dirty) + { + if (oparg.IsSimpleReg()) + { + // register is already shifted correctly + // (if at all) + + // Zero extend from the bottom 8 bits. + m_emitter.MOVZX(16, 8, oparg.GetSimpleReg(), oparg); + } + else if (oparg.IsImm()) + { + // TODO: Immediates? + ASSERT(false); + } + else + { + // This works on the memory, so use reg instead + // of real_reg, since it has the right loc + X64Reg tmp = GetFreeXReg(); + // Zero extend from the bottom 8 bits. + m_emitter.MOVZX(16, 8, tmp, m_regs[reg].loc); + m_emitter.MOV(16, m_regs[reg].loc, R(tmp)); + PutXReg(tmp); + } + } + break; + case DSP_REG_SR: + if (dirty) + { + if (oparg.IsSimpleReg()) + { + // register is already shifted correctly + // (if at all) + + // Clear SR_100, which always reads back as 0 + m_emitter.AND(16, R(oparg.GetSimpleReg()), Gen::Imm16(~SR_100)); + } + else if (oparg.IsImm()) + { + // TODO: Immediates? + ASSERT(false); + } + else + { + // Clear SR_100, which always reads back as 0 + m_emitter.AND(16, m_regs[reg].loc, Gen::Imm16(~SR_100)); + } + } + break; default: break; } diff --git a/Source/DSPSpy/tests/reg_mask_test.ds b/Source/DSPSpy/tests/reg_mask_test.ds new file mode 100644 index 0000000000..3bc0e9068b --- /dev/null +++ b/Source/DSPSpy/tests/reg_mask_test.ds @@ -0,0 +1,65 @@ +incdir "tests" +include "dsp_base.inc" + +; Test what happens various values are written to every register + LRI $ar0, #0xffff + CALL set_all_regs + CALL send_back + + LRI $ar0, #0x0000 + CALL set_all_regs + CALL send_back + + LRI $ar0, #0x007f + CALL set_all_regs + CALL send_back + + LRI $ar0, #0x0080 + CALL set_all_regs + CALL send_back + + LRI $ar0, #0x0100 + CALL set_all_regs + CALL send_back + +; We're done, DO NOT DELETE THIS LINE + JMP end_of_test + +; Copy $ar0 to all other registers +set_all_regs: + SET16 + MRR $ar1, $ar0 + MRR $ar2, $ar0 + MRR $ar3, $ar0 + MRR $ix0, $ar0 + MRR $ix1, $ar0 + MRR $ix2, $ar0 + MRR $ix3, $ar0 + MRR $wr0, $ar0 + MRR $wr1, $ar0 + MRR $wr2, $ar0 + MRR $wr3, $ar0 + ; Don't write to the stacks; returning from this function breaks otherwise + ; They don't show up in DSPSpy anyways + ;MRR $st0, $ar0 + ;MRR $st1, $ar0 + ;MRR $st2, $ar0 + ;MRR $st3, $ar0 + MRR $ac0.h, $ar0 + MRR $ac1.h, $ar0 + MRR $cr, $ar0 + ; Wait to set $sr, as it can change the way stores work + MRR $prod.l, $ar0 + MRR $prod.m1, $ar0 + MRR $prod.h, $ar0 + MRR $prod.m2, $ar0 + MRR $ax0.l, $ar0 + MRR $ax1.l, $ar0 + MRR $ax0.h, $ar0 + MRR $ax1.h, $ar0 + MRR $ac0.l, $ar0 + MRR $ac1.l, $ar0 + MRR $ac0.m, $ar0 + MRR $ac1.m, $ar0 + MRR $sr, $ar0 + RET From 3b4bc9852fa19a66188ce2015ba8b1ab008dd9b3 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Sun, 15 Aug 2021 18:18:43 -0700 Subject: [PATCH 3/4] DSPInterpreter: Fix sign extension of accumulators The extension needs to happen in SetLongAcc, not GetLongAcc, as the extension needs to always be reflected in acS.h. There is no functional difference with the write handler for acS.h, but it is more readable than 4 casts in a row. --- Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp index 3d1e7216ce..1fd3aa3ff3 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp +++ b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp @@ -394,13 +394,14 @@ s16 Interpreter::GetAXHigh(s32 reg) const s64 Interpreter::GetLongAcc(s32 reg) const { const auto& state = m_dsp_core.DSPState(); - return static_cast(state.r.ac[reg].val << 24) >> 24; + return static_cast(state.r.ac[reg].val); } void Interpreter::SetLongAcc(s32 reg, s64 value) { auto& state = m_dsp_core.DSPState(); - state.r.ac[reg].val = static_cast(value); + // 40-bit sign extension + state.r.ac[reg].val = static_cast((value << (64 - 40)) >> (64 - 40)); } s16 Interpreter::GetAccLow(s32 reg) const @@ -690,8 +691,8 @@ void Interpreter::OpWriteRegister(int reg_, u16 val) // 8-bit sign extended registers. case DSP_REG_ACH0: case DSP_REG_ACH1: - // sign extend from the bottom 8 bits. - state.r.ac[reg - DSP_REG_ACH0].h = (u16)(s16)(s8)(u8)val; + // Sign extend from the bottom 8 bits. + state.r.ac[reg - DSP_REG_ACH0].h = static_cast(val); break; // Stack registers. From 439bf1597b0c3c067c9aef40a65118550ff7a7bd Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Mon, 16 Aug 2021 13:46:37 -0700 Subject: [PATCH 4/4] DSPJitUtil: Remove redundant handling of ac0.h and ac1.h m_gpr.WriteReg calls PutReg which already handles the sign extension. --- Source/Core/Core/DSP/Jit/x64/DSPJitUtil.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitUtil.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitUtil.cpp index 896fd30d40..4957947b40 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPJitUtil.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitUtil.cpp @@ -109,12 +109,6 @@ void DSPEmitter::dsp_op_write_reg(int reg, Gen::X64Reg host_sreg) { switch (reg & 0x1f) { - // 8-bit sign extended registers. - case DSP_REG_ACH0: - case DSP_REG_ACH1: - m_gpr.WriteReg(reg, R(host_sreg)); - break; - // Stack registers. case DSP_REG_ST0: case DSP_REG_ST1: @@ -133,11 +127,6 @@ void DSPEmitter::dsp_op_write_reg_imm(int reg, u16 val) { switch (reg & 0x1f) { - // 8-bit sign extended registers. Should look at prod.h too... - case DSP_REG_ACH0: - case DSP_REG_ACH1: - m_gpr.WriteReg(reg, Imm16((u16)(s16)(s8)(u8)val)); - break; // Stack registers. case DSP_REG_ST0: case DSP_REG_ST1: