Update free DSP ROM and coefficients to support GBA ucode

- coef: Explicitly set 23 different values that are used by GBA UCode,
  and tweaked overall parameters to more closely match those 23 values.
- irom: Moved a few functions to their proper places, updated BootUCode
  to configure DMA transfers using AX registers as well as IX registers
  (the GBA UCode uses this to do two sequential transfers in one call),
  and added partial functions used by GBA UCode.

All functions were reverse-engineered solely from their observed effects
on the virtual machine: register states before and after each call, dmem
interactions, and DMA transfers. The specific coefficients were observed
being read from dmem, and must be exactly those values for the GBA ucode
to function properly. I have no knowledge of how the official ROM
implements these functions, or how it is implemented overall.

Tested with The Legend of Zelda: Four Swords Adventures, Final Fantasy
Crystal Chronicles, and Billy Hatcher and the Giant Egg (to download
ChuChu Rocket!).
This commit is contained in:
Michael Maltese 2017-06-03 14:45:52 -07:00
parent 3872437eac
commit e3531d17d7
6 changed files with 185 additions and 28 deletions

Binary file not shown.

Binary file not shown.

View File

@ -41,7 +41,7 @@ static bool VerifyRoms()
u32 hash_drom; // dsp_coef.bin u32 hash_drom; // dsp_coef.bin
}; };
static const std::array<DspRomHashes, 4> known_roms = { static const std::array<DspRomHashes, 5> known_roms = {
{// Official Nintendo ROM {// Official Nintendo ROM
{0x66f334fe, 0xf3b93527}, {0x66f334fe, 0xf3b93527},
@ -53,7 +53,10 @@ static bool VerifyRoms()
{0xd9907f71, 0xb019c2fb}, {0xd9907f71, 0xb019c2fb},
// above with improved resampling coefficients // above with improved resampling coefficients
{0xd9907f71, 0xdb6880c1}}}; {0xd9907f71, 0xdb6880c1},
// above with support for GBA ucode
{0x3aa4a793, 0xa4a575f5}}};
u32 hash_irom = HashAdler32((u8*)g_dsp.irom, DSP_IROM_BYTE_SIZE); u32 hash_irom = HashAdler32((u8*)g_dsp.irom, DSP_IROM_BYTE_SIZE);
u32 hash_drom = HashAdler32((u8*)g_dsp.coef, DSP_COEF_BYTE_SIZE); u32 hash_drom = HashAdler32((u8*)g_dsp.coef, DSP_COEF_BYTE_SIZE);

View File

@ -57,40 +57,79 @@ param5:
lr $AR0, @CMBL lr $AR0, @CMBL
jmp 0x80b5 jmp 0x80b5
wait_for_dsp_mbox:
lrs $AC0.M, @DMBH
andcf $AC0.M, #0x8000
jlz wait_for_dsp_mbox+#IROM_BASE
ret
wait_for_cpu_mbox:
lrs $AC0.M, @CMBH
andcf $AC0.M, #0x8000
jlnz wait_for_cpu_mbox+#IROM_BASE
ret
wait_dma: wait_dma:
lrs $AC0.M, @DSCR lrs $AC0.M, @DSCR
andcf $AC0.M, #0x0004 andcf $AC0.M, #0x0004
jlz wait_dma+#IROM_BASE jlz wait_dma+#IROM_BASE
ret ret
bootucode: WARNPC 0x78
ORG 0x78
; called by GBA ucode
; Polls the CMBH mailbox register in a loop and returns once the bit-15 test
; below stops taking the branch back to the label.
; Overwrites $AC0.M with the last value read from CMBH.
wait_for_cpu_mbox:
lrs $AC0.M, @CMBH
andcf $AC0.M, #0x8000 ; test bit 15 of the value just read
jlnz wait_for_cpu_mbox+#IROM_BASE ; keep polling (presumably while bit 15 is still clear - confirm against ANDCF semantics)
ret
WARNPC 0x7e
ORG 0x7e
; called by GBA ucode
; Polls the DMBH mailbox register in a loop and returns once the bit-15 test
; below stops taking the branch back to the label.
; Overwrites $AC0.M with the last value read from DMBH.
wait_for_dsp_mbox:
lrs $AC0.M, @DMBH
andcf $AC0.M, #0x8000 ; test bit 15 of the value just read
jlz wait_for_dsp_mbox+#IROM_BASE ; keep polling (presumably while bit 15 is still set - confirm against ANDCF semantics)
ret
WARNPC 0x8b
ORG 0x8b
; called by GBA ucode
; Sets up one DMA transfer from the caller's AX registers and blocks until
; wait_dma returns.
; Inputs:
;   AX0.H / AX0.L -> written to DSMAH / DSMAL
;   AX1.H         -> written to DSPA
;   AX1.L         -> written to DSBL
; DSCR is set to the constant 0x1 (going by the routine's name this selects a
; DSP-to-CPU data transfer - TODO confirm against the DSCR bit layout).
; wait_dma overwrites $AC0.M.
dram_to_cpu:
srs @DSMAH, $AX0.H
srs @DSMAL, $AX0.L
si @DSCR, #0x1
srs @DSPA, $AX1.H
srs @DSBL, $AX1.L ; DSBL is written last, as in bootucode (presumably this write starts the transfer - confirm)
call wait_dma+#IROM_BASE
ret
WARNPC 0xb5 WARNPC 0xb5
ORG 0xb5 ORG 0xb5
sub_80b5: bootucode:
set16 set16
clr $ACC1 clr $ACC0
clr $ACC0 mrr $AC0.M, $AX1.L
lris $AC0.M, #0x2 andi $AC0.M, #0xffff
sr @DSCR, $AC0.M jz bootucode_ix+#IROM_BASE
sr @DSMAH, $IX0
sr @DSMAL, $IX1 WARNPC 0xbc
sr @DSPA, $IX2 ORG 0xbc
sr @DSBL, $IX3 ; called by GBA ucode
call wait_dma+#IROM_BASE bootucode_ax:
jmpr $AR0 lris $AC0.M, #0
srs @DSCR, $AC0.M
srs @DSMAH, $AX0.H
srs @DSMAL, $AX0.L
srs @DSPA, $AX1.H
srs @DSBL, $AX1.L
call wait_dma+#IROM_BASE
bootucode_ix:
mrr $AC0.M, $IX3
andi $AC0.M, #0xffff
jz bootucode_epilogue+#IROM_BASE
lris $AC0.M, #0x2
srs @DSCR, $AC0.M
sr @DSMAH, $IX0
sr @DSMAL, $IX1
sr @DSPA, $IX2
sr @DSBL, $IX3
call wait_dma+#IROM_BASE
bootucode_epilogue:
clr $ACC1
lr $AC1.M, @DSBL
jmpr $AR0
WARNPC 0xe7 WARNPC 0xe7
ORG 0xe7 ORG 0xe7
@ -117,6 +156,21 @@ mix_two_add:
mrr $AX0.L, $IX0 mrr $AX0.L, $IX0
ret ret
WARNPC 0x1f4
ORG 0x1f4
; used by GBA ucode for joyboot length and is the end of some mixing function
; (for an example of hitting the full function, try running the main menu of
; Metroid Prime using the Nintendo DSP ROM).
;
; Summary of what the instructions below touch:
;   - ACC1 is shifted right by 16 and AR1 is incremented.
;   - ACC0 is cleared.
;   - Two words are stored through AR3 (AC1.M, then AC1.L).
;   - AX1.H is overwritten with AX0.H, and the product register is loaded
;     with AX1.L * AX1.H.
sub_81f4:
asr16'ir $ACC1 : $AR1
clr's $ACC0 : @$AR3, $AC1.M ; AC1.M is always #0x0 here.
; necessary both to match register state of official ROM, and for the
; following mul. could also be mrr $AX1.H, $AC0.M (before clearing ACC0).
mrr $AX1.H, $AX0.H
; make the product register match.
mul's $AX1.L, $AX1.H : @$AR3, $AC1.L
ret
WARNPC 0x1f9 WARNPC 0x1f9
ORG 0x1f9 ORG 0x1f9
; Args: ; Args:
@ -158,6 +212,18 @@ mix_two_add_ramp:
mrr $AX0.L, $IX1 mrr $AX0.L, $IX1
ret ret
WARNPC 0x458
ORG 0x458
; used by GBA ucode for joyboot length
;
; Adds 7 to AC1.M, shifts ACC1 right by 16 so that the sum lands in AC1.L,
; then stores AC1.M followed by AC1.L through AR3, advancing AR3 after each
; store (two words written in total).
sub_8458:
; AC1.L after = AC1.M before + 7. this looks really stupid, but matches
; captured traces and seems to work.
addis $AC1.M, #0x7
asr16 $ACC1
srri @$AR3, $AC1.M ; or just #0x0.
srri @$AR3, $AC1.L
ret
WARNPC 0x45d WARNPC 0x45d
ORG 0x45d ORG 0x45d
mix_add_ramp: mix_add_ramp:
@ -197,5 +263,48 @@ ____mix_add_ramp_end_loop:
mrr $AR3, $IX2 mrr $AR3, $IX2
ret ret
WARNPC 0x723
ORG 0x723
; called by GBA ucode
;
; XORs AC1.M with AX1.H in place, stores the result to the word addressed by
; AR2, then steps AR2 back by one.
sub_8723:
; in GBA-HLE, the nonce challenge is XOR'd with 0x6f646573, which happens
; to match the values of the AX1.H register across these two calls.
xorr $AC1.M, $AX1.H
; the value of @AR2 is always the same as AC1.M after
srrd @$AR2, $AC1.M
ret
WARNPC 0x809
ORG 0x809
; called by GBA ucode
;
; Writes AC1.L to @AR2, ORs AX0.H into AC1.M, advances AR2 by IX2, then
; writes the updated AC1.M to @AR2 and increments AR2 once more.
sub_8809:
; AR2 is the only addressing register that corresponds to the dmem writes.
; the stored register could be AC1.L or AX0.L in the second call, but can't
; be AX0.L in the third call.
srr @$AR2, $AC1.L
; AC1.M after calling always looks like either AC1.M | AC0.M or
; AC1.M | AX0.H. TODO: Why pick AX0.H?
orr $AC1.M, $AX0.H
; the second dmem write is incremented only in calls #3A and #3B. There,
; IX2 is the only register set to 1, and it's specifically set to 1 in the
; ucode. It's set to 0 in the first two calls.
addarn $AR2, $IX2
; second dmem write: the OR'd AC1.M value, with AR2 post-incremented
srri @$AR2, $AC1.M
ret
WARNPC 0x8e5
ORG 0x8e5
; used by GBA ucode for challenge nonce, logo palette/speed, and joyboot length
;
; Read-modify-write of the two consecutive dmem words at AR2:
;   1. load word 0 into AC1.M and word 1 into AC1.L (AR2 ends back on word 0),
;   2. add ACC1 into ACC0, then OR AX0.H into AC0.M,
;   3. store AC0.M to word 0 and AC0.L to word 1.
; On return AR2 points at word 1 and AR1 has been decremented by one.
sub_88e5:
dar $AR1 ; always gets decremented, no effect on rest of function
lrri $AC1.M, @$AR2
lrrd $AC1.L, @$AR2
add $ACC0, $ACC1 ; signed addition
orr $AC0.M, $AX0.H
srri @$AR2, $AC0.M
srr @$AR2, $AC0.L
ret
WARNPC 0x1000 WARNPC 0x1000
ORG 0x1000 ORG 0x1000

View File

@ -1,3 +1,16 @@
Legal GC/WII DSP IROM replacement (v0.3)
-------------------------------------------------------
- coef: Explicitly set 23 different values that are used by GBA UCode, and
tweaked overall parameters to more closely match those 23 values.
- irom: Moved a few functions to their proper places, updated BootUCode to
configure DMA transfers using AX registers as well as IX registers (the GBA
UCode uses this to do two sequential transfers in one call), and added
partial functions used by GBA UCode.
ligfx
2/june/2017
Legal GC/WII DSP IROM replacement (v0.2.1) Legal GC/WII DSP IROM replacement (v0.2.1)
------------------------------------------------------- -------------------------------------------------------

View File

@ -12,7 +12,7 @@ def convert_coefs(c):
def pack_coefs(short_coefs): def pack_coefs(short_coefs):
return b''.join(pack('>H', c) for c in short_coefs) return b''.join(pack('>H', c) for c in short_coefs)
x = linspace(-2, 2, 512, endpoint=False) x = linspace(-2, 2, 512, endpoint=True)
w1 = hamming(512) w1 = hamming(512)
w2 = kaiser(512, pi * 9/4) w2 = kaiser(512, pi * 9/4)
@ -23,5 +23,37 @@ coef_3 = [sinc(n) for n in x] * w1
short_coefs = convert_coefs(coef_1) + convert_coefs(coef_2) + convert_coefs(coef_3) + [0] * 512 short_coefs = convert_coefs(coef_1) + convert_coefs(coef_2) + convert_coefs(coef_3) + [0] * 512
# Coefficient words the GBA ucode needs. Each (index, word) pair overrides the
# generated table entry at that index; a line is printed for every entry that
# actually changes.
# NOTE(review): (0x1b8, 0x007f) appears twice, so the 23 rows cover only 22
# distinct indices. The second copy is a no-op; confirm whether a different
# index was intended.
gba_coefs = (
    (0x03b, 0x0065),
    (0x043, 0x0076),
    (0x0ca, 0x3461),
    (0x0e2, 0x376f),
    (0x1b8, 0x007f),
    (0x1b8, 0x007f),
    (0x1f8, 0x0009),
    (0x1fc, 0x0003),
    (0x229, 0x657c),
    (0x231, 0x64fc),
    (0x259, 0x6143),
    (0x285, 0x5aff),
    (0x456, 0x102f),
    (0x468, 0xf808),
    (0x491, 0x6a0f),
    (0x5f1, 0x0200),
    (0x5f6, 0x7f65),
    (0x65b, 0x0000),
    (0x66b, 0x0000),
    (0x66c, 0x06f2),
    (0x6fe, 0x0008),
    (0x723, 0xffe0),
    (0x766, 0x0273),
)
for index, wanted in gba_coefs:
    current = short_coefs[index]
    if current == wanted:
        # Generated value already matches; nothing to report or patch.
        continue
    print("At %04x: replacing %04x with %04x (diff. of % #x)" % (index, current, wanted, wanted - current))
    short_coefs[index] = wanted
with open('dsp_coef.bin', 'wb') as f: with open('dsp_coef.bin', 'wb') as f:
f.write(pack_coefs(short_coefs)) f.write(pack_coefs(short_coefs))