Michael Maltese e3531d17d7 Update free DSP ROM and coefficients to support GBA ucode
- coef: Explicitly set 23 different values that are used by GBA UCode,
  and tweaked overall parameters to more closely match those 23 values.
- irom: Moved a few functions to their proper places, updated BootUCode
  to configure DMA transfers using AX registers as well as IX registers
  (the GBA UCode uses this to do two sequential transfers in one call),
  and added partial functions used by GBA UCode.

All functions were reverse-engineered solely based off of observed
effects on the virtual machine: register states before-and-after, dmem
interactions, and DMA transfers. The specific coefficients were observed
being read from dmem, and must be exactly those values to function
properly. I have no knowledge of how the official ROM implements these
functions, or how it is implemented overall.

Tested with The Legend of Zelda: Four Swords Adventures, Final Fantasy
Crystal Chronicles, and Billy Hatcher and the Giant Egg (to download
ChuChu Rocket!).
2017-06-03 15:16:39 -07:00

311 lines
6.7 KiB
Plaintext

IROM_BASE: equ 0x8000
lri $CR, #0x00ff
lri $SR, #0x2000
si @DMBH, #0x8071
si @DMBL, #0xfeed
mainloop:
clr $ACC1
clr $ACC0
call wait_for_cpu_mbox+#IROM_BASE
;mmem-addr
param1:
lr $AC1.M, @CMBL
lri $AC0.M, #0xa001
cmp
jnz param2+#IROM_BASE
call wait_for_cpu_mbox+#IROM_BASE
lr $IX0, @CMBH
lr $IX1, @CMBL
jmp mainloop+#IROM_BASE
;iram-addr
param2:
lri $AC0.M, #0xc002
cmp
jnz param3+#IROM_BASE
call wait_for_cpu_mbox+#IROM_BASE
lr $IX2, @CMBL
jmp mainloop+#IROM_BASE
;iram-length
param3:
lri $AC0.M, #0xa002
cmp
jnz param4+#IROM_BASE
call wait_for_cpu_mbox+#IROM_BASE
lr $IX3, @CMBL
jmp mainloop+#IROM_BASE
;dram-length
param4:
lri $AC0.M, #0xb002
cmp
jnz param5+#IROM_BASE
call wait_for_cpu_mbox+#IROM_BASE
lr $AX0.L, @CMBL
jmp mainloop+#IROM_BASE
;iram-start-addr
param5:
lri $AC0.M, #0xd001
cmp
jnz mainloop+#IROM_BASE
call wait_for_cpu_mbox+#IROM_BASE
lr $AR0, @CMBL
jmp 0x80b5
wait_dma:
lrs $AC0.M, @DSCR
andcf $AC0.M, #0x0004
jlz wait_dma+#IROM_BASE
ret
WARNPC 0x78
ORG 0x78
; called by GBA ucode
wait_for_cpu_mbox:
lrs $AC0.M, @CMBH
andcf $AC0.M, #0x8000
jlnz wait_for_cpu_mbox+#IROM_BASE
ret
WARNPC 0x7e
ORG 0x7e
; called by GBA ucode
wait_for_dsp_mbox:
lrs $AC0.M, @DMBH
andcf $AC0.M, #0x8000
jlz wait_for_dsp_mbox+#IROM_BASE
ret
WARNPC 0x8b
ORG 0x8b
; called by GBA ucode
dram_to_cpu:
srs @DSMAH, $AX0.H
srs @DSMAL, $AX0.L
si @DSCR, #0x1
srs @DSPA, $AX1.H
srs @DSBL, $AX1.L
call wait_dma+#IROM_BASE
ret
WARNPC 0xb5
ORG 0xb5
bootucode:
set16
clr $ACC0
mrr $AC0.M, $AX1.L
andi $AC0.M, #0xffff
jz bootucode_ix+#IROM_BASE
WARNPC 0xbc
ORG 0xbc
; called by GBA ucode
bootucode_ax:
lris $AC0.M, #0
srs @DSCR, $AC0.M
srs @DSMAH, $AX0.H
srs @DSMAL, $AX0.L
srs @DSPA, $AX1.H
srs @DSBL, $AX1.L
call wait_dma+#IROM_BASE
bootucode_ix:
mrr $AC0.M, $IX3
andi $AC0.M, #0xffff
jz bootucode_epilogue+#IROM_BASE
lris $AC0.M, #0x2
srs @DSCR, $AC0.M
sr @DSMAH, $IX0
sr @DSMAL, $IX1
sr @DSPA, $IX2
sr @DSBL, $IX3
call wait_dma+#IROM_BASE
bootucode_epilogue:
clr $ACC1
lr $AC1.M, @DSBL
jmpr $AR0
WARNPC 0xe7
ORG 0xe7
; Args:
; AR0 points to the 32 input 1 samples (s16)
; AR1 points to the volume data (init1, delta1, init2, delta2)
; AR2 points to the already mixed samples for output 1 (s32)
; AR3 points to where the output 1 should be stored (s32)
; IX0 points to the 32 input 2 samples (s16)
; IX1 points to where the output 2 should be stored (s32)
;
; Returns:
; AX0.L is the value of the last sample from input 1
; AX1.H is the value of the last sample from input 2
mix_two_add:
call mix_add+#IROM_BASE
iar $AR1
mrr $AR0, $IX0
mrr $AR2, $IX1
mrr $AR3, $IX1
mrr $IX0, $AX0.L
call mix_add+#IROM_BASE
mrr $AX1.H, $AX0.L
mrr $AX0.L, $IX0
ret
WARNPC 0x1f4
ORG 0x1f4
; used by GBA ucode for joyboot length and is the end of some mixing function
; (for an example of hitting the full function, try running the main menu of
; Metroid Prime using the Nintendo DSP ROM).
sub_81f4:
asr16'ir $ACC1 : $AR1
clr's $ACC0 : @$AR3, $AC1.M ; AC1.M is always #0x0 here.
; necessary both to match register state of official ROM, and for the
; following mul. could also be mrr $AX1.H, $AC0.M (before clearing ACC0).
mrr $AX1.H, $AX0.H
; make the product register match.
mul's $AX1.L, $AX1.H : @$AR3, $AC1.L
ret
WARNPC 0x1f9
ORG 0x1f9
; Args:
; AR0 points to the 32 input samples (s16)
; AR1 points to the volume data (init, delta)
; AR2 points to the already mixed samples (s32)
; AR3 points to where the output should be stored (s32)
;
; Returns:
; AX0.L is the value of the last sample
; AX1.H is the first address after the output
mix_add:
lrri $AX1.L, @$AR1
bloopi #32, ____mix_add_end_loop+#IROM_BASE
lrri $AC0.M, @$AR2
lrri $AC0.L, @$AR2
lsl16 $ACC0
lrri $AX0.H, @$AR0
mulx $AX0.H, $AX1.L
addp $ACC0
asr16 $ACC0
srri @$AR3, $AC0.M
____mix_add_end_loop:
srri @$AR3, $AC0.L
movp $ACC0
mrr $AX0.L, $AC0.M
mrr $AX1.H, $AR3
ret
WARNPC 0x282
ORG 0x282
mix_two_add_ramp:
call mix_add_ramp+#IROM_BASE
mrr $AR0, $IX0
mrr $AR2, $IX1
mrr $IX1, $AX0.L
call mix_add_ramp+#IROM_BASE
mrr $AX1.H, $AX0.L
mrr $AX0.L, $IX1
ret
WARNPC 0x458
ORG 0x458
; used by GBA ucode for joyboot length
sub_8458:
; AC1.L after = AC1.M before + 7. this looks really stupid, but matches
; captured traces and seems to work.
addis $AC1.M, #0x7
asr16 $ACC1
srri @$AR3, $AC1.M ; or just #0x0.
srri @$AR3, $AC1.L
ret
WARNPC 0x45d
ORG 0x45d
mix_add_ramp:
clr $ACC0
clr $ACC1
lrri $AC0.L, @$AR1
lrrd $AC1.L, @$AR1
mrr $IX2, $AR3
bloopi #32, ____mix_add_ramp_end_ramp+#IROM_BASE
srri @$AR3, $AC0.L
____mix_add_ramp_end_ramp:
add $ACC0, $ACC1
srri @$AR1, $AC0.L
iar $AR1
mrr $IX3, $AR1
mrr $AR1, $IX2
mrr $AR3, $AR2
bloopi #32, ____mix_add_ramp_end_loop+#IROM_BASE
lrri $AC0.M, @$AR2
lrri $AC0.L, @$AR2
lsl16 $ACC0
lrri $AX0.H, @$AR0
lrri $AX1.L, @$AR1
mulx $AX0.H, $AX1.L
addp $ACC0
asr16 $ACC0
srri @$AR3, $AC0.M
____mix_add_ramp_end_loop:
srri @$AR3, $AC0.L
movp $ACC0
mrr $AX0.L, $AC0.M
mrr $AX1.H, $AR3
mrr $AR1, $IX3
mrr $AR3, $IX2
ret
WARNPC 0x723
ORG 0x723
; called by GBA ucode
sub_8723:
; in GBA-HLE, the nonce challenge is XOR'd with 0x6f646573, which happens
; to match the values of the AX1.H register across these two calls.
xorr $AC1.M, $AX1.H
; the value of @AR2 is always the same as AC1.M after
srrd @$AR2, $AC1.M
ret
WARNPC 0x809
ORG 0x809
; called by GBA ucode
sub_8809:
; AR2 is the only addressing register that corresponds to the dmem writes
; could be AC1.L or AX0.L in the second call, but can't be AX0.L in the
; third call.
srr @$AR2, $AC1.L
; AC1.M after calling always look like either AC1.M | AC0.M or
; AC1.M | AX0.H. TODO: Why pick AX0.H?
orr $AC1.M, $AX0.H
; the second dmem write is incremented only in calls #3A and #3B. There,
; IX2 is the only register set to 1, and it's specifically set to 1 in the
; ucode. It's set to 0 in the first two calls.
addarn $AR2, $IX2
; obvious
srri @$AR2, $AC1.M
ret
WARNPC 0x8e5
ORG 0x8e5
; used by GBA ucode for challenge nonce, logo palette/speed, and joyboot length
sub_88e5:
dar $AR1 ; always gets decremented, no effect on rest of function
lrri $AC1.M, @$AR2
lrrd $AC1.L, @$AR2
add $ACC0, $ACC1 ; signed addition
orr $AC0.M, $AX0.H
srri @$AR2, $AC0.M
srr @$AR2, $AC0.L
ret
WARNPC 0x1000
ORG 0x1000