mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-01-15 18:49:11 +01:00
e3531d17d7
- coef: Explicitly set 23 different values that are used by GBA UCode, and tweaked overall parameters to more closely match those 23 values. - irom: Moved a few functions to their proper places, updated BootUCode to configure DMA transfers using AX registers as well as IX registers (the GBA UCode uses this to do two sequential transfers in one call), and added partial functions used by GBA UCode. All functions were reverse-engineered solely based off of observed effects on the virtual machine: register states before-and-after, dmem interactions, and DMA transfers. The specific coefficients were observed being read from dmem, and must be exactly those values to function properly. I have no knowledge of how the official ROM implements these functions, or how it is implemented overall. Tested with The Legend of Zelda: Four Swords Adventures, Final Fantasy Crystal Chronicles, and Billy Hatcher and the Giant Egg (to download ChuChu Rocket!).
311 lines
6.7 KiB
Plaintext
311 lines
6.7 KiB
Plaintext
IROM_BASE: equ 0x8000
|
|
|
|
lri $CR, #0x00ff
|
|
lri $SR, #0x2000
|
|
si @DMBH, #0x8071
|
|
si @DMBL, #0xfeed
|
|
|
|
mainloop:
|
|
clr $ACC1
|
|
clr $ACC0
|
|
call wait_for_cpu_mbox+#IROM_BASE
|
|
|
|
;mmem-addr
|
|
param1:
|
|
lr $AC1.M, @CMBL
|
|
lri $AC0.M, #0xa001
|
|
cmp
|
|
jnz param2+#IROM_BASE
|
|
call wait_for_cpu_mbox+#IROM_BASE
|
|
lr $IX0, @CMBH
|
|
lr $IX1, @CMBL
|
|
jmp mainloop+#IROM_BASE
|
|
|
|
;iram-addr
|
|
param2:
|
|
lri $AC0.M, #0xc002
|
|
cmp
|
|
jnz param3+#IROM_BASE
|
|
call wait_for_cpu_mbox+#IROM_BASE
|
|
lr $IX2, @CMBL
|
|
jmp mainloop+#IROM_BASE
|
|
|
|
;iram-length
|
|
param3:
|
|
lri $AC0.M, #0xa002
|
|
cmp
|
|
jnz param4+#IROM_BASE
|
|
call wait_for_cpu_mbox+#IROM_BASE
|
|
lr $IX3, @CMBL
|
|
jmp mainloop+#IROM_BASE
|
|
|
|
;dram-length
|
|
param4:
|
|
lri $AC0.M, #0xb002
|
|
cmp
|
|
jnz param5+#IROM_BASE
|
|
call wait_for_cpu_mbox+#IROM_BASE
|
|
lr $AX0.L, @CMBL
|
|
jmp mainloop+#IROM_BASE
|
|
|
|
;iram-start-addr
|
|
param5:
|
|
lri $AC0.M, #0xd001
|
|
cmp
|
|
jnz mainloop+#IROM_BASE
|
|
call wait_for_cpu_mbox+#IROM_BASE
|
|
lr $AR0, @CMBL
|
|
jmp 0x80b5
|
|
|
|
wait_dma:
|
|
lrs $AC0.M, @DSCR
|
|
andcf $AC0.M, #0x0004
|
|
jlz wait_dma+#IROM_BASE
|
|
ret
|
|
|
|
WARNPC 0x78
|
|
ORG 0x78
|
|
; called by GBA ucode
|
|
wait_for_cpu_mbox:
|
|
lrs $AC0.M, @CMBH
|
|
andcf $AC0.M, #0x8000
|
|
jlnz wait_for_cpu_mbox+#IROM_BASE
|
|
ret
|
|
|
|
WARNPC 0x7e
|
|
ORG 0x7e
|
|
; called by GBA ucode
|
|
wait_for_dsp_mbox:
|
|
lrs $AC0.M, @DMBH
|
|
andcf $AC0.M, #0x8000
|
|
jlz wait_for_dsp_mbox+#IROM_BASE
|
|
ret
|
|
|
|
WARNPC 0x8b
|
|
ORG 0x8b
|
|
; called by GBA ucode
|
|
dram_to_cpu:
|
|
srs @DSMAH, $AX0.H
|
|
srs @DSMAL, $AX0.L
|
|
si @DSCR, #0x1
|
|
srs @DSPA, $AX1.H
|
|
srs @DSBL, $AX1.L
|
|
call wait_dma+#IROM_BASE
|
|
ret
|
|
|
|
WARNPC 0xb5
|
|
ORG 0xb5
|
|
bootucode:
|
|
set16
|
|
clr $ACC0
|
|
mrr $AC0.M, $AX1.L
|
|
andi $AC0.M, #0xffff
|
|
jz bootucode_ix+#IROM_BASE
|
|
|
|
WARNPC 0xbc
|
|
ORG 0xbc
|
|
; called by GBA ucode
|
|
bootucode_ax:
|
|
lris $AC0.M, #0
|
|
srs @DSCR, $AC0.M
|
|
srs @DSMAH, $AX0.H
|
|
srs @DSMAL, $AX0.L
|
|
srs @DSPA, $AX1.H
|
|
srs @DSBL, $AX1.L
|
|
call wait_dma+#IROM_BASE
|
|
|
|
bootucode_ix:
|
|
mrr $AC0.M, $IX3
|
|
andi $AC0.M, #0xffff
|
|
jz bootucode_epilogue+#IROM_BASE
|
|
lris $AC0.M, #0x2
|
|
srs @DSCR, $AC0.M
|
|
sr @DSMAH, $IX0
|
|
sr @DSMAL, $IX1
|
|
sr @DSPA, $IX2
|
|
sr @DSBL, $IX3
|
|
call wait_dma+#IROM_BASE
|
|
|
|
bootucode_epilogue:
|
|
clr $ACC1
|
|
lr $AC1.M, @DSBL
|
|
jmpr $AR0
|
|
|
|
WARNPC 0xe7
|
|
ORG 0xe7
|
|
; Args:
|
|
; AR0 points to the 32 input 1 samples (s16)
|
|
; AR1 points to the volume data (init1, delta1, init2, delta2)
|
|
; AR2 points to the already mixed samples for output 1 (s32)
|
|
; AR3 points to where the output 1 should be stored (s32)
|
|
; IX0 points to the 32 input 2 samples (s16)
|
|
; IX1 points to where the output 2 should be stored (s32)
|
|
;
|
|
; Returns:
|
|
; AX0.L is the value of the last sample from input 1
|
|
; AX1.H is the value of the last sample from input 2
|
|
mix_two_add:
|
|
call mix_add+#IROM_BASE
|
|
iar $AR1
|
|
mrr $AR0, $IX0
|
|
mrr $AR2, $IX1
|
|
mrr $AR3, $IX1
|
|
mrr $IX0, $AX0.L
|
|
call mix_add+#IROM_BASE
|
|
mrr $AX1.H, $AX0.L
|
|
mrr $AX0.L, $IX0
|
|
ret
|
|
|
|
WARNPC 0x1f4
|
|
ORG 0x1f4
|
|
; used by GBA ucode for joyboot length and is the end of some mixing function
|
|
; (for an example of hitting the full function, try running the main menu of
|
|
; Metroid Prime using the Nintendo DSP ROM).
|
|
sub_81f4:
|
|
asr16'ir $ACC1 : $AR1
|
|
clr's $ACC0 : @$AR3, $AC1.M ; AC1.M is always #0x0 here.
|
|
; necessary both to match register state of official ROM, and for the
|
|
; following mul. could also be mrr $AX1.H, $AC0.M (before clearing ACC0).
|
|
mrr $AX1.H, $AX0.H
|
|
; make the product register match.
|
|
mul's $AX1.L, $AX1.H : @$AR3, $AC1.L
|
|
ret
|
|
|
|
WARNPC 0x1f9
|
|
ORG 0x1f9
|
|
; Args:
|
|
; AR0 points to the 32 input samples (s16)
|
|
; AR1 points to the volume data (init, delta)
|
|
; AR2 points to the already mixed samples (s32)
|
|
; AR3 points to where the output should be stored (s32)
|
|
;
|
|
; Returns:
|
|
; AX0.L is the value of the last sample
|
|
; AX1.H is the first address after the output
|
|
mix_add:
|
|
lrri $AX1.L, @$AR1
|
|
bloopi #32, ____mix_add_end_loop+#IROM_BASE
|
|
lrri $AC0.M, @$AR2
|
|
lrri $AC0.L, @$AR2
|
|
lsl16 $ACC0
|
|
lrri $AX0.H, @$AR0
|
|
mulx $AX0.H, $AX1.L
|
|
addp $ACC0
|
|
asr16 $ACC0
|
|
srri @$AR3, $AC0.M
|
|
____mix_add_end_loop:
|
|
srri @$AR3, $AC0.L
|
|
movp $ACC0
|
|
mrr $AX0.L, $AC0.M
|
|
mrr $AX1.H, $AR3
|
|
ret
|
|
|
|
WARNPC 0x282
|
|
ORG 0x282
|
|
mix_two_add_ramp:
|
|
call mix_add_ramp+#IROM_BASE
|
|
mrr $AR0, $IX0
|
|
mrr $AR2, $IX1
|
|
mrr $IX1, $AX0.L
|
|
call mix_add_ramp+#IROM_BASE
|
|
mrr $AX1.H, $AX0.L
|
|
mrr $AX0.L, $IX1
|
|
ret
|
|
|
|
WARNPC 0x458
|
|
ORG 0x458
|
|
; used by GBA ucode for joyboot length
|
|
sub_8458:
|
|
; AC1.L after = AC1.M before + 7. this looks really stupid, but matches
|
|
; captured traces and seems to work.
|
|
addis $AC1.M, #0x7
|
|
asr16 $ACC1
|
|
srri @$AR3, $AC1.M ; or just #0x0.
|
|
srri @$AR3, $AC1.L
|
|
ret
|
|
|
|
WARNPC 0x45d
|
|
ORG 0x45d
|
|
mix_add_ramp:
|
|
clr $ACC0
|
|
clr $ACC1
|
|
lrri $AC0.L, @$AR1
|
|
lrrd $AC1.L, @$AR1
|
|
mrr $IX2, $AR3
|
|
|
|
bloopi #32, ____mix_add_ramp_end_ramp+#IROM_BASE
|
|
srri @$AR3, $AC0.L
|
|
____mix_add_ramp_end_ramp:
|
|
add $ACC0, $ACC1
|
|
|
|
srri @$AR1, $AC0.L
|
|
iar $AR1
|
|
mrr $IX3, $AR1
|
|
mrr $AR1, $IX2
|
|
mrr $AR3, $AR2
|
|
|
|
bloopi #32, ____mix_add_ramp_end_loop+#IROM_BASE
|
|
lrri $AC0.M, @$AR2
|
|
lrri $AC0.L, @$AR2
|
|
lsl16 $ACC0
|
|
lrri $AX0.H, @$AR0
|
|
lrri $AX1.L, @$AR1
|
|
mulx $AX0.H, $AX1.L
|
|
addp $ACC0
|
|
asr16 $ACC0
|
|
srri @$AR3, $AC0.M
|
|
____mix_add_ramp_end_loop:
|
|
srri @$AR3, $AC0.L
|
|
movp $ACC0
|
|
mrr $AX0.L, $AC0.M
|
|
mrr $AX1.H, $AR3
|
|
mrr $AR1, $IX3
|
|
mrr $AR3, $IX2
|
|
ret
|
|
|
|
WARNPC 0x723
|
|
ORG 0x723
|
|
; called by GBA ucode
|
|
sub_8723:
|
|
; in GBA-HLE, the nonce challenge is XOR'd with 0x6f646573, which happens
|
|
; to match the values of the AX1.H register across these two calls.
|
|
xorr $AC1.M, $AX1.H
|
|
; the value of @AR2 is always the same as AC1.M after
|
|
srrd @$AR2, $AC1.M
|
|
ret
|
|
|
|
WARNPC 0x809
|
|
ORG 0x809
|
|
; called by GBA ucode
|
|
sub_8809:
|
|
; AR2 is the only addressing register that corresponds to the dmem writes
|
|
; could be AC1.L or AX0.L in the second call, but can't be AX0.L in the
|
|
; third call.
|
|
srr @$AR2, $AC1.L
|
|
; AC1.M after calling always look like either AC1.M | AC0.M or
|
|
; AC1.M | AX0.H. TODO: Why pick AX0.H?
|
|
orr $AC1.M, $AX0.H
|
|
; the second dmem write is incremented only in calls #3A and #3B. There,
|
|
; IX2 is the only register set to 1, and it's specifically set to 1 in the
|
|
; ucode. It's set to 0 in the first two calls.
|
|
addarn $AR2, $IX2
|
|
; obvious
|
|
srri @$AR2, $AC1.M
|
|
ret
|
|
|
|
WARNPC 0x8e5
|
|
ORG 0x8e5
|
|
; used by GBA ucode for challenge nonce, logo palette/speed, and joyboot length
|
|
sub_88e5:
|
|
dar $AR1 ; always gets decremented, no effect on rest of function
|
|
lrri $AC1.M, @$AR2
|
|
lrrd $AC1.L, @$AR2
|
|
add $ACC0, $ACC1 ; signed addition
|
|
orr $AC0.M, $AX0.H
|
|
srri @$AR2, $AC0.M
|
|
srr @$AR2, $AC0.L
|
|
ret
|
|
|
|
WARNPC 0x1000
|
|
ORG 0x1000
|