From 72fe795e5668c6ed25c3f5a523f35b8a91f16f98 Mon Sep 17 00:00:00 2001
From: Polprzewodnikowy
Date: Sun, 11 Sep 2022 21:05:21 +0200
Subject: [PATCH] faster sd menu loading

Replace the word-by-word PI reads in disk_read() with PI DMA transfers
from the SC64 buffer into RAM.

Destinations that start on a data cache line are filled directly, other
destinations go through a cache line aligned bounce buffer and memcpy().
Destination cache lines are invalidated before the DMA is started and
only ever as whole lines, so data sharing a line with the buffer is
never discarded. The DMA and cache helpers ignore zero-length requests.
---
 sw/bootloader/src/fatfs/diskio.c | 34 ++++++++++++++++++++++------------
 sw/bootloader/src/io.c           | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 sw/bootloader/src/io.h           |  4 ++++
 3 files changed, 78 insertions(+), 12 deletions(-)

diff --git a/sw/bootloader/src/fatfs/diskio.c b/sw/bootloader/src/fatfs/diskio.c
index 848d2e6..c5b2205 100644
--- a/sw/bootloader/src/fatfs/diskio.c
+++ b/sw/bootloader/src/fatfs/diskio.c
@@ -1,3 +1,4 @@
+#include <string.h>
 #include "ff.h"
 #include "diskio.h"
 #include "../io.h"
@@ -5,7 +6,10 @@
 #include "../error.h"
 
 
-#define FROM_BCD(x) ((((x >> 4) & 0x0F) * 10) + (x & 0x0F))
+#define SD_BLOCK_SIZE           (512)
+#define BUFFER_BLOCKS_MAX       (sizeof(SC64_BUFFERS->BUFFER) / SD_BLOCK_SIZE)
+#define DATA_CACHE_LINE_SIZE    (16)
+#define FROM_BCD(x)             ((((x >> 4) & 0x0F) * 10) + (x & 0x0F))
 
 
 static DSTATUS status = STA_NOINIT;
@@ -34,21 +38,27 @@ DRESULT disk_read (BYTE pdrv, BYTE *buff, LBA_t sector, UINT count) {
     }
     uint32_t *physical_address = (uint32_t *) (PHYSICAL(buff));
     if (physical_address < (uint32_t *) (N64_RAM_SIZE)) {
+        // Bounce buffer for destinations that do not start on a data cache line
+        uint8_t aligned_buffer[BUFFER_BLOCKS_MAX * SD_BLOCK_SIZE] __attribute__((aligned(DATA_CACHE_LINE_SIZE)));
         while (count > 0) {
-            uint32_t block = ((count > 16) ? 16 : count);
-            if (sc64_sd_read_sectors((uint32_t *) (SC64_BUFFERS->BUFFER), sector, block)) {
+            uint32_t blocks = ((count > BUFFER_BLOCKS_MAX) ? BUFFER_BLOCKS_MAX : count);
+            size_t length = (blocks * SD_BLOCK_SIZE);
+            if (sc64_sd_read_sectors((uint32_t *) (SC64_BUFFERS->BUFFER), sector, blocks)) {
                 return RES_ERROR;
             }
-            for (uint32_t i = 0; i < (block * 512); i += 4) {
-                // TODO: use dma
-                uint32_t data = pi_io_read((uint32_t *) (&SC64_BUFFERS->BUFFER[i]));
-                uint8_t *ptr = (uint8_t *) (&data);
-                for (int j = 0; j < 4; j++) {
-                    *buff++ = *ptr++;
-                }
+            // Direct DMA only when buff starts on a cache line, so invalidation covers whole lines
+            // Lines are dropped before the transfer so no stale dirty line is written back over it
+            if (((uint32_t) (buff) % DATA_CACHE_LINE_SIZE) == 0) {
+                cache_data_hit_invalidate(buff, length);
+                pi_dma_read((io32_t *) (SC64_BUFFERS->BUFFER), buff, length);
+            } else {
+                cache_data_hit_invalidate(aligned_buffer, length);
+                pi_dma_read((io32_t *) (SC64_BUFFERS->BUFFER), aligned_buffer, length);
+                memcpy(buff, aligned_buffer, length);
             }
-            count -= block;
-            sector += block;
+            buff += length;
+            sector += blocks;
+            count -= blocks;
         }
     } else {
         if (sc64_sd_read_sectors(physical_address, sector, count)) {
diff --git a/sw/bootloader/src/io.c b/sw/bootloader/src/io.c
index d31f4f6..0be6b9d 100644
--- a/sw/bootloader/src/io.c
+++ b/sw/bootloader/src/io.c
@@ -25,6 +25,30 @@ void pi_io_write (io32_t *address, uint32_t value) {
     while (pi_busy());
 }
 
+// Transfers length bytes from PI bus address into buffer using DMA and waits until PI is idle
+// Cache maintenance of the destination buffer is left to the caller
+void pi_dma_read (io32_t *address, void *buffer, size_t length) {
+    if (length == 0) {
+        return;
+    }
+    io_write(&PI->PADDR, (uint32_t) (PHYSICAL(address)));
+    io_write(&PI->MADDR, (uint32_t) (PHYSICAL(buffer)));
+    io_write(&PI->WDMA, length - 1);
+    while (pi_busy());
+}
+
+// Transfers length bytes from buffer to PI bus address using DMA and waits until PI is idle
+// Cached source data must be written back by the caller first (see cache_data_hit_writeback)
+void pi_dma_write (io32_t *address, void *buffer, size_t length) {
+    if (length == 0) {
+        return;
+    }
+    io_write(&PI->PADDR, (uint32_t) (PHYSICAL(address)));
+    io_write(&PI->MADDR, (uint32_t) (PHYSICAL(buffer)));
+    io_write(&PI->RDMA, length - 1);
+    while (pi_busy());
+}
+
 uint32_t si_busy (void) {
     return (io_read(&SI->SR) & (SI_SR_IO_BUSY | SI_SR_DMA_BUSY));
 }
@@ -37,3 +61,31 @@ void si_io_write (io32_t *address, uint32_t value) {
     io_write(address, value);
     while (si_busy());
 }
+
+// Issues given cache instruction for every cache line overlapping the memory range
+// Forced inline so that operation reaches the asm as the constant the "i" constraint requires
+static inline __attribute__((always_inline)) void cache_operation (uint8_t operation, uint8_t line_size, void *address, size_t length) {
+    if (length == 0) {
+        return;
+    }
+    uint32_t cache_address = (((uint32_t) (address)) & (~(line_size - 1)));
+    while (cache_address < ((uint32_t) (address) + length)) {
+        asm volatile (
+            "cache %[operation], (%[cache_address]) \n" ::
+            [operation] "i" (operation),
+            [cache_address] "r" (cache_address)
+        );
+        cache_address += line_size;
+    }
+}
+
+// Invalidates data cache lines overlapping the range without writing them back
+// Range should cover whole cache lines, otherwise dirty data sharing a line with it is lost
+void cache_data_hit_invalidate (void *address, size_t length) {
+    cache_operation (0x11, 16, address, length);
+}
+
+// Writes back data cache lines overlapping the range
+void cache_data_hit_writeback (void *address, size_t length) {
+    cache_operation (0x19, 16, address, length);
+}
diff --git a/sw/bootloader/src/io.h b/sw/bootloader/src/io.h
index 1780c0e..b6ea2db 100644
--- a/sw/bootloader/src/io.h
+++ b/sw/bootloader/src/io.h
@@ -281,9 +281,13 @@ void io_write (io32_t *address, uint32_t value);
 uint32_t pi_busy (void);
 uint32_t pi_io_read (io32_t *address);
 void pi_io_write (io32_t *address, uint32_t value);
+void pi_dma_read (io32_t *address, void *buffer, size_t length);
+void pi_dma_write (io32_t *address, void *buffer, size_t length);
 uint32_t si_busy (void);
 uint32_t si_io_read (io32_t *address);
 void si_io_write (io32_t *address, uint32_t value);
+void cache_data_hit_invalidate (void *address, size_t length);
+void cache_data_hit_writeback (void *address, size_t length);
 
 
 #endif