From 2c224b8db8f6b3d88db3734db257f8fae06771ed Mon Sep 17 00:00:00 2001 From: emu_kidid Date: Sat, 9 Jun 2018 08:59:52 +0930 Subject: [PATCH] - Fix LTO flag (actually use one :P) - Virtual memory for GameCube, prevent out of memory crashes --- Makefile.gc | 10 +- source/menu.cpp | 31 ++- source/snes9x/memmap.cpp | 11 ++ source/snes9x/memmap.h | 4 + source/snes9xgx.cpp | 19 +- source/utils/vm/asm.h | 338 ++++++++++++++++++++++++++++++++ source/utils/vm/dsihandler.s | 71 +++++++ source/utils/vm/vm.c | 367 +++++++++++++++++++++++++++++++++++ source/utils/vm/vm.h | 90 +++++++++ source/vmalloc.cpp | 49 +++++ source/vmalloc.h | 21 ++ 11 files changed, 1003 insertions(+), 8 deletions(-) create mode 100644 source/utils/vm/asm.h create mode 100644 source/utils/vm/dsihandler.s create mode 100644 source/utils/vm/vm.c create mode 100644 source/utils/vm/vm.h create mode 100644 source/vmalloc.cpp create mode 100644 source/vmalloc.h diff --git a/Makefile.gc b/Makefile.gc index c62edbd..ce3e9c0 100644 --- a/Makefile.gc +++ b/Makefile.gc @@ -20,22 +20,22 @@ TARGETDIR := executables BUILD := build_gc SOURCES := source source/images source/sounds source/fonts source/lang \ source/gui source/utils source/utils/sz \ - source/snes9x source/snes9x/apu + source/snes9x source/snes9x/apu source/utils/vm INCLUDES := source source/snes9x #--------------------------------------------------------------------------------- # options for code generation #--------------------------------------------------------------------------------- - -CFLAGS = -g -O3 -LTO -Wall $(MACHDEP) $(INCLUDE) -DNO_SOUND \ +LTO = -flto +CFLAGS = -g -O3 $(LTO) -Wall $(MACHDEP) $(INCLUDE) -DNO_SOUND \ -DHAVE_STDINT_H -DBLARGG_NONPORTABLE -DBLARGG_BIG_ENDIAN -DBLARGG_CPU_POWERPC \ -DZLIB -DRIGHTSHIFT_IS_SAR -DCPU_SHUTDOWN -DCORRECT_VRAM_READS \ - -D_SZ_ONE_DIRECTORY -D_LZMA_IN_CB -D_LZMA_OUT_READ \ + -D_SZ_ONE_DIRECTORY -D_LZMA_IN_CB -D_LZMA_OUT_READ -DUSE_VM \ -fomit-frame-pointer \ -Wno-unused-parameter -Wno-strict-aliasing 
\ -Wno-write-strings -Wno-parentheses CXXFLAGS = $(CFLAGS) -LDFLAGS = -g $(MACHDEP) -LTO -Wl,-Map,$(notdir $@).map +LDFLAGS = -g $(MACHDEP) $(LTO) -Wl,-Map,$(notdir $@).map #--------------------------------------------------------------------------------- # any extra libraries we wish to link with the project diff --git a/source/menu.cpp b/source/menu.cpp index a0c6ba7..a52d56f 100644 --- a/source/menu.cpp +++ b/source/menu.cpp @@ -57,9 +57,15 @@ static GuiImageData * pointer[4]; #define MEM_ALLOC(A) (u8*)mem2_malloc(A) #define MEM_DEALLOC(A) mem2_free(A) #else +#ifdef USE_VM + #include "vmalloc.h" + #define MEM_ALLOC(A) (u8*)vm_malloc(A) + #define MEM_DEALLOC(A) vm_free(A) + #else #define MEM_ALLOC(A) (u8*)memalign(32, A) #define MEM_DEALLOC(A) free(A) #endif +#endif static GuiTrigger * trigA = NULL; static GuiTrigger * trig2 = NULL; @@ -1213,7 +1219,27 @@ static int MenuGame() GuiText titleTxt((char *)Memory.ROMFilename, 22, (GXColor){255, 255, 255, 255}); titleTxt.SetAlignment(ALIGN_LEFT, ALIGN_TOP); - titleTxt.SetPosition(50,50); + titleTxt.SetPosition(50,40); + + char memInfo[128]; + memset(&memInfo[0], 0, 128); +#ifdef USE_VM + sprintf(&memInfo[0], "Memory Free: RAM %.2fMB VM %.2fMB" + ,((float)((u32)SYS_GetArena1Hi()-(u32)SYS_GetArena1Lo())/1024/1024) + ,((float)(vm_size_free())/1024/1024)); +#else +#ifdef HW_RVL + sprintf(&memInfo[0], "Memory Free: MEM1 %.2fMB MEM2 %.2fMB" + ,((float)((u32)SYS_GetArena1Hi()-(u32)SYS_GetArena1Lo())/1024/1024) + ,((float)((u32)SYS_GetArena2Hi()-(u32)SYS_GetArena2Lo())/1024/1024)); +#else + sprintf(&memInfo[0], "Memory Free: RAM %.2fMB" + ,((float)((u32)SYS_GetArena1Hi()-(u32)SYS_GetArena1Lo())/1024/1024)); +#endif +#endif + GuiText memTxt(memInfo, 18, (GXColor){255, 255, 255, 255}); + memTxt.SetAlignment(ALIGN_LEFT, ALIGN_TOP); + memTxt.SetPosition(50,70); GuiSound btnSoundOver(button_over_pcm, button_over_pcm_size, SOUND_PCM); GuiSound btnSoundClick(button_click_pcm, button_click_pcm_size, SOUND_PCM); @@ -1398,6 +1424,7 @@ 
static int MenuGame() HaltGui(); GuiWindow w(screenwidth, screenheight); w.Append(&titleTxt); + w.Append(&memTxt); w.Append(&saveBtn); w.Append(&loadBtn); w.Append(&deleteBtn); @@ -1424,6 +1451,7 @@ static int MenuGame() bgTopImg->SetEffect(EFFECT_SLIDE_TOP | EFFECT_SLIDE_IN, 35); closeBtn.SetEffect(EFFECT_SLIDE_TOP | EFFECT_SLIDE_IN, 35); titleTxt.SetEffect(EFFECT_SLIDE_TOP | EFFECT_SLIDE_IN, 35); + memTxt.SetEffect(EFFECT_SLIDE_TOP | EFFECT_SLIDE_IN, 35); mainmenuBtn.SetEffect(EFFECT_SLIDE_BOTTOM | EFFECT_SLIDE_IN, 35); bgBottomImg->SetEffect(EFFECT_SLIDE_BOTTOM | EFFECT_SLIDE_IN, 35); btnLogo->SetEffect(EFFECT_SLIDE_BOTTOM | EFFECT_SLIDE_IN, 35); @@ -1547,6 +1575,7 @@ static int MenuGame() bgTopImg->SetEffect(EFFECT_SLIDE_TOP | EFFECT_SLIDE_OUT, 15); closeBtn.SetEffect(EFFECT_SLIDE_TOP | EFFECT_SLIDE_OUT, 15); titleTxt.SetEffect(EFFECT_SLIDE_TOP | EFFECT_SLIDE_OUT, 15); + memTxt.SetEffect(EFFECT_SLIDE_TOP | EFFECT_SLIDE_OUT, 15); mainmenuBtn.SetEffect(EFFECT_SLIDE_BOTTOM | EFFECT_SLIDE_OUT, 15); bgBottomImg->SetEffect(EFFECT_SLIDE_BOTTOM | EFFECT_SLIDE_OUT, 15); btnLogo->SetEffect(EFFECT_SLIDE_BOTTOM | EFFECT_SLIDE_OUT, 15); diff --git a/source/snes9x/memmap.cpp b/source/snes9x/memmap.cpp index 38bfe7e..d106435 100644 --- a/source/snes9x/memmap.cpp +++ b/source/snes9x/memmap.cpp @@ -208,6 +208,9 @@ extern int WiiFileLoader(); extern void WiiSetupCheats(); #endif +#ifdef USE_VM + #include "vmalloc.h" +#endif #ifndef SET_UI_COLOR #define SET_UI_COLOR(r, g, b) ; @@ -1070,7 +1073,11 @@ bool8 CMemory::Init (void) RAM = (uint8 *) memalign(32,0x20000); SRAM = (uint8 *) memalign(32,0x20000); VRAM = (uint8 *) memalign(32,0x10000); +#ifdef USE_VM + ROM = (uint8 *) vm_malloc(MAX_ROM_SIZE + 0x200 + 0x8000); +#else ROM = (uint8 *) memalign(32,MAX_ROM_SIZE + 0x200 + 0x8000); +#endif IPPU.TileCache[TILE_2BIT] = (uint8 *) memalign(32,MAX_2BIT_TILES * 64); IPPU.TileCache[TILE_4BIT] = (uint8 *) memalign(32,MAX_4BIT_TILES * 64); @@ -1178,7 +1185,11 @@ void CMemory::Deinit (void) 
if (ROM) { ROM -= 0x8000; + #ifdef USE_VM + vm_free(ROM); + #else free(ROM); + #endif ROM = NULL; } diff --git a/source/snes9x/memmap.h b/source/snes9x/memmap.h index e34c854..814579e 100644 --- a/source/snes9x/memmap.h +++ b/source/snes9x/memmap.h @@ -187,7 +187,11 @@ struct CMemory { enum #ifdef HW_DOL +#ifdef USE_VM + { MAX_ROM_SIZE = 0x800000 }; +#else { MAX_ROM_SIZE = 0x600000 }; +#endif #else { MAX_ROM_SIZE = 0x800000 }; #endif diff --git a/source/snes9xgx.cpp b/source/snes9xgx.cpp index 45ffab3..b31f31f 100644 --- a/source/snes9xgx.cpp +++ b/source/snes9xgx.cpp @@ -27,6 +27,9 @@ #include #include #endif +#ifdef USE_VM + #include "vmalloc.h" +#endif #include "snes9xgx.h" #include "networkop.h" @@ -58,6 +61,9 @@ char appPath[1024] = { 0 }; static int currentMode; extern "C" { +#ifdef USE_VM + #include "utils/vm/vm.h" +#endif extern char* strcasestr(const char *, const char *); extern void __exception_setreload(int t); } @@ -363,6 +369,10 @@ extern "C" { int main(int argc, char *argv[]) { + #ifdef USE_VM + VM_Init(ARAM_SIZE, MRAM_BACKING); // Setup Virtual Memory with the entire ARAM + #endif + #ifdef HW_RVL L2Enhance(); @@ -443,8 +453,13 @@ int main(int argc, char *argv[]) savebuffer = (unsigned char *)mem2_malloc(SAVEBUFFERSIZE); browserList = (BROWSERENTRY *)mem2_malloc(sizeof(BROWSERENTRY)*MAX_BROWSER_SIZE); #else - savebuffer = (unsigned char *)malloc(SAVEBUFFERSIZE); - browserList = (BROWSERENTRY *)malloc(sizeof(BROWSERENTRY)*MAX_BROWSER_SIZE); +#ifdef USE_VM + savebuffer = (unsigned char *)vm_malloc(SAVEBUFFERSIZE); + browserList = (BROWSERENTRY *)vm_malloc(sizeof(BROWSERENTRY)*MAX_BROWSER_SIZE); +#else + savebuffer = (unsigned char *)memalign(32,SAVEBUFFERSIZE); + browserList = (BROWSERENTRY *)memalign(32,sizeof(BROWSERENTRY)*MAX_BROWSER_SIZE); +#endif #endif InitGUIThreads(); diff --git a/source/utils/vm/asm.h b/source/utils/vm/asm.h new file mode 100644 index 0000000..2feed00 --- /dev/null +++ b/source/utils/vm/asm.h @@ -0,0 +1,338 @@ +#ifndef 
__ASM_H__ +#define __ASM_H__ + +#ifdef _LANGUAGE_ASSEMBLY +/* Condition Register Bit Fields */ + +#define cr0 0 +#define cr1 1 +#define cr2 2 +#define cr3 3 +#define cr4 4 +#define cr5 5 +#define cr6 6 +#define cr7 7 + + +/* General Purpose Registers (GPRs) */ + +#define r0 0 +#define r1 1 +#define sp 1 +#define r2 2 +#define toc 2 +#define r3 3 +#define r4 4 +#define r5 5 +#define r6 6 +#define r7 7 +#define r8 8 +#define r9 9 +#define r10 10 +#define r11 11 +#define r12 12 +#define r13 13 +#define r14 14 +#define r15 15 +#define r16 16 +#define r17 17 +#define r18 18 +#define r19 19 +#define r20 20 +#define r21 21 +#define r22 22 +#define r23 23 +#define r24 24 +#define r25 25 +#define r26 26 +#define r27 27 +#define r28 28 +#define r29 29 +#define r30 30 +#define r31 31 + + +/* Floating Point Registers (FPRs) */ + +#define fr0 0 +#define fr1 1 +#define fr2 2 +#define fr3 3 +#define fr4 4 +#define fr5 5 +#define fr6 6 +#define fr7 7 +#define fr8 8 +#define fr9 9 +#define fr10 10 +#define fr11 11 +#define fr12 12 +#define fr13 13 +#define fr14 14 +#define fr15 15 +#define fr16 16 +#define fr17 17 +#define fr18 18 +#define fr19 19 +#define fr20 20 +#define fr21 21 +#define fr22 22 +#define fr23 23 +#define fr24 24 +#define fr25 25 +#define fr26 26 +#define fr27 27 +#define fr28 28 +#define fr29 29 +#define fr30 30 +#define fr31 31 + +#define vr0 0 +#define vr1 1 +#define vr2 2 +#define vr3 3 +#define vr4 4 +#define vr5 5 +#define vr6 6 +#define vr7 7 +#define vr8 8 +#define vr9 9 +#define vr10 10 +#define vr11 11 +#define vr12 12 +#define vr13 13 +#define vr14 14 +#define vr15 15 +#define vr16 16 +#define vr17 17 +#define vr18 18 +#define vr19 19 +#define vr20 20 +#define vr21 21 +#define vr22 22 +#define vr23 23 +#define vr24 24 +#define vr25 25 +#define vr26 26 +#define vr27 27 +#define vr28 28 +#define vr29 29 +#define vr30 30 +#define vr31 31 + +#endif //_LANGUAGE_ASSEMBLY + +#define SPRG0 272 +#define SPRG1 273 +#define SPRG2 274 +#define SPRG3 275 + +#define 
PMC1 953 +#define PMC2 954 +#define PMC3 957 +#define PMC4 958 + +#define MMCR0 952 +#define MMCR1 956 + + +#define LINK_REGISTER_CALLEE_UPDATE_ROOM 4 +#define EXCEPTION_NUMBER 8 +#define SRR0_OFFSET 12 +#define SRR1_OFFSET 16 +#define GPR0_OFFSET 20 +#define GPR1_OFFSET 24 +#define GPR2_OFFSET 28 +#define GPR3_OFFSET 32 +#define GPR4_OFFSET 36 +#define GPR5_OFFSET 40 +#define GPR6_OFFSET 44 +#define GPR7_OFFSET 48 +#define GPR8_OFFSET 52 +#define GPR9_OFFSET 56 +#define GPR10_OFFSET 60 +#define GPR11_OFFSET 64 +#define GPR12_OFFSET 68 +#define GPR13_OFFSET 72 +#define GPR14_OFFSET 76 +#define GPR15_OFFSET 80 +#define GPR16_OFFSET 84 +#define GPR17_OFFSET 88 +#define GPR18_OFFSET 92 +#define GPR19_OFFSET 96 +#define GPR20_OFFSET 100 +#define GPR21_OFFSET 104 +#define GPR22_OFFSET 108 +#define GPR23_OFFSET 112 +#define GPR24_OFFSET 116 +#define GPR25_OFFSET 120 +#define GPR26_OFFSET 124 +#define GPR27_OFFSET 128 +#define GPR28_OFFSET 132 +#define GPR29_OFFSET 136 +#define GPR30_OFFSET 140 +#define GPR31_OFFSET 144 + +#define GQR0_OFFSET 148 +#define GQR1_OFFSET 152 +#define GQR2_OFFSET 156 +#define GQR3_OFFSET 160 +#define GQR4_OFFSET 164 +#define GQR5_OFFSET 168 +#define GQR6_OFFSET 172 +#define GQR7_OFFSET 176 + +#define CR_OFFSET 180 +#define LR_OFFSET 184 +#define CTR_OFFSET 188 +#define XER_OFFSET 192 +#define MSR_OFFSET 196 +#define DAR_OFFSET 200 + +#define STATE_OFFSET 204 +#define MODE_OFFSET 206 + +#define FPR0_OFFSET 208 +#define FPR1_OFFSET 216 +#define FPR2_OFFSET 224 +#define FPR3_OFFSET 232 +#define FPR4_OFFSET 240 +#define FPR5_OFFSET 248 +#define FPR6_OFFSET 256 +#define FPR7_OFFSET 264 +#define FPR8_OFFSET 272 +#define FPR9_OFFSET 280 +#define FPR10_OFFSET 288 +#define FPR11_OFFSET 296 +#define FPR12_OFFSET 304 +#define FPR13_OFFSET 312 +#define FPR14_OFFSET 320 +#define FPR15_OFFSET 328 +#define FPR16_OFFSET 336 +#define FPR17_OFFSET 344 +#define FPR18_OFFSET 352 +#define FPR19_OFFSET 360 +#define FPR20_OFFSET 368 +#define FPR21_OFFSET 376 
+#define FPR22_OFFSET 384 +#define FPR23_OFFSET 392 +#define FPR24_OFFSET 400 +#define FPR25_OFFSET 408 +#define FPR26_OFFSET 416 +#define FPR27_OFFSET 424 +#define FPR28_OFFSET 432 +#define FPR29_OFFSET 440 +#define FPR30_OFFSET 448 +#define FPR31_OFFSET 456 + +#define FPSCR_OFFSET 464 + +#define PSR0_OFFSET 472 +#define PSR1_OFFSET 480 +#define PSR2_OFFSET 488 +#define PSR3_OFFSET 496 +#define PSR4_OFFSET 504 +#define PSR5_OFFSET 512 +#define PSR6_OFFSET 520 +#define PSR7_OFFSET 528 +#define PSR8_OFFSET 536 +#define PSR9_OFFSET 544 +#define PSR10_OFFSET 552 +#define PSR11_OFFSET 560 +#define PSR12_OFFSET 568 +#define PSR13_OFFSET 576 +#define PSR14_OFFSET 584 +#define PSR15_OFFSET 592 +#define PSR16_OFFSET 600 +#define PSR17_OFFSET 608 +#define PSR18_OFFSET 616 +#define PSR19_OFFSET 624 +#define PSR20_OFFSET 632 +#define PSR21_OFFSET 640 +#define PSR22_OFFSET 648 +#define PSR23_OFFSET 656 +#define PSR24_OFFSET 664 +#define PSR25_OFFSET 672 +#define PSR26_OFFSET 680 +#define PSR27_OFFSET 688 +#define PSR28_OFFSET 696 +#define PSR29_OFFSET 704 +#define PSR30_OFFSET 712 +#define PSR31_OFFSET 720 +/* + * maintain the EABI requested 8 bytes alignment + * As SVR4 ABI requires 16, make it 16 (as some + * exception may need more registers to be processed...) 
+ */ +#define EXCEPTION_FRAME_END 728 + +#define IBAT0U 528 +#define IBAT0L 529 +#define IBAT1U 530 +#define IBAT1L 531 +#define IBAT2U 532 +#define IBAT2L 533 +#define IBAT3U 534 +#define IBAT3L 535 +#define IBAT4U 560 +#define IBAT4L 561 +#define IBAT5U 562 +#define IBAT5L 563 +#define IBAT6U 564 +#define IBAT6L 565 +#define IBAT7U 566 +#define IBAT7L 567 + +#define DBAT0U 536 +#define DBAT0L 537 +#define DBAT1U 538 +#define DBAT1L 539 +#define DBAT2U 540 +#define DBAT2L 541 +#define DBAT3U 542 +#define DBAT3L 543 +#define DBAT4U 568 +#define DBAT4L 569 +#define DBAT5U 570 +#define DBAT5L 571 +#define DBAT6U 572 +#define DBAT6L 573 +#define DBAT7U 574 +#define DBAT7L 575 + +#define HID0 1008 +#define HID1 1009 +#define HID2 920 +#define HID4 1011 + +#define GQR0 912 +#define GQR1 913 +#define GQR2 914 +#define GQR3 915 +#define GQR4 916 +#define GQR5 917 +#define GQR6 918 +#define GQR7 919 + +#define L2CR 1017 + +#define WPAR 921 + +#define DMAU 922 +#define DMAL 923 + +#define MSR_RI 0x00000002 +#define MSR_DR 0x00000010 +#define MSR_IR 0x00000020 +#define MSR_IP 0x00000040 +#define MSR_SE 0x00000400 +#define MSR_ME 0x00001000 +#define MSR_FP 0x00002000 +#define MSR_POW 0x00004000 +#define MSR_EE 0x00008000 + +#define PPC_ALIGNMENT 8 + +#define PPC_CACHE_ALIGNMENT 32 + +#endif //__ASM_H__ diff --git a/source/utils/vm/dsihandler.s b/source/utils/vm/dsihandler.s new file mode 100644 index 0000000..6759938 --- /dev/null +++ b/source/utils/vm/dsihandler.s @@ -0,0 +1,71 @@ +/* Copyright 2013 tueidj All Rights Reserved + * This code may not be used in any project + * without explicit permission from the author. 
+ */ + +#include +#include "asm.h" + + .extern vm_dsi_handler + .extern default_exceptionhandler + +FUNC_START(dsi_handler) + stwu sp,-EXCEPTION_FRAME_END(sp) + stw r6,GPR6_OFFSET(sp) + stw r7,GPR7_OFFSET(sp) + stw r8,GPR8_OFFSET(sp) + stw r9,GPR9_OFFSET(sp) + stw r10,GPR10_OFFSET(sp) + stw r11,GPR11_OFFSET(sp) + stw r12,GPR12_OFFSET(sp) + mfdsisr r4 + mfmsr r3 + ori r3,r3,MSR_RI + mtmsr r3 + + addi r3,sp,8 + bl vm_dsi_handler + + # check if it was handled correctly + cmplwi r3,0 + + lwz r6,GPR6_OFFSET(sp) + lwz r7,GPR7_OFFSET(sp) + lwz r8,GPR8_OFFSET(sp) + lwz r9,GPR9_OFFSET(sp) + lwz r10,GPR10_OFFSET(sp) + lwz r11,GPR11_OFFSET(sp) + lwz r12,GPR12_OFFSET(sp) + + # clear MSR_RI + mfmsr r3 + rlwinm r3,r3,0,31,29 + mtmsr r3 + + bne 1f + + # jump to libogc's default handler + addi sp,sp,EXCEPTION_FRAME_END + b default_exceptionhandler + +1: + lwz r3,CR_OFFSET(sp) + lwz r4,LR_OFFSET(sp) + lwz r5,CTR_OFFSET(sp) + lwz r0,XER_OFFSET(sp) + mtcr r3 + mtlr r4 + mtctr r5 + mtxer r0 + lwz r0,GPR0_OFFSET(sp) + lwz r5,GPR5_OFFSET(sp) + + lwz r3,SRR0_OFFSET(sp) + lwz r4,SRR1_OFFSET(sp) + mtsrr0 r3 + mtsrr1 r4 + lwz r3,GPR3_OFFSET(sp) + lwz r4,GPR4_OFFSET(sp) + lwz sp,GPR1_OFFSET(sp) + rfi +FUNC_END(dsi_handler) diff --git a/source/utils/vm/vm.c b/source/utils/vm/vm.c new file mode 100644 index 0000000..73f3392 --- /dev/null +++ b/source/utils/vm/vm.c @@ -0,0 +1,367 @@ +/* Copyright 2013 tueidj All Rights Reserved + * This code may not be used in any project + * without explicit permission from the author. 
+ */ + +#include +#include +#include +#include +#include +#include +#include "vm.h" + +#include + +typedef u8 vm_page[PAGE_SIZE]; + +static p_map phys_map[2048+(PTE_SIZE/PAGE_SIZE)]; +static vm_map virt_map[65536]; +static u16 pmap_max, pmap_head; + +static PTE* HTABORG; +static vm_page* VM_Base; +static vm_page* MEM_Base = NULL; + +static int pagefile_fd = -1; +static mutex_t vm_mutex = LWP_MUTEX_NULL; +static bool vm_initialized = 0; + +static __inline__ void tlbie(void* p) +{ + asm volatile("tlbie %0" :: "r"(p)); +} + +static u16 locate_oldest(void) +{ + u16 head = pmap_head; + + for(;;++head) + { + PTE *p; + + if (head >= pmap_max) + head = 0; + + if (!phys_map[head].valid || phys_map[head].locked) + continue; + + p = HTABORG+phys_map[head].pte_index; + tlbie(VM_Base+phys_map[head].page_index); + + if (p->C) + { + p->C = 0; + phys_map[head].dirty = 1; + continue; + } + + if (p->R) + { + p->R = 0; + continue; + } + + p->data[0] = 0; + + pmap_head = head+1; + return head; + } +} + +static PTE* StorePTE(PTEG pteg, u32 virtual, u32 physical, u8 WIMG, u8 PP, int secondary) +{ + int i; + PTE p = {{0}}; + + p.valid = 1; + p.VSID = VM_VSID; + p.hash = secondary ? 1:0; + p.API = virtual >> 22; + p.RPN = physical >> 12; + p.WIMG = WIMG; + p.PP = PP; + + for (i=0; i < 8; i++) + { + if (pteg[i].data[0] == p.data[0]) + { +// printf("Error: address %08x already had a PTE entry\r\n", virtual); +// abort(); + } + else if (pteg[i].valid) + continue; + + asm volatile("tlbie %0" : : "r"(virtual)); + pteg[i].data[1] = p.data[1]; + pteg[i].data[0] = p.data[0]; +// if (i || secondary) +// printf("PTE for address %08x/%08x in PTEG %p index %d (%s)\r\n", virtual, physical, pteg, i, secondary ? 
"secondary" : "primary"); + return pteg+i; + } + + return NULL; +} + +static PTEG CalcPTEG(u32 virtual, int secondary) +{ + uint32_t segment_index = (virtual >> 12) & 0xFFFF; + u32 ptr = MEM_VIRTUAL_TO_PHYSICAL(HTABORG); + u32 hash = segment_index ^ VM_VSID; + + if (secondary) hash = ~hash; + + hash &= (HTABMASK << 10) | 0x3FF; + ptr |= hash << 6; + + return (PTEG)MEM_PHYSICAL_TO_K0(ptr); +} + +static PTE* insert_pte(u16 index, u32 physical, u8 WIMG, u8 PP) +{ + PTE *pte; + int i; + u32 virtual = (u32)(VM_Base+index); + + for (i=0; i < 2; i++) + { + PTEG pteg = CalcPTEG(virtual, i); + pte = StorePTE(pteg, virtual, physical, WIMG, PP, i); + if (pte) + return pte; + } + +// printf("Failed to insert PTE for %p\r\n", VM_Base+index); +// abort(); + + return NULL; +} + +static void tlbia(void) +{ + int i; + for (i=0; i < 64; i++) + asm volatile("tlbie %0" :: "r" (i*PAGE_SIZE)); +} + +/* This definition is wrong, pHndl does not take frame_context* as a parameter, + * it has to adjust the stack pointer and finish filling frame_context itself + */ +void __exception_sethandler(u32 nExcept, void (*pHndl)(frame_context*)); +extern void default_exceptionhandler(); +// use our own exception stub because libogc stupidly requires it +extern void dsi_handler(); + +void* VM_Init(u32 VMSize, u32 MEMSize) +{ + u32 i; + u16 index, v_index; + + if (vm_initialized) + return VM_Base; + + // parameter checking + if (VMSize>MAX_VM_SIZE || MEMSizeMAX_MEM_SIZE) + { + errno = EINVAL; + return NULL; + } + + VMSize = (VMSize+PAGE_SIZE-1)&PAGE_MASK; + MEMSize = (MEMSize+PAGE_SIZE-1)&PAGE_MASK; + //VM_Base = (vm_page*)(0x80000000 - VMSize); + VM_Base = (vm_page*)(ARAM_VM_BASE); + pmap_max = MEMSize / PAGE_SIZE + 16; + +// printf("VMSize %08x MEMSize %08x VM_Base %p pmap_max %u\r\n", VMSize, MEMSize, VM_Base, pmap_max); + + if (VMSize <= MEMSize) + { + errno = EINVAL; + return NULL; + } + + if (LWP_MutexInit(&vm_mutex, 0) != 0) + { + errno = ENOLCK; + return NULL; + } + + pagefile_fd = 
AR_Init(NULL, 0); +// ISFS_Initialize(); + // doesn't matter if this fails, will be caught when file is opened +// ISFS_CreateFile(VM_FILENAME, 0, ISFS_OPEN_RW, ISFS_OPEN_RW, ISFS_OPEN_RW); + +// pagefile_fd = ISFS_Open(VM_FILENAME, ISFS_OPEN_RW); + if (pagefile_fd < 0) + { + errno = ENOENT; + return NULL; + } + + MEMSize += PTE_SIZE; + MEM_Base = (vm_page*)memalign(PAGE_SIZE, MEMSize); + +// printf("MEM_Base: %p\r\n", MEM_Base); + + if (MEM_Base==NULL) + { + AR_Reset(); +// ISFS_Close(pagefile_fd); + errno = ENOMEM; + return NULL; + } + + tlbia(); + DCZeroRange(MEM_Base, MEMSize); + HTABORG = (PTE*)(((u32)MEM_Base+0xFFFF)&~0xFFFF); +// printf("HTABORG: %p\r\n", HTABORG); + + // attempt to make the pagefile the correct size +/* ISFS_Seek(pagefile_fd, 0, SEEK_SET); + for (i=0; i MEMSize) + to_write = MEMSize; + + if (ISFS_Write(pagefile_fd, MEM_Base, to_write) != to_write) + { + free(MEM_Base); + ISFS_Close(pagefile_fd); + errno = ENOSPC; + return NULL; + } +// printf("Wrote %u bytes to offset %u\r\n", to_write, page); + i += to_write; + }*/ + + // initial commit: map pmap_max pages to fill PTEs with valid RPNs + for (index=0,v_index=0; index %u\r\n", index, index+(PTE_SIZE/PAGE_SIZE)); + for (i=0; i<(PTE_SIZE/PAGE_SIZE); ++i,++index) + phys_map[index].valid = 0; + + --index; + --v_index; + continue; + } + + phys_map[index].valid = 1; + phys_map[index].locked = 0; + phys_map[index].dirty = 0; + phys_map[index].page_index = v_index; + phys_map[index].pte_index = insert_pte(v_index, MEM_VIRTUAL_TO_PHYSICAL(MEM_Base+index), 0, 0b10) - HTABORG; + virt_map[v_index].committed = 0; + virt_map[v_index].p_map_index = index; + } + + // all indexes up to 65536 + for (; v_index; ++v_index) + { + virt_map[v_index].committed = 0; + virt_map[v_index].p_map_index = pmap_max; + } + + pmap_head = 0; + + // set SDR1 + mtspr(25, MEM_VIRTUAL_TO_PHYSICAL(HTABORG)|HTABMASK); + //printf("SDR1: %08x\r\n", MEM_VIRTUAL_TO_PHYSICAL(HTABORG)); + // enable SR + asm volatile("mtsrin %0,%1" :: 
"r"(VM_VSID), "r"(VM_Base)); + // hook DSI + __exception_sethandler(EX_DSI, dsi_handler); + + atexit(VM_Deinit); + + vm_initialized = 1; + + return VM_Base; +} + +void VM_Deinit(void) +{ + if (!vm_initialized) + return; + + // disable SR + asm volatile("mtsrin %0,%1" :: "r"(0x80000000), "r"(VM_Base)); + // restore default DSI handler + __exception_sethandler(EX_DSI, default_exceptionhandler); + + free(MEM_Base); + MEM_Base = NULL; + + if (vm_mutex != LWP_MUTEX_NULL) + { + LWP_MutexDestroy(vm_mutex); + vm_mutex = LWP_MUTEX_NULL; + } + + if (pagefile_fd) + { + AR_Reset(); +// ISFS_Close(pagefile_fd); + pagefile_fd = -1; +// ISFS_Delete(VM_FILENAME); + } + + vm_initialized = 0; +} + +int vm_dsi_handler(frame_context* state, u32 DSISR) +{ + u16 v_index; + u16 p_index; + + if (state->DAR<(u32)VM_Base || state->DAR>=0x80000000) + return 0; + if ((DSISR&~0x02000000)!=0x40000000) + return 0; + if (!vm_initialized) + return 0; + + LWP_MutexLock(vm_mutex); + + state->DAR &= ~0xFFF; + v_index = (vm_page*)state->DAR - VM_Base; + + p_index = locate_oldest(); + + // purge p_index if it's dirty + if (phys_map[p_index].dirty) + { + DCFlushRange(MEM_Base+p_index, PAGE_SIZE); + AR_StartDMA(AR_MRAMTOARAM,(u32)(MEM_Base+p_index),phys_map[p_index].page_index*PAGE_SIZE,PAGE_SIZE); + while (AR_GetDMAStatus()); + virt_map[phys_map[p_index].page_index].committed = 1; + virt_map[phys_map[p_index].page_index].p_map_index = pmap_max; + phys_map[p_index].dirty = 0; +// printf("VM page %d was purged\r\n", phys_map[p_index].page_index); + } + + // fetch v_index if it has been previously committed + if (virt_map[v_index].committed) + { + DCInvalidateRange(MEM_Base+p_index, PAGE_SIZE); + AR_StartDMA(AR_ARAMTOMRAM,(u32)(MEM_Base+p_index),v_index*PAGE_SIZE,PAGE_SIZE); + while (AR_GetDMAStatus()); +// printf("VM page %d was fetched\r\n", v_index); + } + else + DCZeroRange(MEM_Base+p_index, PAGE_SIZE); + +// printf("VM page %u (0x%08x) replaced page %u (%p)\r\n", v_index, state->DAR, 
phys_map[p_index].page_index, VM_Base+phys_map[p_index].page_index); + + virt_map[v_index].p_map_index = p_index; + phys_map[p_index].page_index = v_index; + phys_map[p_index].pte_index = insert_pte(v_index, MEM_VIRTUAL_TO_PHYSICAL(MEM_Base+p_index), 0, 0b10) - HTABORG; + + LWP_MutexUnlock(vm_mutex); + + return 1; +} diff --git a/source/utils/vm/vm.h b/source/utils/vm/vm.h new file mode 100644 index 0000000..a85f714 --- /dev/null +++ b/source/utils/vm/vm.h @@ -0,0 +1,90 @@ +/* Copyright 2013 tueidj All Rights Reserved + * This code may not be used in any project + * without explicit permission from the author. + */ + +#ifndef _VM_H_ +#define _VM_H_ + +#include +#include +#include +#include +#include +#include + +#define KB (1024) +#define MB (1024*KB) +#define MRAM_BACKING (4*MB) // Use 4MB to page our 16MB +#define ARAM_RESERVED (64*KB) // Reserved for DSP/AESND/etc +#define ARAM_VM_BASE (0x7F000000) // Map ARAM to here +#define ARAM_START (ARAM_RESERVED + ARAM_VM_BASE) +#define ARAM_SIZE ((16*MB) - ARAM_RESERVED) // ARAM is ~16MB + +// maximum virtual memory size +#define MAX_VM_SIZE (256*1024*1024) +// maximum physical memory size +#define MAX_MEM_SIZE ( 8*1024*1024) +// minimum physical memory size +#define MIN_MEM_SIZE (256*1024) +// page size as defined by hardware +#define PAGE_SIZE 4096 +#define PAGE_MASK (~(PAGE_SIZE-1)) + +#define VM_VSID 0 +#define VM_SEGMENT 0x70000000 + +// use 64KB for PTEs +#define HTABMASK 0 +#define PTE_SIZE ((HTABMASK+1)*65536) +#define PTE_COUNT (PTE_SIZE>>3) + +//#define VM_FILENAME "/tmp/pagefile.sys" + +// keeps a record of each currently mapped page +typedef union +{ + u32 data; + struct + { + u32 valid : 1; + u32 locked : 1; + u32 dirty : 1; + u32 pte_index : 13; + u32 page_index : 16; + }; +} p_map; + +// maps VM addresses to mapped pages +typedef struct +{ + // data must be fetched when paging in? 
+ u16 committed : 1; + u16 p_map_index: 12; +} vm_map; + +typedef union +{ + u32 data[2]; + struct + { + u32 valid : 1; + u32 VSID : 24; + u32 hash : 1; + u32 API : 6; + + u32 RPN : 20; + u32 pad0 : 3; + u32 R : 1; + u32 C : 1; + u32 WIMG : 4; + u32 pad1 : 1; + u32 PP : 2; + }; +} PTE; +typedef PTE* PTEG; + +extern void* VM_Init(u32 VMSize, u32 MEMSize); +extern void VM_Deinit(void); + +#endif diff --git a/source/vmalloc.cpp b/source/vmalloc.cpp new file mode 100644 index 0000000..a2cfb31 --- /dev/null +++ b/source/vmalloc.cpp @@ -0,0 +1,49 @@ +/**************************************************************************** + * Snes9x Nintendo Wii/Gamecube Port + * + * emu_kidid 2015-2018 + * + * vmalloc.cpp + * + * GC VM memory allocator + ***************************************************************************/ + +#ifdef USE_VM + +#include +#include +#include +#include +#include "utils/vm/vm.h" + +static heap_cntrl vm_heap; + +static int vm_initialised = 0; + +void InitVmManager () +{ + __lwp_heap_init(&vm_heap, (void *)ARAM_VM_BASE, ARAM_SIZE, 32); + vm_initialised = 1; +} + +void* vm_malloc(u32 size) +{ + if(!vm_initialised) InitVmManager(); + return __lwp_heap_allocate(&vm_heap, size); +} + +bool vm_free(void *ptr) +{ + if(!vm_initialised) InitVmManager(); + return __lwp_heap_free(&vm_heap, ptr); +} + +int vm_size_free() +{ + if(!vm_initialised) InitVmManager(); + heap_iblock info; + __lwp_heap_getinfo(&vm_heap,&info); + return info.free_size; +} + +#endif diff --git a/source/vmalloc.h b/source/vmalloc.h new file mode 100644 index 0000000..245c40e --- /dev/null +++ b/source/vmalloc.h @@ -0,0 +1,21 @@ +/**************************************************************************** + * Snes9x Nintendo Wii/Gamecube Port + * + * emu_kidid 2015-2018 + * + * vmalloc.h + * + * GC VM memory allocator + ***************************************************************************/ + +#ifdef USE_VM + +#ifndef _VMMANAGER_H_ +#define _VMMANAGER_H_ + +void* vm_malloc(u32 
size); +bool vm_free(void *ptr); +int vm_size_free(); +#endif + +#endif