Merge pull request #725 from emukidid/master

More GameCube optimisations
This commit is contained in:
dborth 2018-06-08 17:56:26 -06:00 committed by GitHub
commit 8ea78dde4a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 1003 additions and 8 deletions

View File

@ -20,22 +20,22 @@ TARGETDIR := executables
BUILD := build_gc BUILD := build_gc
SOURCES := source source/images source/sounds source/fonts source/lang \ SOURCES := source source/images source/sounds source/fonts source/lang \
source/gui source/utils source/utils/sz \ source/gui source/utils source/utils/sz \
source/snes9x source/snes9x/apu source/snes9x source/snes9x/apu source/utils/vm
INCLUDES := source source/snes9x INCLUDES := source source/snes9x
#--------------------------------------------------------------------------------- #---------------------------------------------------------------------------------
# options for code generation # options for code generation
#--------------------------------------------------------------------------------- #---------------------------------------------------------------------------------
LTO = -flto
CFLAGS = -g -O3 -LTO -Wall $(MACHDEP) $(INCLUDE) -DNO_SOUND \ CFLAGS = -g -O3 $(LTO) -Wall $(MACHDEP) $(INCLUDE) -DNO_SOUND \
-DHAVE_STDINT_H -DBLARGG_NONPORTABLE -DBLARGG_BIG_ENDIAN -DBLARGG_CPU_POWERPC \ -DHAVE_STDINT_H -DBLARGG_NONPORTABLE -DBLARGG_BIG_ENDIAN -DBLARGG_CPU_POWERPC \
-DZLIB -DRIGHTSHIFT_IS_SAR -DCPU_SHUTDOWN -DCORRECT_VRAM_READS \ -DZLIB -DRIGHTSHIFT_IS_SAR -DCPU_SHUTDOWN -DCORRECT_VRAM_READS \
-D_SZ_ONE_DIRECTORY -D_LZMA_IN_CB -D_LZMA_OUT_READ \ -D_SZ_ONE_DIRECTORY -D_LZMA_IN_CB -D_LZMA_OUT_READ -DUSE_VM \
-fomit-frame-pointer \ -fomit-frame-pointer \
-Wno-unused-parameter -Wno-strict-aliasing \ -Wno-unused-parameter -Wno-strict-aliasing \
-Wno-write-strings -Wno-parentheses -Wno-write-strings -Wno-parentheses
CXXFLAGS = $(CFLAGS) CXXFLAGS = $(CFLAGS)
LDFLAGS = -g $(MACHDEP) -LTO -Wl,-Map,$(notdir $@).map LDFLAGS = -g $(MACHDEP) $(LTO) -Wl,-Map,$(notdir $@).map
#--------------------------------------------------------------------------------- #---------------------------------------------------------------------------------
# any extra libraries we wish to link with the project # any extra libraries we wish to link with the project

View File

@ -56,10 +56,16 @@ static GuiImageData * pointer[4];
#define MEM_ALLOC(A) (u8*)mem2_malloc(A) #define MEM_ALLOC(A) (u8*)mem2_malloc(A)
#define MEM_DEALLOC(A) mem2_free(A) #define MEM_DEALLOC(A) mem2_free(A)
#else
#ifdef USE_VM
#include "vmalloc.h"
#define MEM_ALLOC(A) (u8*)vm_malloc(A)
#define MEM_DEALLOC(A) vm_free(A)
#else #else
#define MEM_ALLOC(A) (u8*)memalign(32, A) #define MEM_ALLOC(A) (u8*)memalign(32, A)
#define MEM_DEALLOC(A) free(A) #define MEM_DEALLOC(A) free(A)
#endif #endif
#endif
static GuiTrigger * trigA = NULL; static GuiTrigger * trigA = NULL;
static GuiTrigger * trig2 = NULL; static GuiTrigger * trig2 = NULL;
@ -1213,7 +1219,27 @@ static int MenuGame()
GuiText titleTxt((char *)Memory.ROMFilename, 22, (GXColor){255, 255, 255, 255}); GuiText titleTxt((char *)Memory.ROMFilename, 22, (GXColor){255, 255, 255, 255});
titleTxt.SetAlignment(ALIGN_LEFT, ALIGN_TOP); titleTxt.SetAlignment(ALIGN_LEFT, ALIGN_TOP);
titleTxt.SetPosition(50,50); titleTxt.SetPosition(50,40);
char memInfo[128];
memset(&memInfo[0], 0, 128);
#ifdef USE_VM
sprintf(&memInfo[0], "Memory Free: RAM %.2fMB VM %.2fMB"
,((float)((u32)SYS_GetArena1Hi()-(u32)SYS_GetArena1Lo())/1024/1024)
,((float)(vm_size_free())/1024/1024));
#else
#ifdef HW_RVL
sprintf(&memInfo[0], "Memory Free: MEM1 %.2fMB MEM2 %.2fMB"
,((float)((u32)SYS_GetArena1Hi()-(u32)SYS_GetArena1Lo())/1024/1024)
,((float)((u32)SYS_GetArena2Hi()-(u32)SYS_GetArena2Lo())/1024/1024));
#else
sprintf(&memInfo[0], "Memory Free: RAM %.2fMB"
,((float)((u32)SYS_GetArena1Hi()-(u32)SYS_GetArena1Lo())/1024/1024));
#endif
#endif
GuiText memTxt(memInfo, 18, (GXColor){255, 255, 255, 255});
memTxt.SetAlignment(ALIGN_LEFT, ALIGN_TOP);
memTxt.SetPosition(50,70);
GuiSound btnSoundOver(button_over_pcm, button_over_pcm_size, SOUND_PCM); GuiSound btnSoundOver(button_over_pcm, button_over_pcm_size, SOUND_PCM);
GuiSound btnSoundClick(button_click_pcm, button_click_pcm_size, SOUND_PCM); GuiSound btnSoundClick(button_click_pcm, button_click_pcm_size, SOUND_PCM);
@ -1398,6 +1424,7 @@ static int MenuGame()
HaltGui(); HaltGui();
GuiWindow w(screenwidth, screenheight); GuiWindow w(screenwidth, screenheight);
w.Append(&titleTxt); w.Append(&titleTxt);
w.Append(&memTxt);
w.Append(&saveBtn); w.Append(&saveBtn);
w.Append(&loadBtn); w.Append(&loadBtn);
w.Append(&deleteBtn); w.Append(&deleteBtn);
@ -1424,6 +1451,7 @@ static int MenuGame()
bgTopImg->SetEffect(EFFECT_SLIDE_TOP | EFFECT_SLIDE_IN, 35); bgTopImg->SetEffect(EFFECT_SLIDE_TOP | EFFECT_SLIDE_IN, 35);
closeBtn.SetEffect(EFFECT_SLIDE_TOP | EFFECT_SLIDE_IN, 35); closeBtn.SetEffect(EFFECT_SLIDE_TOP | EFFECT_SLIDE_IN, 35);
titleTxt.SetEffect(EFFECT_SLIDE_TOP | EFFECT_SLIDE_IN, 35); titleTxt.SetEffect(EFFECT_SLIDE_TOP | EFFECT_SLIDE_IN, 35);
memTxt.SetEffect(EFFECT_SLIDE_TOP | EFFECT_SLIDE_IN, 35);
mainmenuBtn.SetEffect(EFFECT_SLIDE_BOTTOM | EFFECT_SLIDE_IN, 35); mainmenuBtn.SetEffect(EFFECT_SLIDE_BOTTOM | EFFECT_SLIDE_IN, 35);
bgBottomImg->SetEffect(EFFECT_SLIDE_BOTTOM | EFFECT_SLIDE_IN, 35); bgBottomImg->SetEffect(EFFECT_SLIDE_BOTTOM | EFFECT_SLIDE_IN, 35);
btnLogo->SetEffect(EFFECT_SLIDE_BOTTOM | EFFECT_SLIDE_IN, 35); btnLogo->SetEffect(EFFECT_SLIDE_BOTTOM | EFFECT_SLIDE_IN, 35);
@ -1547,6 +1575,7 @@ static int MenuGame()
bgTopImg->SetEffect(EFFECT_SLIDE_TOP | EFFECT_SLIDE_OUT, 15); bgTopImg->SetEffect(EFFECT_SLIDE_TOP | EFFECT_SLIDE_OUT, 15);
closeBtn.SetEffect(EFFECT_SLIDE_TOP | EFFECT_SLIDE_OUT, 15); closeBtn.SetEffect(EFFECT_SLIDE_TOP | EFFECT_SLIDE_OUT, 15);
titleTxt.SetEffect(EFFECT_SLIDE_TOP | EFFECT_SLIDE_OUT, 15); titleTxt.SetEffect(EFFECT_SLIDE_TOP | EFFECT_SLIDE_OUT, 15);
memTxt.SetEffect(EFFECT_SLIDE_TOP | EFFECT_SLIDE_OUT, 15);
mainmenuBtn.SetEffect(EFFECT_SLIDE_BOTTOM | EFFECT_SLIDE_OUT, 15); mainmenuBtn.SetEffect(EFFECT_SLIDE_BOTTOM | EFFECT_SLIDE_OUT, 15);
bgBottomImg->SetEffect(EFFECT_SLIDE_BOTTOM | EFFECT_SLIDE_OUT, 15); bgBottomImg->SetEffect(EFFECT_SLIDE_BOTTOM | EFFECT_SLIDE_OUT, 15);
btnLogo->SetEffect(EFFECT_SLIDE_BOTTOM | EFFECT_SLIDE_OUT, 15); btnLogo->SetEffect(EFFECT_SLIDE_BOTTOM | EFFECT_SLIDE_OUT, 15);

View File

@ -208,6 +208,9 @@
extern int WiiFileLoader(); extern int WiiFileLoader();
extern void WiiSetupCheats(); extern void WiiSetupCheats();
#endif #endif
#ifdef USE_VM
#include "vmalloc.h"
#endif
#ifndef SET_UI_COLOR #ifndef SET_UI_COLOR
#define SET_UI_COLOR(r, g, b) ; #define SET_UI_COLOR(r, g, b) ;
@ -1070,7 +1073,11 @@ bool8 CMemory::Init (void)
RAM = (uint8 *) memalign(32,0x20000); RAM = (uint8 *) memalign(32,0x20000);
SRAM = (uint8 *) memalign(32,0x20000); SRAM = (uint8 *) memalign(32,0x20000);
VRAM = (uint8 *) memalign(32,0x10000); VRAM = (uint8 *) memalign(32,0x10000);
#ifdef USE_VM
ROM = (uint8 *) vm_malloc(MAX_ROM_SIZE + 0x200 + 0x8000);
#else
ROM = (uint8 *) memalign(32,MAX_ROM_SIZE + 0x200 + 0x8000); ROM = (uint8 *) memalign(32,MAX_ROM_SIZE + 0x200 + 0x8000);
#endif
IPPU.TileCache[TILE_2BIT] = (uint8 *) memalign(32,MAX_2BIT_TILES * 64); IPPU.TileCache[TILE_2BIT] = (uint8 *) memalign(32,MAX_2BIT_TILES * 64);
IPPU.TileCache[TILE_4BIT] = (uint8 *) memalign(32,MAX_4BIT_TILES * 64); IPPU.TileCache[TILE_4BIT] = (uint8 *) memalign(32,MAX_4BIT_TILES * 64);
@ -1178,7 +1185,11 @@ void CMemory::Deinit (void)
if (ROM) if (ROM)
{ {
ROM -= 0x8000; ROM -= 0x8000;
#ifdef USE_VM
vm_free(ROM);
#else
free(ROM); free(ROM);
#endif
ROM = NULL; ROM = NULL;
} }

View File

@ -187,7 +187,11 @@ struct CMemory
{ {
enum enum
#ifdef HW_DOL #ifdef HW_DOL
#ifdef USE_VM
{ MAX_ROM_SIZE = 0x800000 };
#else
{ MAX_ROM_SIZE = 0x600000 }; { MAX_ROM_SIZE = 0x600000 };
#endif
#else #else
{ MAX_ROM_SIZE = 0x800000 }; { MAX_ROM_SIZE = 0x800000 };
#endif #endif

View File

@ -27,6 +27,9 @@
#include <wiiuse/wpad.h> #include <wiiuse/wpad.h>
#include <wupc/wupc.h> #include <wupc/wupc.h>
#endif #endif
#ifdef USE_VM
#include "vmalloc.h"
#endif
#include "snes9xgx.h" #include "snes9xgx.h"
#include "networkop.h" #include "networkop.h"
@ -58,6 +61,9 @@ char appPath[1024] = { 0 };
static int currentMode; static int currentMode;
extern "C" { extern "C" {
#ifdef USE_VM
#include "utils/vm/vm.h"
#endif
extern char* strcasestr(const char *, const char *); extern char* strcasestr(const char *, const char *);
extern void __exception_setreload(int t); extern void __exception_setreload(int t);
} }
@ -363,6 +369,10 @@ extern "C" {
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
#ifdef USE_VM
VM_Init(ARAM_SIZE, MRAM_BACKING); // Setup Virtual Memory with the entire ARAM
#endif
#ifdef HW_RVL #ifdef HW_RVL
L2Enhance(); L2Enhance();
@ -443,8 +453,13 @@ int main(int argc, char *argv[])
savebuffer = (unsigned char *)mem2_malloc(SAVEBUFFERSIZE); savebuffer = (unsigned char *)mem2_malloc(SAVEBUFFERSIZE);
browserList = (BROWSERENTRY *)mem2_malloc(sizeof(BROWSERENTRY)*MAX_BROWSER_SIZE); browserList = (BROWSERENTRY *)mem2_malloc(sizeof(BROWSERENTRY)*MAX_BROWSER_SIZE);
#else #else
savebuffer = (unsigned char *)malloc(SAVEBUFFERSIZE); #ifdef USE_VM
browserList = (BROWSERENTRY *)malloc(sizeof(BROWSERENTRY)*MAX_BROWSER_SIZE); savebuffer = (unsigned char *)vm_malloc(SAVEBUFFERSIZE);
browserList = (BROWSERENTRY *)vm_malloc(sizeof(BROWSERENTRY)*MAX_BROWSER_SIZE);
#else
savebuffer = (unsigned char *)memalign(32,SAVEBUFFERSIZE);
browserList = (BROWSERENTRY *)memalign(32,sizeof(BROWSERENTRY)*MAX_BROWSER_SIZE);
#endif
#endif #endif
InitGUIThreads(); InitGUIThreads();

338
source/utils/vm/asm.h Normal file
View File

@ -0,0 +1,338 @@
#ifndef __ASM_H__
#define __ASM_H__
#ifdef _LANGUAGE_ASSEMBLY
/* Condition Register Bit Fields */
#define cr0 0
#define cr1 1
#define cr2 2
#define cr3 3
#define cr4 4
#define cr5 5
#define cr6 6
#define cr7 7
/* General Purpose Registers (GPRs) */
#define r0 0
#define r1 1
#define sp 1
#define r2 2
#define toc 2
#define r3 3
#define r4 4
#define r5 5
#define r6 6
#define r7 7
#define r8 8
#define r9 9
#define r10 10
#define r11 11
#define r12 12
#define r13 13
#define r14 14
#define r15 15
#define r16 16
#define r17 17
#define r18 18
#define r19 19
#define r20 20
#define r21 21
#define r22 22
#define r23 23
#define r24 24
#define r25 25
#define r26 26
#define r27 27
#define r28 28
#define r29 29
#define r30 30
#define r31 31
/* Floating Point Registers (FPRs) */
#define fr0 0
#define fr1 1
#define fr2 2
#define fr3 3
#define fr4 4
#define fr5 5
#define fr6 6
#define fr7 7
#define fr8 8
#define fr9 9
#define fr10 10
#define fr11 11
#define fr12 12
#define fr13 13
#define fr14 14
#define fr15 15
#define fr16 16
#define fr17 17
#define fr18 18
#define fr19 19
#define fr20 20
#define fr21 21
#define fr22 22
#define fr23 23
#define fr24 24
#define fr25 25
#define fr26 26
#define fr27 27
#define fr28 28
#define fr29 29
#define fr30 30
#define fr31 31
#define vr0 0
#define vr1 1
#define vr2 2
#define vr3 3
#define vr4 4
#define vr5 5
#define vr6 6
#define vr7 7
#define vr8 8
#define vr9 9
#define vr10 10
#define vr11 11
#define vr12 12
#define vr13 13
#define vr14 14
#define vr15 15
#define vr16 16
#define vr17 17
#define vr18 18
#define vr19 19
#define vr20 20
#define vr21 21
#define vr22 22
#define vr23 23
#define vr24 24
#define vr25 25
#define vr26 26
#define vr27 27
#define vr28 28
#define vr29 29
#define vr30 30
#define vr31 31
#endif //_LANGUAGE_ASSEMBLY
#define SPRG0 272
#define SPRG1 273
#define SPRG2 274
#define SPRG3 275
#define PMC1 953
#define PMC2 954
#define PMC3 957
#define PMC4 958
#define MMCR0 952
#define MMCR1 956
#define LINK_REGISTER_CALLEE_UPDATE_ROOM 4
#define EXCEPTION_NUMBER 8
#define SRR0_OFFSET 12
#define SRR1_OFFSET 16
#define GPR0_OFFSET 20
#define GPR1_OFFSET 24
#define GPR2_OFFSET 28
#define GPR3_OFFSET 32
#define GPR4_OFFSET 36
#define GPR5_OFFSET 40
#define GPR6_OFFSET 44
#define GPR7_OFFSET 48
#define GPR8_OFFSET 52
#define GPR9_OFFSET 56
#define GPR10_OFFSET 60
#define GPR11_OFFSET 64
#define GPR12_OFFSET 68
#define GPR13_OFFSET 72
#define GPR14_OFFSET 76
#define GPR15_OFFSET 80
#define GPR16_OFFSET 84
#define GPR17_OFFSET 88
#define GPR18_OFFSET 92
#define GPR19_OFFSET 96
#define GPR20_OFFSET 100
#define GPR21_OFFSET 104
#define GPR22_OFFSET 108
#define GPR23_OFFSET 112
#define GPR24_OFFSET 116
#define GPR25_OFFSET 120
#define GPR26_OFFSET 124
#define GPR27_OFFSET 128
#define GPR28_OFFSET 132
#define GPR29_OFFSET 136
#define GPR30_OFFSET 140
#define GPR31_OFFSET 144
#define GQR0_OFFSET 148
#define GQR1_OFFSET 152
#define GQR2_OFFSET 156
#define GQR3_OFFSET 160
#define GQR4_OFFSET 164
#define GQR5_OFFSET 168
#define GQR6_OFFSET 172
#define GQR7_OFFSET 176
#define CR_OFFSET 180
#define LR_OFFSET 184
#define CTR_OFFSET 188
#define XER_OFFSET 192
#define MSR_OFFSET 196
#define DAR_OFFSET 200
#define STATE_OFFSET 204
#define MODE_OFFSET 206
#define FPR0_OFFSET 208
#define FPR1_OFFSET 216
#define FPR2_OFFSET 224
#define FPR3_OFFSET 232
#define FPR4_OFFSET 240
#define FPR5_OFFSET 248
#define FPR6_OFFSET 256
#define FPR7_OFFSET 264
#define FPR8_OFFSET 272
#define FPR9_OFFSET 280
#define FPR10_OFFSET 288
#define FPR11_OFFSET 296
#define FPR12_OFFSET 304
#define FPR13_OFFSET 312
#define FPR14_OFFSET 320
#define FPR15_OFFSET 328
#define FPR16_OFFSET 336
#define FPR17_OFFSET 344
#define FPR18_OFFSET 352
#define FPR19_OFFSET 360
#define FPR20_OFFSET 368
#define FPR21_OFFSET 376
#define FPR22_OFFSET 384
#define FPR23_OFFSET 392
#define FPR24_OFFSET 400
#define FPR25_OFFSET 408
#define FPR26_OFFSET 416
#define FPR27_OFFSET 424
#define FPR28_OFFSET 432
#define FPR29_OFFSET 440
#define FPR30_OFFSET 448
#define FPR31_OFFSET 456
#define FPSCR_OFFSET 464
#define PSR0_OFFSET 472
#define PSR1_OFFSET 480
#define PSR2_OFFSET 488
#define PSR3_OFFSET 496
#define PSR4_OFFSET 504
#define PSR5_OFFSET 512
#define PSR6_OFFSET 520
#define PSR7_OFFSET 528
#define PSR8_OFFSET 536
#define PSR9_OFFSET 544
#define PSR10_OFFSET 552
#define PSR11_OFFSET 560
#define PSR12_OFFSET 568
#define PSR13_OFFSET 576
#define PSR14_OFFSET 584
#define PSR15_OFFSET 592
#define PSR16_OFFSET 600
#define PSR17_OFFSET 608
#define PSR18_OFFSET 616
#define PSR19_OFFSET 624
#define PSR20_OFFSET 632
#define PSR21_OFFSET 640
#define PSR22_OFFSET 648
#define PSR23_OFFSET 656
#define PSR24_OFFSET 664
#define PSR25_OFFSET 672
#define PSR26_OFFSET 680
#define PSR27_OFFSET 688
#define PSR28_OFFSET 696
#define PSR29_OFFSET 704
#define PSR30_OFFSET 712
#define PSR31_OFFSET 720
/*
 * Maintain the 8-byte alignment requested by the EABI.
 * As the SVR4 ABI requires 16, make it 16 (as some
 * exceptions may need more registers to be processed...)
 * NOTE(review): the value below (728) is a multiple of 8 but not of 16.
 */
#define EXCEPTION_FRAME_END 728
#define IBAT0U 528
#define IBAT0L 529
#define IBAT1U 530
#define IBAT1L 531
#define IBAT2U 532
#define IBAT2L 533
#define IBAT3U 534
#define IBAT3L 535
#define IBAT4U 560
#define IBAT4L 561
#define IBAT5U 562
#define IBAT5L 563
#define IBAT6U 564
#define IBAT6L 565
#define IBAT7U 566
#define IBAT7L 567
#define DBAT0U 536
#define DBAT0L 537
#define DBAT1U 538
#define DBAT1L 539
#define DBAT2U 540
#define DBAT2L 541
#define DBAT3U 542
#define DBAT3L 543
#define DBAT4U 568
#define DBAT4L 569
#define DBAT5U 570
#define DBAT5L 571
#define DBAT6U 572
#define DBAT6L 573
#define DBAT7U 574
#define DBAT7L 575
#define HID0 1008
#define HID1 1009
#define HID2 920
#define HID4 1011
#define GQR0 912
#define GQR1 913
#define GQR2 914
#define GQR3 915
#define GQR4 916
#define GQR5 917
#define GQR6 918
#define GQR7 919
#define L2CR 1017
#define WPAR 921
#define DMAU 922
#define DMAL 923
#define MSR_RI 0x00000002
#define MSR_DR 0x00000010
#define MSR_IR 0x00000020
#define MSR_IP 0x00000040
#define MSR_SE 0x00000400
#define MSR_ME 0x00001000
#define MSR_FP 0x00002000
#define MSR_POW 0x00004000
#define MSR_EE 0x00008000
#define PPC_ALIGNMENT 8
#define PPC_CACHE_ALIGNMENT 32
#endif //__ASM_H__

View File

@ -0,0 +1,71 @@
/* Copyright 2013 tueidj All Rights Reserved
* This code may not be used in any project
* without explicit permission from the author.
*/
#include <ppc-asm.h>
#include "asm.h"
.extern vm_dsi_handler
.extern default_exceptionhandler
/*
 * DSI (data storage interrupt) exception stub.
 *
 * Calls the C routine vm_dsi_handler(frame, DSISR). A non-zero return value
 * means the fault was serviced: the saved context is reloaded and execution
 * resumes through rfi. A zero return value hands the exception over to
 * default_exceptionhandler with the stack pointer put back where it was on
 * entry.
 *
 * Only r6-r12 are stored here; CR, LR, CTR, XER, SRR0, SRR1, r0, r1 and r3-r5
 * are reloaded from the frame without being stored by this routine.
 * NOTE(review): presumably the libogc exception vector has already saved
 * those registers at the location this frame overlays - confirm against
 * libogc's exception stub.
 */
FUNC_START(dsi_handler)
/* open an EXCEPTION_FRAME_END byte frame; stwu also stores the old sp at the new sp */
stwu sp,-EXCEPTION_FRAME_END(sp)
/* save r6-r12 into their frame slots before calling into C */
stw r6,GPR6_OFFSET(sp)
stw r7,GPR7_OFFSET(sp)
stw r8,GPR8_OFFSET(sp)
stw r9,GPR9_OFFSET(sp)
stw r10,GPR10_OFFSET(sp)
stw r11,GPR11_OFFSET(sp)
stw r12,GPR12_OFFSET(sp)
/* r4 = DSISR, passed as the second argument of vm_dsi_handler */
mfdsisr r4
/* set MSR_RI for the duration of the C call */
mfmsr r3
ori r3,r3,MSR_RI
mtmsr r3
/* r3 = sp + 8 (the EXCEPTION_NUMBER slot), passed as the frame pointer argument */
addi r3,sp,8
bl vm_dsi_handler
# check if it was handled correctly
/* the comparison result is consumed by the bne further down; the loads and
   MSR updates in between do not write the condition register */
cmplwi r3,0
lwz r6,GPR6_OFFSET(sp)
lwz r7,GPR7_OFFSET(sp)
lwz r8,GPR8_OFFSET(sp)
lwz r9,GPR9_OFFSET(sp)
lwz r10,GPR10_OFFSET(sp)
lwz r11,GPR11_OFFSET(sp)
lwz r12,GPR12_OFFSET(sp)
# clear MSR_RI
/* the rlwinm mask keeps every bit except 0x00000002 (MSR_RI) */
mfmsr r3
rlwinm r3,r3,0,31,29
mtmsr r3
/* non-zero return value: the fault was handled, resume at label 1 */
bne 1f
# jump to libogc's default handler
/* drop the frame first so the default handler sees the entry-time sp */
addi sp,sp,EXCEPTION_FRAME_END
b default_exceptionhandler
1:
/* reload CR, LR, CTR and XER from the frame */
lwz r3,CR_OFFSET(sp)
lwz r4,LR_OFFSET(sp)
lwz r5,CTR_OFFSET(sp)
lwz r0,XER_OFFSET(sp)
mtcr r3
mtlr r4
mtctr r5
mtxer r0
lwz r0,GPR0_OFFSET(sp)
lwz r5,GPR5_OFFSET(sp)
/* SRR0/SRR1 are reloaded from the frame; rfi uses them to resume */
lwz r3,SRR0_OFFSET(sp)
lwz r4,SRR1_OFFSET(sp)
mtsrr0 r3
mtsrr1 r4
/* r3, r4 and finally sp are reloaded last because they were used as scratch/base above */
lwz r3,GPR3_OFFSET(sp)
lwz r4,GPR4_OFFSET(sp)
lwz sp,GPR1_OFFSET(sp)
rfi
FUNC_END(dsi_handler)

367
source/utils/vm/vm.c Normal file
View File

@ -0,0 +1,367 @@
/* Copyright 2013 tueidj All Rights Reserved
* This code may not be used in any project
* without explicit permission from the author.
*/
#include <gccore.h>
#include <stdlib.h>
#include <malloc.h>
#include <errno.h>
#include <ogc/machine/processor.h>
#include <ogc/aram.h>
#include "vm.h"
#include <stdio.h>
typedef u8 vm_page[PAGE_SIZE];
static p_map phys_map[2048+(PTE_SIZE/PAGE_SIZE)];
static vm_map virt_map[65536];
static u16 pmap_max, pmap_head;
static PTE* HTABORG;
static vm_page* VM_Base;
static vm_page* MEM_Base = NULL;
static int pagefile_fd = -1;
static mutex_t vm_mutex = LWP_MUTEX_NULL;
static bool vm_initialized = 0;
/* Issue a single `tlbie` instruction for the effective address `p`. */
static __inline__ void tlbie(void* p)
{
	__asm__ __volatile__("tlbie %0" : /* no outputs */ : "r"(p));
}
/*
 * Pick the physical page slot to evict next.
 *
 * Scans phys_map circularly, starting at pmap_head and wrapping at pmap_max.
 * Slots that are not valid or are locked are skipped. For every other slot
 * the TLB entry of its virtual page is invalidated and its PTE is inspected:
 *   - C set: the slot is flagged dirty, C is cleared and the scan moves on;
 *   - R set: R is cleared and the scan moves on;
 *   - neither set: the first PTE word (which holds the valid bit) is zeroed,
 *     pmap_head is advanced past the slot and its index is returned.
 *
 * The loop has no other exit, so it does not terminate if every slot is
 * invalid or locked.
 */
static u16 locate_oldest(void)
{
u16 head = pmap_head;
for(;;++head)
{
PTE *p;
// wrap around to the first slot
if (head >= pmap_max)
head = 0;
// not a candidate: no page in this slot, or the page is pinned
if (!phys_map[head].valid || phys_map[head].locked)
continue;
p = HTABORG+phys_map[head].pte_index;
// invalidate the TLB entry for this page before looking at its PTE bits
tlbie(VM_Base+phys_map[head].page_index);
if (p->C)
{
// remember the page as dirty in software, since C is cleared here
p->C = 0;
phys_map[head].dirty = 1;
continue;
}
if (p->R)
{
// clear R and move on; the slot is only taken if R is still clear on a later pass
p->R = 0;
continue;
}
// unmap the victim: zeroing word 0 clears the PTE's valid bit
p->data[0] = 0;
pmap_head = head+1;
return head;
}
}
/*
 * Store a page table entry mapping `virtual` to `physical` in the PTE group
 * `pteg`.
 *
 * The first of the group's 8 slots that is either not valid, or whose first
 * word already equals the new entry's first word, is (over)written.
 * `secondary` is recorded in the entry's hash bit. WIMG and PP are copied
 * into the entry unchanged.
 *
 * Returns a pointer to the slot that was written, or NULL when all 8 slots
 * are occupied by other entries.
 */
static PTE* StorePTE(PTEG pteg, u32 virtual, u32 physical, u8 WIMG, u8 PP, int secondary)
{
	PTE entry = {{0}};
	int slot;

	entry.valid = 1;
	entry.VSID = VM_VSID;
	entry.hash = secondary ? 1:0;
	entry.API = virtual >> 22;
	entry.RPN = physical >> 12;
	entry.WIMG = WIMG;
	entry.PP = PP;

	for (slot = 0; slot < 8; ++slot)
	{
		PTE *dst = &pteg[slot];

		/* occupied by a different mapping: try the next slot */
		if (dst->data[0] != entry.data[0] && dst->valid)
			continue;

		asm volatile("tlbie %0" : : "r"(virtual));
		/* second word first, so the word carrying the valid bit is written last */
		dst->data[1] = entry.data[1];
		dst->data[0] = entry.data[0];
		return dst;
	}

	return NULL;
}
/*
 * Compute the address of the PTE group for the effective address `virtual`.
 *
 * The hash is the 16-bit page index (address bits 12..27) XORed with
 * VM_VSID, inverted when `secondary` is non-zero, then masked with
 * (HTABMASK << 10) | 0x3FF. The masked hash, shifted left by 6, is ORed into
 * the physical address of HTABORG, and the result is converted back with
 * MEM_PHYSICAL_TO_K0.
 */
static PTEG CalcPTEG(u32 virtual, int secondary)
{
	uint32_t page_index = (virtual >> 12) & 0xFFFF;
	u32 group_addr = MEM_VIRTUAL_TO_PHYSICAL(HTABORG);
	u32 hash = page_index ^ VM_VSID;

	if (secondary)
		hash = ~hash;

	hash &= (HTABMASK << 10) | 0x3FF;
	group_addr |= hash << 6;

	return (PTEG)MEM_PHYSICAL_TO_K0(group_addr);
}
/*
 * Map virtual page `index` (counted from VM_Base) to the physical address
 * `physical`.
 *
 * The primary PTE group is tried first, then the secondary one. Returns the
 * PTE that was written, or NULL if both groups are full.
 */
static PTE* insert_pte(u16 index, u32 physical, u8 WIMG, u8 PP)
{
	u32 virtual = (u32)(VM_Base+index);
	int secondary = 0;

	while (secondary < 2)
	{
		PTEG group = CalcPTEG(virtual, secondary);
		PTE *stored = StorePTE(group, virtual, physical, WIMG, PP, secondary);

		if (stored)
			return stored;
		++secondary;
	}

	return NULL;
}
/*
 * Issue `tlbie` for 64 consecutive page-sized addresses starting at 0
 * (presumably one per TLB set, flushing the whole TLB - confirm against the
 * CPU manual).
 */
static void tlbia(void)
{
	int set = 0;

	while (set < 64)
	{
		asm volatile("tlbie %0" :: "r" (set*PAGE_SIZE));
		++set;
	}
}
/* This definition is wrong, pHndl does not take frame_context* as a parameter,
* it has to adjust the stack pointer and finish filling frame_context itself
*/
void __exception_sethandler(u32 nExcept, void (*pHndl)(frame_context*));
extern void default_exceptionhandler();
// use our own exception stub because libogc stupidly requires it
extern void dsi_handler();
/*
 * Set up the virtual memory window at ARAM_VM_BASE.
 *
 * VMSize   requested size of the virtual range in bytes; only validated
 *          (<= MAX_VM_SIZE and larger than MEMSize), the virtual map itself
 *          always covers 65536 pages.
 * MEMSize  bytes of main RAM used to hold resident pages
 *          (MIN_MEM_SIZE..MAX_MEM_SIZE); PTE_SIZE extra bytes are allocated
 *          on top of it for the hashed page table.
 *
 * Returns VM_Base on success, or if the VM is already initialised. On
 * failure returns NULL with errno set to:
 *   EINVAL  a size is out of range, or VMSize <= MEMSize after page rounding
 *   ENOLCK  the mutex could not be created
 *   ENOENT  AR_Init reported a negative value
 *   ENOMEM  the backing buffer could not be allocated
 *
 * Every failure path releases what was acquired before it (mutex, ARAM), so
 * a failed call leaves nothing behind and may be retried.
 */
void* VM_Init(u32 VMSize, u32 MEMSize)
{
	u32 i;
	u16 index, v_index;

	if (vm_initialized)
		return VM_Base;

	// parameter checking
	if (VMSize>MAX_VM_SIZE || MEMSize<MIN_MEM_SIZE || MEMSize>MAX_MEM_SIZE)
	{
		errno = EINVAL;
		return NULL;
	}

	VMSize = (VMSize+PAGE_SIZE-1)&PAGE_MASK;
	MEMSize = (MEMSize+PAGE_SIZE-1)&PAGE_MASK;

	// finish validating before any global state is touched
	if (VMSize <= MEMSize)
	{
		errno = EINVAL;
		return NULL;
	}

	VM_Base = (vm_page*)(ARAM_VM_BASE);
	// resident pages plus the slots occupied by the page table itself
	// (same term that sizes phys_map and the hole skipped below)
	pmap_max = MEMSize / PAGE_SIZE + (PTE_SIZE/PAGE_SIZE);
//	printf("VMSize %08x MEMSize %08x VM_Base %p pmap_max %u\r\n", VMSize, MEMSize, VM_Base, pmap_max);

	if (LWP_MutexInit(&vm_mutex, 0) != 0)
	{
		errno = ENOLCK;
		return NULL;
	}

	// ARAM takes the place of the pagefile
	pagefile_fd = AR_Init(NULL, 0);
	if (pagefile_fd < 0)
	{
		pagefile_fd = -1;
		LWP_MutexDestroy(vm_mutex);
		vm_mutex = LWP_MUTEX_NULL;
		errno = ENOENT;
		return NULL;
	}

	MEMSize += PTE_SIZE;
	MEM_Base = (vm_page*)memalign(PAGE_SIZE, MEMSize);
//	printf("MEM_Base: %p\r\n", MEM_Base);
	if (MEM_Base==NULL)
	{
		AR_Reset();
		pagefile_fd = -1;
		LWP_MutexDestroy(vm_mutex);
		vm_mutex = LWP_MUTEX_NULL;
		errno = ENOMEM;
		return NULL;
	}

	tlbia();
	DCZeroRange(MEM_Base, MEMSize);
	// the page table lives inside the buffer, at its first 64KB boundary
	HTABORG = (PTE*)(((u32)MEM_Base+0xFFFF)&~0xFFFF);
//	printf("HTABORG: %p\r\n", HTABORG);

	// initial commit: map pmap_max pages to fill PTEs with valid RPNs
	for (index=0,v_index=0; index<pmap_max; ++index,++v_index)
	{
		if ((PTE*)(MEM_Base+index) == HTABORG)
		{
			// slots overlapping the page table never hold a page
//			printf("p_map hole: %u -> %u\r\n", index, index+(PTE_SIZE/PAGE_SIZE));
			for (i=0; i<(PTE_SIZE/PAGE_SIZE); ++i,++index)
				phys_map[index].valid = 0;
			// undo the increments the loop header is about to apply
			--index;
			--v_index;
			continue;
		}

		phys_map[index].valid = 1;
		phys_map[index].locked = 0;
		phys_map[index].dirty = 0;
		phys_map[index].page_index = v_index;
		// NOTE(review): insert_pte can return NULL; the result is not checked here
		phys_map[index].pte_index = insert_pte(v_index, MEM_VIRTUAL_TO_PHYSICAL(MEM_Base+index), 0, 0b10) - HTABORG;
		virt_map[v_index].committed = 0;
		virt_map[v_index].p_map_index = index;
	}

	// all indexes up to 65536: v_index is a u16, so the loop ends when it wraps to 0
	for (; v_index; ++v_index)
	{
		virt_map[v_index].committed = 0;
		virt_map[v_index].p_map_index = pmap_max;
	}

	pmap_head = 0;

	// set SDR1
	mtspr(25, MEM_VIRTUAL_TO_PHYSICAL(HTABORG)|HTABMASK);
	//printf("SDR1: %08x\r\n", MEM_VIRTUAL_TO_PHYSICAL(HTABORG));
	// enable SR
	asm volatile("mtsrin %0,%1" :: "r"(VM_VSID), "r"(VM_Base));
	// hook DSI
	__exception_sethandler(EX_DSI, dsi_handler);

	atexit(VM_Deinit);

	vm_initialized = 1;

	return VM_Base;
}
/*
 * Tear down the virtual memory window set up by VM_Init.
 *
 * Does nothing unless the VM is initialised. Otherwise the segment register
 * for VM_Base is overwritten, the default DSI handler is put back, the
 * backing buffer is freed, the mutex is destroyed and ARAM is reset.
 */
void VM_Deinit(void)
{
	if (!vm_initialized)
		return;

	// disable SR
	asm volatile("mtsrin %0,%1" :: "r"(0x80000000), "r"(VM_Base));
	// restore default DSI handler
	__exception_sethandler(EX_DSI, default_exceptionhandler);

	free(MEM_Base);
	MEM_Base = NULL;

	if (vm_mutex != LWP_MUTEX_NULL)
	{
		LWP_MutexDestroy(vm_mutex);
		vm_mutex = LWP_MUTEX_NULL;
	}

	// -1 is the "not open" sentinel and VM_Init treats only negative values
	// as failure, so 0 must count as open here as well
	if (pagefile_fd >= 0)
	{
		AR_Reset();
		pagefile_fd = -1;
	}

	vm_initialized = 0;
}
/*
 * C half of the DSI handler: services a fault on a page inside the VM window.
 *
 * state  exception frame of the faulting context; state->DAR is the faulting
 *        address and is rounded down to its page in place
 * DSISR  value of the DSISR register at the time of the fault
 *
 * Returns 0 (not handled) when the address lies outside
 * [VM_Base, 0x80000000), when DSISR is anything other than 0x40000000 with
 * or without 0x02000000, or when the VM is not initialised. Otherwise a
 * resident page is evicted, the faulting page is brought in and mapped, and
 * 1 is returned.
 *
 * NOTE(review): per the PowerPC DSISR layout 0x40000000 should mean
 * "translation not found" and 0x02000000 "store access" - confirm against
 * the CPU manual.
 */
int vm_dsi_handler(frame_context* state, u32 DSISR)
{
u16 v_index;
u16 p_index;
// faults outside the VM window are not ours
if (state->DAR<(u32)VM_Base || state->DAR>=0x80000000)
return 0;
// only the 0x40000000 cause is serviced; the 0x02000000 bit is ignored in the comparison
if ((DSISR&~0x02000000)!=0x40000000)
return 0;
if (!vm_initialized)
return 0;
LWP_MutexLock(vm_mutex);
// page-align the faulting address and turn it into a page number relative to VM_Base
state->DAR &= ~0xFFF;
v_index = (vm_page*)state->DAR - VM_Base;
// pick the physical slot that will receive the page
p_index = locate_oldest();
// purge p_index if it's dirty
if (phys_map[p_index].dirty)
{
// write the cache lines back to RAM before the DMA engine reads the page
DCFlushRange(MEM_Base+p_index, PAGE_SIZE);
// the ARAM address of a page is its virtual page number times PAGE_SIZE
AR_StartDMA(AR_MRAMTOARAM,(u32)(MEM_Base+p_index),phys_map[p_index].page_index*PAGE_SIZE,PAGE_SIZE);
// busy-wait until the transfer has finished
while (AR_GetDMAStatus());
// the evicted page now has contents in ARAM that must be fetched on its next fault
virt_map[phys_map[p_index].page_index].committed = 1;
virt_map[phys_map[p_index].page_index].p_map_index = pmap_max;
phys_map[p_index].dirty = 0;
// printf("VM page %d was purged\r\n", phys_map[p_index].page_index);
}
// fetch v_index if it has been previously committed
if (virt_map[v_index].committed)
{
// discard cached lines for the slot so the CPU sees what the DMA writes
DCInvalidateRange(MEM_Base+p_index, PAGE_SIZE);
AR_StartDMA(AR_ARAMTOMRAM,(u32)(MEM_Base+p_index),v_index*PAGE_SIZE,PAGE_SIZE);
while (AR_GetDMAStatus());
// printf("VM page %d was fetched\r\n", v_index);
}
else
// never written back before: hand out a zero-filled page
DCZeroRange(MEM_Base+p_index, PAGE_SIZE);
// printf("VM page %u (0x%08x) replaced page %u (%p)\r\n", v_index, state->DAR, phys_map[p_index].page_index, VM_Base+phys_map[p_index].page_index);
// record the new owner of the slot and map it
virt_map[v_index].p_map_index = p_index;
phys_map[p_index].page_index = v_index;
// NOTE(review): insert_pte can return NULL; the result is not checked here
phys_map[p_index].pte_index = insert_pte(v_index, MEM_VIRTUAL_TO_PHYSICAL(MEM_Base+p_index), 0, 0b10) - HTABORG;
LWP_MutexUnlock(vm_mutex);
return 1;
}

90
source/utils/vm/vm.h Normal file
View File

@ -0,0 +1,90 @@
/* Copyright 2013 tueidj All Rights Reserved
* This code may not be used in any project
* without explicit permission from the author.
*/
#ifndef _VM_H_
#define _VM_H_
#include <gccore.h>
#include <stdlib.h>
#include <malloc.h>
#include <errno.h>
#include <ogc/machine/processor.h>
#include <stdio.h>
#define KB (1024)
#define MB (1024*KB)
#define MRAM_BACKING (4*MB) // Use 4MB to page our 16MB
#define ARAM_RESERVED (64*KB) // Reserved for DSP/AESND/etc
#define ARAM_VM_BASE (0x7F000000) // Map ARAM to here
#define ARAM_START (ARAM_RESERVED + ARAM_VM_BASE)
#define ARAM_SIZE ((16*MB) - ARAM_RESERVED) // ARAM is ~16MB
// maximum virtual memory size
#define MAX_VM_SIZE (256*1024*1024)
// maximum physical memory size
#define MAX_MEM_SIZE ( 8*1024*1024)
// minimum physical memory size
#define MIN_MEM_SIZE (256*1024)
// page size as defined by hardware
#define PAGE_SIZE 4096
#define PAGE_MASK (~(PAGE_SIZE-1))
#define VM_VSID 0
#define VM_SEGMENT 0x70000000
// use 64KB for PTEs
#define HTABMASK 0
#define PTE_SIZE ((HTABMASK+1)*65536)
#define PTE_COUNT (PTE_SIZE>>3)
//#define VM_FILENAME "/tmp/pagefile.sys"
// keeps a record of each currently mapped page
// (vm.c keeps one entry per page-sized slot of the main-RAM backing buffer)
typedef union
{
u32 data;
struct
{
u32 valid : 1; // slot can hold a page; vm.c clears it for slots overlapping the page table
u32 locked : 1; // slot is skipped when vm.c looks for a page to evict
u32 dirty : 1; // page must be written back to ARAM before the slot is reused
u32 pte_index : 13; // index of the page's PTE, relative to the page table base (HTABORG in vm.c)
u32 page_index : 16; // virtual page number held in this slot, relative to VM_Base in vm.c
};
} p_map;
// maps VM addresses to mapped pages
// (vm.c keeps one entry per virtual page)
typedef struct
{
// data must be fetched when paging in?
// (set by vm.c once the page has been written out to ARAM)
u16 committed : 1;
// index of the p_map slot holding the page; vm.c stores pmap_max when the page is not resident
u16 p_map_index: 12;
} vm_map;
// One 8-byte entry of the hashed page table, viewed either as two raw words
// or as bit-fields. The first five fields make up data[0], the rest data[1].
typedef union
{
u32 data[2];
struct
{
u32 valid : 1; // entry is in use
u32 VSID : 24; // virtual segment id (vm.c always stores VM_VSID)
u32 hash : 1; // 1 when the entry was placed through the secondary hash
u32 API : 6; // abbreviated page index: vm.c stores (address >> 22), truncated to 6 bits
u32 RPN : 20; // physical page number: vm.c stores (physical address >> 12)
u32 pad0 : 3;
u32 R : 1; // tested and cleared by the eviction scan in vm.c (presumably the hardware "referenced" bit)
u32 C : 1; // tested and cleared by the eviction scan in vm.c, which then treats the page as dirty (presumably the hardware "changed" bit)
u32 WIMG : 4; // memory/cache attribute bits, stored as passed by the caller
u32 pad1 : 1;
u32 PP : 2; // page protection bits, stored as passed by the caller
};
} PTE;
typedef PTE* PTEG;
extern void* VM_Init(u32 VMSize, u32 MEMSize);
extern void VM_Deinit(void);
#endif

49
source/vmalloc.cpp Normal file
View File

@ -0,0 +1,49 @@
/****************************************************************************
* Snes9x Nintendo Wii/Gamecube Port
*
* emu_kidid 2015-2018
*
* vmalloc.cpp
*
* GC VM memory allocator
***************************************************************************/
#ifdef USE_VM
#include <ogc/machine/asm.h>
#include <ogc/lwp_heap.h>
#include <ogc/system.h>
#include <ogc/machine/processor.h>
#include "utils/vm/vm.h"
static heap_cntrl vm_heap;
static int vm_initialised = 0;
/****************************************************************************
 * InitVmManager
 *
 * Creates the heap that hands out VM memory: ARAM_SIZE bytes starting at
 * ARAM_VM_BASE, initialised with a page size argument of 32.
 ***************************************************************************/
void InitVmManager ()
{
	void *const heap_start = (void *)ARAM_VM_BASE;

	__lwp_heap_init(&vm_heap, heap_start, ARAM_SIZE, 32);
	vm_initialised = 1;
}
/****************************************************************************
 * vm_malloc
 *
 * Allocates `size` bytes from the VM heap, creating the heap on first use.
 * Returns whatever __lwp_heap_allocate returns.
 ***************************************************************************/
void* vm_malloc(u32 size)
{
	if (vm_initialised == 0)
	{
		InitVmManager();
	}

	void *block = __lwp_heap_allocate(&vm_heap, size);
	return block;
}
/****************************************************************************
 * vm_free
 *
 * Returns `ptr` to the VM heap, creating the heap first if it does not
 * exist yet. The result of __lwp_heap_free is passed back to the caller.
 ***************************************************************************/
bool vm_free(void *ptr)
{
	if (vm_initialised == 0)
	{
		InitVmManager();
	}

	const bool released = __lwp_heap_free(&vm_heap, ptr);
	return released;
}
int vm_size_free()
{
if(!vm_initialised) InitVmManager();
heap_iblock info;
__lwp_heap_getinfo(&vm_heap,&info);
return info.free_size;
}
#endif

21
source/vmalloc.h Normal file
View File

@ -0,0 +1,21 @@
/****************************************************************************
* Snes9x Nintendo Wii/Gamecube Port
*
* emu_kidid 2015-2018
*
* vmalloc.h
*
* GC VM memory allocator
***************************************************************************/
#ifdef USE_VM
#ifndef _VMMANAGER_H_
#define _VMMANAGER_H_
// Allocate `size` bytes from the VM heap (the heap is created on first use).
void* vm_malloc(u32 size);
// Give a block obtained from vm_malloc back to the VM heap; returns the result of the underlying heap free call.
bool vm_free(void *ptr);
// Free size of the VM heap as reported by the underlying heap implementation.
int vm_size_free();
#endif
#endif