From 3c10dfd73d1d9674b4424df2976f228e52af9f32 Mon Sep 17 00:00:00 2001 From: retro100 Date: Sat, 6 Feb 2021 18:01:05 +0100 Subject: [PATCH] Add PowerPC Dynamic Recompiler patch from jmarsh (ppc_dynrec.diff and drive_fat_BE.diff) --- README | 2 + include/fpu.h | 2 +- include/mem.h | 43 +- src/cpu/core_dynrec.cpp | 8 + src/cpu/core_dynrec/Makefile.am | 3 +- src/cpu/core_dynrec/cache.h | 61 +- src/cpu/core_dynrec/decoder_basic.h | 17 +- src/cpu/core_dynrec/decoder_opcodes.h | 17 +- src/cpu/core_dynrec/risc_ppc.h | 901 ++++++++++++++++++++++++++ src/dos/drive_fat.cpp | 70 +- src/platform/wii/config.h | 4 +- 11 files changed, 1043 insertions(+), 85 deletions(-) create mode 100644 src/cpu/core_dynrec/risc_ppc.h diff --git a/README b/README index 7a8c749..f99e7c7 100644 --- a/README +++ b/README @@ -23,6 +23,8 @@ A port of DOSBox to the Wii using SDL Wii. * Updated to latest libogc and devkitPPC * Convert CRLF line terminators to unix line terminators (like at original SVN repo) * Sync to DOSBox SVN r4301 +* Add PowerPC Dynamic Recompiler patch from jmarsh (ppc_dynrec.diff and drive_fat_BE.diff) + see: https://www.vogons.org/viewtopic.php?t=65057 [1.7 - June 30, 2012] diff --git a/include/fpu.h b/include/fpu.h index 44acd31..2b7cee5 100644 --- a/include/fpu.h +++ b/include/fpu.h @@ -85,7 +85,7 @@ enum FPU_Round { ROUND_Chop = 3 }; -typedef struct { +typedef struct FPU_rec { FPU_Reg regs[9]; FPU_P_Reg p_regs[9]; FPU_Tag tags[9]; diff --git a/include/mem.h b/include/mem.h index 99b082e..7393b77 100644 --- a/include/mem.h +++ b/include/mem.h @@ -57,20 +57,39 @@ MemHandle MEM_NextHandleAt(MemHandle handle,Bitu where); Working on big or little endian machines */ -#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY) - static INLINE Bit8u host_readb(HostPt off) { return off[0]; } + +static INLINE void host_writeb(HostPt off,Bit8u val) { + off[0]=val; +} + +// use __builtin_bswap* for gcc >= 4.3 +#if defined(WORDS_BIGENDIAN) && defined(__GNUC__) && \ + (__GNUC__ > 4 || 
(__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + +static INLINE Bit16u host_readw(HostPt off) { + return __builtin_bswap16(*(Bit16u *)off); +} +static INLINE Bit32u host_readd(HostPt off) { + return __builtin_bswap32(*(Bit32u *)off); +} +static INLINE void host_writew(HostPt off, Bit16u val) { + *(Bit16u *)off = __builtin_bswap16(val); +} +static INLINE void host_writed(HostPt off, Bit32u val) { + *(Bit32u *)off = __builtin_bswap32(val); +} + +#elif defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY) + static INLINE Bit16u host_readw(HostPt off) { return off[0] | (off[1] << 8); } static INLINE Bit32u host_readd(HostPt off) { return off[0] | (off[1] << 8) | (off[2] << 16) | (off[3] << 24); } -static INLINE void host_writeb(HostPt off,Bit8u val) { - off[0]=val; -} static INLINE void host_writew(HostPt off,Bit16u val) { off[0]=(Bit8u)(val); off[1]=(Bit8u)(val >> 8); @@ -84,18 +103,12 @@ static INLINE void host_writed(HostPt off,Bit32u val) { #else -static INLINE Bit8u host_readb(HostPt off) { - return *(Bit8u *)off; -} static INLINE Bit16u host_readw(HostPt off) { return *(Bit16u *)off; } static INLINE Bit32u host_readd(HostPt off) { return *(Bit32u *)off; } -static INLINE void host_writeb(HostPt off,Bit8u val) { - *(Bit8u *)(off)=val; -} static INLINE void host_writew(HostPt off,Bit16u val) { *(Bit16u *)(off)=val; } @@ -118,6 +131,14 @@ static INLINE void var_write(Bit32u * var, Bit32u val) { host_writed((HostPt)var, val); } +static INLINE Bit16u var_read(Bit16u * var) { + return host_readw((HostPt)var); +} + +static INLINE Bit32u var_read(Bit32u * var) { + return host_readd((HostPt)var); +} + /* The Folowing six functions are slower but they recognize the paged memory system */ Bit8u mem_readb(PhysPt pt); diff --git a/src/cpu/core_dynrec.cpp b/src/cpu/core_dynrec.cpp index 9e23291..b84cb42 100644 --- a/src/cpu/core_dynrec.cpp +++ b/src/cpu/core_dynrec.cpp @@ -138,6 +138,7 @@ static struct { #define MIPSEL 0x03 #define ARMV4LE 0x04 #define ARMV7LE 0x05 +#define POWERPC 
0x06 #define ARMV8LE 0x07 #if C_TARGETCPU == X86_64 @@ -148,10 +149,17 @@ static struct { #include "core_dynrec/risc_mipsel32.h" #elif (C_TARGETCPU == ARMV4LE) || (C_TARGETCPU == ARMV7LE) #include "core_dynrec/risc_armv4le.h" +#elif C_TARGETCPU == POWERPC +#include "core_dynrec/risc_ppc.h" #elif C_TARGETCPU == ARMV8LE #include "core_dynrec/risc_armv8le.h" #endif +#if !defined(WORDS_BIGENDIAN) +#define gen_add_LE gen_add +#define gen_mov_LE_word_to_reg gen_mov_word_to_reg +#endif + #include "core_dynrec/decoder.h" CacheBlockDynRec * LinkBlocks(BlockReturn ret) { diff --git a/src/cpu/core_dynrec/Makefile.am b/src/cpu/core_dynrec/Makefile.am index f135543..57e7258 100644 --- a/src/cpu/core_dynrec/Makefile.am +++ b/src/cpu/core_dynrec/Makefile.am @@ -2,4 +2,5 @@ noinst_HEADERS = cache.h decoder.h decoder_basic.h decoder_opcodes.h \ dyn_fpu.h operators.h risc_x64.h risc_x86.h risc_mipsel32.h \ risc_armv4le.h risc_armv4le-common.h \ risc_armv4le-o3.h risc_armv4le-thumb.h \ - risc_armv4le-thumb-iw.h risc_armv4le-thumb-niw.h risc_armv8le.h + risc_armv4le-thumb-iw.h risc_armv4le-thumb-niw.h risc_armv8le.h \ + risc_ppc.h diff --git a/src/cpu/core_dynrec/cache.h b/src/cpu/core_dynrec/cache.h index 9ae81eb..faee316 100644 --- a/src/cpu/core_dynrec/cache.h +++ b/src/cpu/core_dynrec/cache.h @@ -145,7 +145,7 @@ public: if (host_readb(hostmem+addr)==(Bit8u)val) return; host_writeb(hostmem+addr,val); // see if there's code where we are writing to - if (!host_readb(&write_map[addr])) { + if (!write_map[addr]) { if (active_blocks) return; // still some blocks in this page active_count--; if (!active_count) Release(); // delay page releasing until active_count is zero @@ -162,7 +162,7 @@ public: if (host_readw(hostmem+addr)==(Bit16u)val) return; host_writew(hostmem+addr,val); // see if there's code where we are writing to - if (!host_readw(&write_map[addr])) { + if (!*(Bit16u*)&write_map[addr]) { if (active_blocks) return; // still some blocks in this page active_count--; if 
(!active_count) Release(); // delay page releasing until active_count is zero @@ -171,7 +171,7 @@ public: invalidation_map=(Bit8u*)malloc(4096); memset(invalidation_map,0,4096); } -#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY) +#if !defined(C_UNALIGNED_MEMORY) host_writew(&invalidation_map[addr], host_readw(&invalidation_map[addr])+0x101); #else @@ -184,7 +184,7 @@ public: if (host_readd(hostmem+addr)==(Bit32u)val) return; host_writed(hostmem+addr,val); // see if there's code where we are writing to - if (!host_readd(&write_map[addr])) { + if (!*(Bit32u*)&write_map[addr]) { if (active_blocks) return; // still some blocks in this page active_count--; if (!active_count) Release(); // delay page releasing until active_count is zero @@ -193,7 +193,7 @@ public: invalidation_map=(Bit8u*)malloc(4096); memset(invalidation_map,0,4096); } -#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY) +#if !defined(C_UNALIGNED_MEMORY) host_writed(&invalidation_map[addr], host_readd(&invalidation_map[addr])+0x1010101); #else @@ -229,7 +229,7 @@ public: addr&=4095; if (host_readw(hostmem+addr)==(Bit16u)val) return false; // see if there's code where we are writing to - if (!host_readw(&write_map[addr])) { + if (!*(Bit16u*)&write_map[addr]) { if (!active_blocks) { // no blocks left in this page, still delay the page releasing a bit active_count--; @@ -240,7 +240,7 @@ public: invalidation_map=(Bit8u*)malloc(4096); memset(invalidation_map,0,4096); } -#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY) +#if !defined(C_UNALIGNED_MEMORY) host_writew(&invalidation_map[addr], host_readw(&invalidation_map[addr])+0x101); #else @@ -258,7 +258,7 @@ public: addr&=4095; if (host_readd(hostmem+addr)==(Bit32u)val) return false; // see if there's code where we are writing to - if (!host_readd(&write_map[addr])) { + if (!*(Bit32u*)&write_map[addr]) { if (!active_blocks) { // no blocks left in this page, still delay the page releasing a bit active_count--; @@ -269,7 +269,7 
@@ public: invalidation_map=(Bit8u*)malloc(4096); memset(invalidation_map,0,4096); } -#if defined(WORDS_BIGENDIAN) || !defined(C_UNALIGNED_MEMORY) +#if !defined(C_UNALIGNED_MEMORY) host_writed(&invalidation_map[addr], host_readd(&invalidation_map[addr])+0x1010101); #else @@ -372,11 +372,11 @@ public: return 0; // none found } - HostPt GetHostReadPt(Bitu phys_page) { + HostPt GetHostReadPt(Bitu phys_page) { hostmem=old_pagehandler->GetHostReadPt(phys_page); return hostmem; } - HostPt GetHostWritePt(Bitu phys_page) { + HostPt GetHostWritePt(Bitu phys_page) { return GetHostReadPt( phys_page ); } public: @@ -392,7 +392,7 @@ private: Bitu active_blocks; // the number of cache blocks in this page Bitu active_count; // delaying parameter to not immediately release a page - HostPt hostmem; + HostPt hostmem; Bitu phys_page; }; @@ -433,13 +433,13 @@ void CacheBlockDynRec::Clear(void) { wherelink = &(*wherelink)->link[ind].next; } // now remove the link - if(*wherelink) + if(*wherelink) *wherelink = (*wherelink)->link[ind].next; else { LOG(LOG_CPU,LOG_ERROR)("Cache anomaly. 
please investigate"); } } - } else + } else cache_addunusedblock(this); if (crossblock) { // clear out the crossblock (in the page before) as well @@ -464,7 +464,7 @@ static CacheBlockDynRec * cache_openblock(void) { // check for enough space in this block Bitu size=block->cache.size; CacheBlockDynRec * nextblock=block->cache.next; - if (block->page.handler) + if (block->page.handler) block->Clear(); // block size must be at least CACHE_MAXSIZE while (sizecache.size; CacheBlockDynRec * tempblock=nextblock->cache.next; - if (nextblock->page.handler) + if (nextblock->page.handler) nextblock->Clear(); // block is free now cache_addunusedblock(nextblock); @@ -500,8 +500,8 @@ static void cache_closeblock(void) { Bitu written=(Bitu)(cache.pos-block->cache.start); if (written>block->cache.size) { if (!block->cache.next) { - if (written>block->cache.size+CACHE_MAXSIZE) E_Exit("CacheBlock overrun 1 %d",written-block->cache.size); - } else E_Exit("CacheBlock overrun 2 written %d size %d",written,block->cache.size); + if (written>block->cache.size+CACHE_MAXSIZE) E_Exit("CacheBlock overrun 1 %d",written-block->cache.size); + } else E_Exit("CacheBlock overrun 2 written %d size %d",written,block->cache.size); } else { Bitu new_size; Bitu left=block->cache.size-written; @@ -553,12 +553,14 @@ static INLINE void cache_addq(Bit64u val) { static void dyn_return(BlockReturn retcode,bool ret_exception); static void dyn_run_code(void); +static void cache_block_before_close(void); +static void cache_block_closing(Bit8u* block_start,Bitu block_size); /* Define temporary pagesize so the MPROTECT case and the regular case share as much code as possible */ #if (C_HAVE_MPROTECT) #define PAGESIZE_TEMP PAGESIZE -#else +#else #define PAGESIZE_TEMP 4096 #endif @@ -614,18 +616,27 @@ static void cache_init(bool enable) { } // setup the default blocks for block linkage returns cache.pos=&cache_code_link_blocks[0]; + core_dynrec.runcode=(BlockReturn (*)(Bit8u*))cache.pos; + // can use op to 
PAGESIZE_TEMP-64 bytes + dyn_run_code(); + cache_block_before_close(); + cache_block_closing(cache_code_link_blocks, cache.pos-cache_code_link_blocks); + + cache.pos=&cache_code_link_blocks[PAGESIZE_TEMP-64]; link_blocks[0].cache.start=cache.pos; // link code that returns with a special return code + // must be less than 32 bytes dyn_return(BR_Link1,false); - cache.pos=&cache_code_link_blocks[32]; + cache_block_before_close(); + cache_block_closing(link_blocks[0].cache.start, cache.pos-link_blocks[0].cache.start); + + cache.pos=&cache_code_link_blocks[PAGESIZE_TEMP-32]; link_blocks[1].cache.start=cache.pos; // link code that returns with a special return code + // must be less than 32 bytes dyn_return(BR_Link2,false); - - cache.pos=&cache_code_link_blocks[64]; - core_dynrec.runcode=(BlockReturn (*)(Bit8u*))cache.pos; -// link_blocks[1].cache.start=cache.pos; - dyn_run_code(); + cache_block_before_close(); + cache_block_closing(link_blocks[1].cache.start, cache.pos-link_blocks[1].cache.start); cache.free_pages=0; cache.last_page=0; diff --git a/src/cpu/core_dynrec/decoder_basic.h b/src/cpu/core_dynrec/decoder_basic.h index c8e2a8e..3352c5f 100644 --- a/src/cpu/core_dynrec/decoder_basic.h +++ b/src/cpu/core_dynrec/decoder_basic.h @@ -502,7 +502,6 @@ static INLINE void dyn_set_eip_end(HostReg reg,Bit32u imm=0) { gen_mov_word_to_reg(reg,&reg_eip,true); //get_extend_word will mask off the upper bits //gen_mov_word_to_reg(reg,&reg_eip,decode.big_op); gen_add_imm(reg,(Bit32u)(decode.code-decode.code_start+imm)); - if (!decode.big_op) gen_extend_word(false,reg); } @@ -995,10 +994,10 @@ skip_extend_word: // succeeded, use the pointer to avoid code invalidation if (!addseg) { if (!scaled_reg_used) { - gen_mov_word_to_reg(ea_reg,(void*)val,true); + gen_mov_LE_word_to_reg(ea_reg,(void*)val,true); } else { DYN_LEA_MEM_REG_VAL(ea_reg,NULL,scaled_reg,scale,0); - gen_add(ea_reg,(void*)val); + gen_add_LE(ea_reg,(void*)val); } } else { if (!scaled_reg_used) { @@ -1006,7 +1005,7 @@ 
skip_extend_word: } else { DYN_LEA_SEG_PHYS_REG_VAL(ea_reg,(decode.seg_prefix_used ? decode.seg_prefix : seg_base),scaled_reg,scale,0); } - gen_add(ea_reg,(void*)val); + gen_add_LE(ea_reg,(void*)val); } return; } @@ -1047,10 +1046,10 @@ skip_extend_word: if (!addseg) { if (!scaled_reg_used) { MOV_REG_VAL_TO_HOST_REG(ea_reg,base_reg); - gen_add(ea_reg,(void*)val); + gen_add_LE(ea_reg,(void*)val); } else { DYN_LEA_REG_VAL_REG_VAL(ea_reg,base_reg,scaled_reg,scale,0); - gen_add(ea_reg,(void*)val); + gen_add_LE(ea_reg,(void*)val); } } else { if (!scaled_reg_used) { @@ -1059,7 +1058,7 @@ skip_extend_word: DYN_LEA_SEG_PHYS_REG_VAL(ea_reg,(decode.seg_prefix_used ? decode.seg_prefix : seg_base),scaled_reg,scale,0); } ADD_REG_VAL_TO_HOST_REG(ea_reg,base_reg); - gen_add(ea_reg,(void*)val); + gen_add_LE(ea_reg,(void*)val); } return; } @@ -1124,11 +1123,11 @@ skip_extend_word: // succeeded, use the pointer to avoid code invalidation if (!addseg) { MOV_REG_VAL_TO_HOST_REG(ea_reg,base_reg); - gen_add(ea_reg,(void*)val); + gen_add_LE(ea_reg,(void*)val); } else { MOV_SEG_PHYS_TO_HOST_REG(ea_reg,(decode.seg_prefix_used ? 
decode.seg_prefix : seg_base)); ADD_REG_VAL_TO_HOST_REG(ea_reg,base_reg); - gen_add(ea_reg,(void*)val); + gen_add_LE(ea_reg,(void*)val); } return; } diff --git a/src/cpu/core_dynrec/decoder_opcodes.h b/src/cpu/core_dynrec/decoder_opcodes.h index 67eaee2..82bbbbb 100644 --- a/src/cpu/core_dynrec/decoder_opcodes.h +++ b/src/cpu/core_dynrec/decoder_opcodes.h @@ -250,12 +250,12 @@ static void dyn_prep_word_imm(Bit8u reg) { Bitu val; if (decode.big_op) { if (decode_fetchd_imm(val)) { - gen_mov_word_to_reg(FC_OP2,(void*)val,true); + gen_mov_LE_word_to_reg(FC_OP2,(void*)val,true); return; } } else { if (decode_fetchw_imm(val)) { - gen_mov_word_to_reg(FC_OP2,(void*)val,false); + gen_mov_LE_word_to_reg(FC_OP2,(void*)val,false); return; } } @@ -287,13 +287,13 @@ static void dyn_mov_word_imm(Bit8u reg) { Bitu val; if (decode.big_op) { if (decode_fetchd_imm(val)) { - gen_mov_word_to_reg(FC_OP1,(void*)val,true); + gen_mov_LE_word_to_reg(FC_OP1,(void*)val,true); MOV_REG_WORD32_FROM_HOST_REG(FC_OP1,reg); return; } } else { if (decode_fetchw_imm(val)) { - gen_mov_word_to_reg(FC_OP1,(void*)val,false); + gen_mov_LE_word_to_reg(FC_OP1,(void*)val,false); MOV_REG_WORD16_FROM_HOST_REG(FC_OP1,reg); return; } @@ -330,7 +330,7 @@ static void dyn_mov_byte_direct_al() { if (decode.big_addr) { Bitu val; if (decode_fetchd_imm(val)) { - gen_add(FC_ADDR,(void*)val); + gen_add_LE(FC_ADDR,(void*)val); } else { gen_add_imm(FC_ADDR,(Bit32u)val); } @@ -1184,11 +1184,8 @@ static void dyn_ret_near(Bitu bytes) { dyn_reduce_cycles(); if (decode.big_op) gen_call_function_raw((void*)&dynrec_pop_dword); - else { - gen_call_function_raw((void*)&dynrec_pop_word); - gen_extend_word(false,FC_RETOP); - } - gen_mov_word_from_reg(FC_RETOP,decode.big_op?(void*)(&reg_eip):(void*)(&reg_ip),true); + else gen_call_function_raw((void*)&dynrec_pop_word); + gen_mov_word_from_reg(FC_RETOP,decode.big_op?(void*)(&reg_eip):(void*)(&reg_ip),decode.big_op); if (bytes) gen_add_direct_word(&reg_esp,bytes,true); dyn_return(BR_Normal); diff --git 
a/src/cpu/core_dynrec/risc_ppc.h b/src/cpu/core_dynrec/risc_ppc.h new file mode 100644 index 0000000..d424ff5 --- /dev/null +++ b/src/cpu/core_dynrec/risc_ppc.h @@ -0,0 +1,901 @@ +/* + * Copyright (C) 2002-2019 The DOSBox Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +// some configuring defines that specify the capabilities of this architecture +// or aspects of the recompiling + +// protect FC_ADDR over function calls if necessaray +//#define DRC_PROTECT_ADDR_REG + +// try to use non-flags generating functions if possible +#define DRC_FLAGS_INVALIDATION +// try to replace _simple functions by code +#define DRC_FLAGS_INVALIDATION_DCODE + +// type with the same size as a pointer +#define DRC_PTR_SIZE_IM Bit32u + +// calling convention modifier +#define DRC_FC /* nothing */ +#define DRC_CALL_CONV /* nothing */ + +#define DRC_USE_REGS_ADDR +#define DRC_USE_SEGS_ADDR + +#if defined(_CALL_SYSV) +// disable if your toolchain doesn't provide a _SDA_BASE_ symbol (r13 constant value) +#define USE_SDA_BASE +#endif + +// register mapping +enum HostReg { + HOST_R0=0, + HOST_R1, + HOST_R2, + HOST_R3, + HOST_R4, + HOST_R5, + HOST_R6, + HOST_R7, + HOST_R8, + HOST_R9, + HOST_R10, + HOST_R11, + HOST_R12, + HOST_R13, + HOST_R14, + HOST_R15, + HOST_R16, + HOST_R17, + HOST_R18, + HOST_R19, + 
HOST_R20, + HOST_R21, + HOST_R22, + HOST_R23, + HOST_R24, + HOST_R25, + HOST_R26, // generic non-volatile (used for inline adc/sbb) + HOST_R27, // points to current CacheBlockDynRec (decode.block) + HOST_R28, // points to fpu + HOST_R29, // FC_ADDR + HOST_R30, // points to Segs + HOST_R31, // points to cpu_regs + + HOST_NONE +}; + +static const HostReg RegParams[] = { + HOST_R3, HOST_R4, HOST_R5, HOST_R6, + HOST_R7, HOST_R8, HOST_R9, HOST_R10 +}; + +#if C_FPU +#include "fpu.h" +extern struct FPU_rec fpu; +#endif + +#if defined(USE_SDA_BASE) +extern Bit32u _SDA_BASE_[]; +#endif + +// register that holds function return values +#define FC_RETOP HOST_R3 + +// register used for address calculations, if the ABI does not +// state that this register is preserved across function calls +// then define DRC_PROTECT_ADDR_REG above +#define FC_ADDR HOST_R29 + +// register that points to Segs[] +#define FC_SEGS_ADDR HOST_R30 +// register that points to cpu_regs[] +#define FC_REGS_ADDR HOST_R31 + +// register that holds the first parameter +#define FC_OP1 RegParams[0] + +// register that holds the second parameter +#define FC_OP2 RegParams[1] + +// special register that holds the third parameter for _R3 calls (byte accessible) +#define FC_OP3 RegParams[2] + +// register that holds byte-accessible temporary values +#define FC_TMP_BA1 FC_OP2 + +// register that holds byte-accessible temporary values +#define FC_TMP_BA2 FC_OP1 + +// temporary register for LEA +#define TEMP_REG_DRC HOST_R10 + +#define IMM(op, regsd, rega, imm) (((op)<<26)|((regsd)<<21)|((rega)<<16)| (((Bit32u)(imm))&0xFFFF)) +#define EXT(regsd, rega, regb, op, rc) ( (31<<26)|((regsd)<<21)|((rega)<<16)|((regb)<<11)| ((op)<<1)|(rc)) +#define RLW(op, regs, rega, sh, mb, me, rc) (((op)<<26)|((regs) <<21)|((rega)<<16)| ((sh)<<11)|((mb)<<6)|((me)<<1)|(rc)) + +#define IMM_OP(op, regsd, rega, imm) cache_addd(IMM(op, regsd, rega, imm)) +#define EXT_OP(regsd, rega, regb, op, rc) cache_addd(EXT(regsd, rega, regb, op, rc)) 
+#define RLW_OP(op, regs, rega, sh, mb, me, rc) cache_addd(RLW(op, regs, rega, sh, mb, me, rc)) + +// move a full register from reg_src to reg_dst +static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) +{ + if (reg_dst != reg_src) + EXT_OP(reg_src,reg_dst,reg_src,444,0); // or dst,src,src (mr dst,src) +} + +// move a 16bit constant value into dest_reg +// the upper 16bit of the destination register may be destroyed +static void gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) +{ + IMM_OP(14, dest_reg, 0, imm); // li dest,imm +} + +DRC_PTR_SIZE_IM block_ptr; + +// Helper for loading addresses +static HostReg INLINE gen_addr(Bit32s &addr, HostReg dest) +{ + Bit32s off; + + if ((Bit16s)addr == addr) + return HOST_R0; + + off = addr - (Bit32s)&Segs; + if ((Bit16s)off == off) + { + addr = off; + return FC_SEGS_ADDR; + } + + off = addr - (Bit32s)&cpu_regs; + if ((Bit16s)off == off) + { + addr = off; + return FC_REGS_ADDR; + } + + off = addr - (Bit32s)block_ptr; + if ((Bit16s)off == off) + { + addr = off; + return HOST_R27; + } + +#if C_FPU + off = addr - (Bit32s)&fpu; + if ((Bit16s)off == off) + { + addr = off; + return HOST_R28; + } +#endif + +#if defined(USE_SDA_BASE) + off = addr - (Bit32s)_SDA_BASE_; + if ((Bit16s)off == off) + { + addr = off; + return HOST_R13; + } +#endif + + IMM_OP(15, dest, 0, (addr+0x8000)>>16); // lis dest, addr@ha + addr = (Bit16s)addr; + return dest; +} + +// move a 32bit constant value into dest_reg +static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) +{ + HostReg ld = gen_addr((Bit32s&)imm, dest_reg); + if (imm || ld != dest_reg) + IMM_OP(14, dest_reg, ld, imm); // addi dest_reg, ldr, imm@l +} + +// move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg +// 16bit moves may destroy the upper 16bit of the destination register +static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) +{ + Bit32s addr = (Bit32s)data; + HostReg ld = gen_addr(addr, dest_reg); + IMM_OP(dword ? 
32:40, dest_reg, ld, addr); // lwz/lhz dest, addr@l(ld) +} + +// move a 32bit (dword==true) or 16bit (dword==false) value from host memory into dest_reg +static void gen_mov_LE_word_to_reg(HostReg dest_reg,void* data, bool dword) { + Bit32u addr = (Bit32u)data; + gen_mov_dword_to_reg_imm(dest_reg, addr); + EXT_OP(dest_reg, 0, dest_reg, dword ? 534 : 790, 0); // lwbrx/lhbrx dest, 0, dest +} + +// move an 8bit constant value into dest_reg +// the upper 24bit of the destination register can be destroyed +// this function does not use FC_OP1/FC_OP2 as dest_reg as these +// registers might not be directly byte-accessible on some architectures +static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) { + gen_mov_word_to_reg_imm(dest_reg, imm); +} + +// move an 8bit constant value into dest_reg +// the upper 24bit of the destination register can be destroyed +// this function can use FC_OP1/FC_OP2 as dest_reg which are +// not directly byte-accessible on some architectures +static void gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) { + gen_mov_word_to_reg_imm(dest_reg, imm); +} + +// move 32bit (dword==true) or 16bit (dword==false) of a register into memory +static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) +{ + Bit32s addr = (Bit32s)dest; + HostReg ld = gen_addr(addr, HOST_R8); + IMM_OP(dword ? 
36 : 44, src_reg, ld, addr); // stw/sth src,addr@l(ld) +} + +// move an 8bit value from memory into dest_reg +// the upper 24bit of the destination register can be destroyed +// this function does not use FC_OP1/FC_OP2 as dest_reg as these +// registers might not be directly byte-accessible on some architectures +static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) +{ + Bit32s addr = (Bit32s)data; + HostReg ld = gen_addr(addr, dest_reg); + IMM_OP(34, dest_reg, ld, addr); // lbz dest,addr@l(ld) +} + +// move an 8bit value from memory into dest_reg +// the upper 24bit of the destination register can be destroyed +// this function can use FC_OP1/FC_OP2 as dest_reg which are +// not directly byte-accessible on some architectures +static void gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) { + gen_mov_byte_to_reg_low(dest_reg, data); +} + +// move the lowest 8bit of a register into memory +static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) +{ + Bit32s addr = (Bit32s)dest; + HostReg ld = gen_addr(addr, HOST_R8); + IMM_OP(38, src_reg, ld, addr); // stb src_reg,addr@l(ld) +} + +// convert an 8bit word to a 32bit dword +// the register is zero-extended (sign==false) or sign-extended (sign==true) +static void gen_extend_byte(bool sign,HostReg reg) +{ + if (sign) + EXT_OP(reg, reg, 0, 954, 0); // extsb reg, src + else + RLW_OP(21, reg, reg, 0, 24, 31, 0); // rlwinm reg, src, 0, 24, 31 +} + +// convert a 16bit word to a 32bit dword +// the register is zero-extended (sign==false) or sign-extended (sign==true) +static void gen_extend_word(bool sign,HostReg reg) +{ + if (sign) + EXT_OP(reg, reg, 0, 922, 0); // extsh reg, reg + else + RLW_OP(21, reg, reg, 0, 16, 31, 0); // rlwinm reg, reg, 0, 16, 31 +} + +// add a 32bit value from memory to a full register +static void gen_add(HostReg reg,void* op) +{ + gen_mov_word_to_reg(HOST_R8, op, true); // r8 = *(Bit32u*)op + EXT_OP(reg,reg,HOST_R8,266,0); // add reg,reg,r8 +} + +// add a 32bit 
value from host memory to a full register +static void gen_add_LE(HostReg reg,void* op) +{ + gen_mov_LE_word_to_reg(HOST_R8, op, true); // r8 = op[0]|(op[1]<<8)|(op[2]<<16)|(op[3]<<24); + EXT_OP(reg,reg,HOST_R8,266,0); // add reg,reg,r8 +} + +// add a 32bit constant value to a full register +static void gen_add_imm(HostReg reg,Bit32u imm) +{ + if ((Bit16s)imm != (Bit32s)imm) + IMM_OP(15, reg, reg, (imm+0x8000)>>16); // addis reg,reg,imm@ha + if ((Bit16s)imm) + IMM_OP(14, reg, reg, imm); // addi reg, reg, imm@l +} + +// and a 32bit constant value with a full register +static void gen_and_imm(HostReg reg,Bit32u imm) { + Bits sbit,ebit,tbit,bbit,abit,i; + + // sbit = number of leading 0 bits + // ebit = number of trailing 0 bits + // tbit = number of total 0 bits + // bbit = number of leading 1 bits + // abit = number of trailing 1 bits + + if (imm == 0xFFFFFFFF) + return; + + if (!imm) + return gen_mov_word_to_reg_imm(reg, 0); + + sbit = ebit = tbit = bbit = abit = 0; + for (i=0; i < 32; i++) + { + if (!(imm & (1<<(31-i)))) + { + abit = 0; + tbit++; + if (sbit == i) + sbit++; + ebit++; + } + else + { + ebit = 0; + if (bbit == i) + bbit++; + abit++; + } + } + + if (sbit + ebit == tbit) + { + RLW_OP(21,reg,reg,0,sbit,31-ebit,0); // rlwinm reg,reg,0,sbit,31-ebit + return; + } + + if (sbit >= 16) + { + IMM_OP(28,reg,reg,imm); // andi. reg,reg,imm + return; + } + if (ebit >= 16) + { + IMM_OP(29,reg,reg,imm>>16); // andis. reg,reg,(imm>>16) + return; + } + + if (bbit + abit == (32 - tbit)) + { + RLW_OP(21,reg,reg,0,32-abit,bbit-1,0); // rlwinm reg,reg,0,32-abit,bbit-1 + return; + } + + IMM_OP(28, reg, HOST_R0, imm); // andi. r0, reg, imm@l + IMM_OP(29, reg, reg, imm>>16); // andis. 
reg, reg, imm@h + EXT_OP(reg, reg, HOST_R0, 444, 0); // or reg, reg, r0 +} + +// move a 32bit constant value into memory +static void gen_mov_direct_dword(void* dest,Bit32u imm) { + gen_mov_dword_to_reg_imm(HOST_R9, imm); + gen_mov_word_from_reg(HOST_R9, dest, 1); +} + +// move an address into memory (assumes address != NULL) +static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm) +{ + block_ptr = 0; + gen_mov_dword_to_reg_imm(HOST_R27, imm); + // this will be used to look-up the linked blocks + block_ptr = imm; + gen_mov_word_from_reg(HOST_R27, dest, 1); +} + +// add a 32bit (dword==true) or 16bit (dword==false) constant value to a 32bit memory value +static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) +{ + HostReg ld; + Bit32s addr = (Bit32s)dest; + + if (!dword) + { + imm &= 0xFFFF; + addr += 2; + } + + if (!imm) + return; + + ld = gen_addr(addr, HOST_R8); + IMM_OP(dword ? 32 : 40, HOST_R9, ld, addr); // lwz/lhz r9, addr@l(ld) + if (dword && (Bit16s)imm != (Bit32s)imm) + IMM_OP(15, HOST_R9, HOST_R9, (imm+0x8000)>>16); // addis r9,r9,imm@ha + if (!dword || (Bit16s)imm) + IMM_OP(14, HOST_R9, HOST_R9, imm); // addi r9,r9,imm@l + IMM_OP(dword ? 
36 : 44, HOST_R9, ld, addr); // stw/sth r9, addr@l(ld) +} + +// subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a 32-bit memory value +static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) { + gen_add_direct_word(dest, -(Bit32s)imm, dword); +} + +// effective address calculation, destination is dest_reg +// scale_reg is scaled by scale (scale_reg*(2^scale)) and +// added to dest_reg, then the immediate value is added +static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm) +{ + if (scale) + { + RLW_OP(21, scale_reg, HOST_R8, scale, 0, 31-scale, 0); // slwi r8,scale_reg,scale + scale_reg = HOST_R8; + } + + gen_add_imm(dest_reg, imm); + EXT_OP(dest_reg, dest_reg, scale_reg, 266, 0); // add dest,dest,scaled +} + +// effective address calculation, destination is dest_reg +// dest_reg is scaled by scale (dest_reg*(2^scale)), +// then the immediate value is added +static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) +{ + if (scale) + { + RLW_OP(21, dest_reg, dest_reg, scale, 0, 31-scale, 0); // slwi dest,dest,scale + } + + gen_add_imm(dest_reg, imm); +} + +// helper function to choose direct or indirect call +static int INLINE do_gen_call(void *func, Bit32u *pos, bool pad) +{ + Bit32s f = (Bit32s)func; + Bit32s off = f - (Bit32s)pos; + + // relative branches are limited to +/- ~32MB + if (off < 0x02000000 && off >= -0x02000000) + { + pos[0] = 0x48000001 | (off & 0x03FFFFFC); // bl func + if (pad) + { + pos[1] = 0x4800000C; // b 12+ + pos[2] = pos[3] = IMM(24, 0, 0, 0); // nop + return 16; + } + return 4; + } + + pos[0] = IMM(15, HOST_R8, 0, f>>16); // lis r8,imm@h + pos[1] = IMM(24, HOST_R8, HOST_R8, f); // ori r8,r8,imm@l + pos[2] = EXT(HOST_R8, 9, 0, 467, 0); // mtctr r8 + pos[3] = IMM(19, 0x14, 0, (528<<1)|1); // bctrl + return 16; +} + +// generate a call to a parameterless function +static void INLINE gen_call_function_raw(void * func,bool fastcall=true) +{ + cache.pos += do_gen_call(func, 
(Bit32u*)cache.pos, fastcall); +} + +// generate a call to a function with paramcount parameters +// note: the parameters are loaded in the architecture specific way +// using the gen_load_param_ functions below +static Bit32u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false) +{ + Bit32u proc_addr=(Bit32u)cache.pos; + gen_call_function_raw(func,fastcall); + return proc_addr; +} + +// load an immediate value as param'th function parameter +static void INLINE gen_load_param_imm(Bitu imm,Bitu param) { + gen_mov_dword_to_reg_imm(RegParams[param], imm); +} + +// load an address as param'th function parameter +static void INLINE gen_load_param_addr(Bitu addr,Bitu param) { + gen_load_param_imm(addr, param); +} + +// load a host-register as param'th function parameter +static void INLINE gen_load_param_reg(Bitu reg,Bitu param) { + gen_mov_regs(RegParams[param], (HostReg)reg); +} + +// load a value from memory as param'th function parameter +static void INLINE gen_load_param_mem(Bitu mem,Bitu param) { + gen_mov_word_to_reg(RegParams[param], (void*)mem, true); +} + +// jump to an address pointed at by ptr, offset is in imm +static void gen_jmp_ptr(void * ptr,Bits imm=0) { + gen_mov_word_to_reg(HOST_R8,ptr,true); // r8 = *(Bit32u*)ptr + if ((Bit16s)imm != (Bit32s)imm) + IMM_OP(15, HOST_R8, HOST_R8, (imm + 0x8000)>>16); // addis r8, r8, imm@ha + IMM_OP(32, HOST_R8, HOST_R8, imm); // lwz r8, imm@l(r8) + EXT_OP(HOST_R8, 9, 0, 467, 0); // mtctr r8 + IMM_OP(19, 0x14, 0, 528<<1); // bctr +} + +// short conditional jump (+-127 bytes) if register is zero +// the destination is set by gen_fill_branch() later +static Bit32u gen_create_branch_on_zero(HostReg reg,bool dword) +{ + if (!dword) + IMM_OP(28,reg,HOST_R0,0xFFFF); // andi. 
r0,reg,0xFFFF + else + IMM_OP(11, 0, reg, 0); // cmpwi cr0, reg, 0 + + IMM_OP(16, 0x0C, 2, 0); // bc 12,CR0[Z] (beq) + return ((Bit32u)cache.pos-4); +} + +// short conditional jump (+/-32KB, 16-bit displacement) if register is nonzero +// the destination is set by gen_fill_branch() later +static Bit32u gen_create_branch_on_nonzero(HostReg reg,bool dword) +{ + if (!dword) + IMM_OP(28,reg,HOST_R0,0xFFFF); // andi. r0,reg,0xFFFF + else + IMM_OP(11, 0, reg, 0); // cmpwi cr0, reg, 0 + + IMM_OP(16, 0x04, 2, 0); // bc 4,CR0[Z] (bne) + return ((Bit32u)cache.pos-4); +} + +// calculate relative offset and fill it into the location pointed to by data +static void gen_fill_branch(DRC_PTR_SIZE_IM data) +{ +#if C_DEBUG + Bits len=(Bit32u)cache.pos-data; + if (len<0) len=-len; + if (len >= 0x8000) LOG_MSG("Big jump %d",len); +#endif + + ((Bit16u*)data)[1] =((Bit32u)cache.pos-data) & 0xFFFC; +} + + +// conditional jump if register is nonzero +// for dword==true all 32 bits of the register are tested +// for dword==false the lowest 8 bits of the register are tested +static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool dword) +{ + if (!dword) + IMM_OP(28,reg,HOST_R0,0xFF); // andi. 
r0,reg,0xFF + else + IMM_OP(11, 0, reg, 0); // cmpwi cr0, reg, 0 + + IMM_OP(16, 0x04, 2, 0); // bne + return ((Bit32u)cache.pos-4); +} + +// compare 32bit-register against zero and jump if value less/equal than zero +static Bit32u gen_create_branch_long_leqzero(HostReg reg) +{ + IMM_OP(11, 0, reg, 0); // cmpwi cr0, reg, 0 + + IMM_OP(16, 0x04, 1, 0); // ble + return ((Bit32u)cache.pos-4); +} + +// calculate long relative offset and fill it into the location pointed to by data +static void gen_fill_branch_long(Bit32u data) { + return gen_fill_branch((DRC_PTR_SIZE_IM)data); +} + +static void cache_block_closing(Bit8u* block_start,Bitu block_size) +{ +#if defined(__GNUC__) + Bit8u* start = (Bit8u*)((Bit32u)block_start & -32); + + while (start < block_start + block_size) + { + asm volatile("dcbst %y0\n\t icbi %y0" :: "Z"(*start)); + start += 32; + } + asm volatile("sync\n\t isync"); +#else + #error "Don't know how to flush/invalidate CacheBlock with this compiler" +#endif +} + +static void cache_block_before_close(void) {} + +static void gen_function(void* func) +{ + Bit32s off = (Bit32s)func - (Bit32s)cache.pos; + + // relative branches are limited to +/- 32MB + if (off < 0x02000000 && off >= -0x02000000) { + cache_addd(0x48000000 | (off & 0x03FFFFFC)); // b func + return; + } + + gen_mov_dword_to_reg_imm(HOST_R8, (Bit32u)func); // r8 = func + EXT_OP(HOST_R8, 9, 0, 467, 0); // mtctr r8 + IMM_OP(19, 0x14, 0, 528<<1); // bctr +} + +// gen_run_code is assumed to be called exactly once, gen_return_function() jumps back to it +static void* epilog_addr; +static Bit8u *getCF_glue; +static void gen_run_code(void) +{ + // prolog + IMM_OP(37, HOST_R1, HOST_R1, -256); // stwu sp,-256(sp) + EXT_OP(FC_OP1, 9, 0, 467, 0); // mtctr FC_OP1 + EXT_OP(HOST_R0, 8, 0, 339, 0); // mflr r0 + + IMM_OP(47, HOST_R26, HOST_R1, 128); // stmw r26, 128(sp) + + IMM_OP(15, FC_SEGS_ADDR, 0, ((Bit32u)&Segs)>>16); // lis FC_SEGS_ADDR, Segs@h + IMM_OP(24, FC_SEGS_ADDR, FC_SEGS_ADDR, &Segs); // ori 
FC_SEGS_ADDR, FC_SEGS_ADDR, Segs@l + + IMM_OP(15, FC_REGS_ADDR, 0, ((Bit32u)&cpu_regs)>>16); // lis FC_REGS_ADDR, cpu_regs@h + IMM_OP(24, FC_REGS_ADDR, FC_REGS_ADDR, &cpu_regs); // ori FC_REGS_ADDR, FC_REGS_ADDR, cpu_regs@l + +#if C_FPU + IMM_OP(15, HOST_R28, 0, ((Bit32u)&fpu)>>16); // lis r28, fpu@h + IMM_OP(24, HOST_R28, HOST_R28, &fpu); // ori r28, r28, fpu@l +#endif + + IMM_OP(36, HOST_R0, HOST_R1, 256+4); // stw r0,256+4(sp) + IMM_OP(19, 0x14, 0, 528<<1); // bctr + + // epilog + epilog_addr = cache.pos; + IMM_OP(32, HOST_R0, HOST_R1, 256+4); // lwz r0,256+4(sp) + IMM_OP(46, HOST_R26, HOST_R1, 128); // lmw r26, 128(sp) + EXT_OP(HOST_R0, 8, 0, 467, 0); // mtlr r0 + IMM_OP(14, HOST_R1, HOST_R1, 256); // addi sp, sp, 256 + IMM_OP(19, 0x14, 0, 16<<1); // blr + + // trampoline to call get_CF() + getCF_glue = cache.pos; + gen_function((void*)get_CF); +} + +// return from a function +static void gen_return_function(void) +{ + gen_function(epilog_addr); +} + +// called when a call to a function can be replaced by a +// call to a simpler function +static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) +{ + Bit32u *op = (Bit32u*)pos; + Bit32u *end = op+4; + + switch (flags_type) { +#if defined(DRC_FLAGS_INVALIDATION_DCODE) + // try to avoid function calls but rather directly fill in code + case t_ADDb: + case t_ADDw: + case t_ADDd: + *op++ = EXT(FC_RETOP, FC_OP1, FC_OP2, 266, 0); // add FC_RETOP, FC_OP1, FC_OP2 + break; + case t_ORb: + case t_ORw: + case t_ORd: + *op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_OP1, FC_OP2 + break; + case t_ADCb: + case t_ADCw: + case t_ADCd: + op[0] = EXT(HOST_R26, FC_OP1, FC_OP2, 266, 0); // r26 = FC_OP1 + FC_OP2 + op[1] = 0x48000001 | ((getCF_glue-(pos+4)) & 0x03FFFFFC); // bl get_CF + op[2] = IMM(12, HOST_R0, FC_RETOP, -1); // addic r0, FC_RETOP, 0xFFFFFFFF (XER[CA] = !!CF) + op[3] = EXT(FC_RETOP, HOST_R26, 0, 202, 0); // addze; FC_RETOP = r26 + !!CF + return; + case t_SBBb: + case t_SBBw: + 
case t_SBBd: + op[0] = EXT(HOST_R26, FC_OP2, FC_OP1, 40, 0); // r26 = FC_OP1 - FC_OP2 + op[1] = 0x48000001 | ((getCF_glue-(pos+4)) & 0x03FFFFFC); // bl get_CF + op[2] = IMM(8, HOST_R0, FC_RETOP, 0); // subfic r0, FC_RETOP, 0 (XER[CA] = !CF) + op[3] = EXT(FC_RETOP, HOST_R26, 0, 234, 0); // addme; FC_RETOP = r26 - 1 + !CF + return; + case t_ANDb: + case t_ANDw: + case t_ANDd: + *op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 28, 0); // and FC_RETOP, FC_OP1, FC_OP2 + break; + case t_SUBb: + case t_SUBw: + case t_SUBd: + *op++ = EXT(FC_RETOP, FC_OP2, FC_OP1, 40, 0); // subf FC_RETOP, FC_OP2, FC_OP1 + break; + case t_XORb: + case t_XORw: + case t_XORd: + *op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 316, 0); // xor FC_RETOP, FC_OP1, FC_OP2 + break; + case t_CMPb: + case t_CMPw: + case t_CMPd: + case t_TESTb: + case t_TESTw: + case t_TESTd: + break; + case t_INCb: + case t_INCw: + case t_INCd: + *op++ = IMM(14, FC_RETOP, FC_OP1, 1); // addi FC_RETOP, FC_OP1, #1 + break; + case t_DECb: + case t_DECw: + case t_DECd: + *op++ = IMM(14, FC_RETOP, FC_OP1, -1); // addi FC_RETOP, FC_OP1, #-1 + break; + case t_NEGb: + case t_NEGw: + case t_NEGd: + *op++ = EXT(FC_RETOP, FC_OP1, 0, 104, 0); // neg FC_RETOP, FC_OP1 + break; + case t_SHLb: + case t_SHLw: + case t_SHLd: + *op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 24, 0); // slw FC_RETOP, FC_OP1, FC_OP2 + break; + case t_SHRb: + case t_SHRw: + case t_SHRd: + *op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 536, 0); // srw FC_RETOP, FC_OP1, FC_OP2 + break; + case t_SARb: + *op++ = EXT(FC_OP1, FC_RETOP, 0, 954, 0); // extsb FC_RETOP, FC_OP1 + case t_SARw: + if (flags_type == t_SARw) + *op++ = EXT(FC_OP1, FC_RETOP, 0, 922, 0); // extsh FC_RETOP, FC_OP1 + case t_SARd: + *op++ = EXT(FC_OP1, FC_RETOP, FC_OP2, 792, 0); // sraw FC_RETOP, FC_OP1, FC_OP2 + break; + + case t_ROLb: + *op++ = RLW(20, FC_OP1, FC_OP1, 24, 0, 7, 0); // rlwimi FC_OP1, FC_OP1, 24, 0, 7 + case t_ROLw: + if (flags_type == t_ROLw) + *op++ = RLW(20, FC_OP1, FC_OP1, 16, 0, 15, 0); // rlwimi FC_OP1, FC_OP1, 
16, 0, 15 + case t_ROLd: + *op++ = RLW(23, FC_OP1, FC_RETOP, FC_OP2, 0, 31, 0); // rotlw FC_RETOP, FC_OP1, FC_OP2 + break; + + case t_RORb: + *op++ = RLW(20, FC_OP1, FC_OP1, 8, 16, 23, 0); // rlwimi FC_OP1, FC_OP1, 8, 16, 23 + case t_RORw: + if (flags_type == t_RORw) + *op++ = RLW(20, FC_OP1, FC_OP1, 16, 0, 15, 0); // rlwimi FC_OP1, FC_OP1, 16, 0, 15 + case t_RORd: + *op++ = IMM(8, FC_OP2, FC_OP2, 32); // subfic FC_OP2, FC_OP2, 32 (FC_OP2 = 32 - FC_OP2) + *op++ = RLW(23, FC_OP1, FC_RETOP, FC_OP2, 0, 31, 0); // rotlw FC_RETOP, FC_OP1, FC_OP2 + break; + + case t_DSHLw: // technically not correct for FC_OP3 > 16 + *op++ = RLW(20, FC_OP2, FC_RETOP, 16, 0, 15, 0); // rlwimi FC_RETOP, FC_OP2, 16, 0, 15 + *op++ = RLW(23, FC_RETOP, FC_RETOP, FC_OP3, 0, 31, 0); // rotlw FC_RETOP, FC_RETOP, FC_OP3 + break; + case t_DSHLd: + op[0] = EXT(FC_OP1, FC_RETOP, FC_OP3, 24, 0); // slw FC_RETOP, FC_OP1, FC_OP3 + op[1] = IMM(8, FC_OP3, FC_OP3, 32); // subfic FC_OP3, FC_OP3, 32 (FC_OP3 = 32 - FC_OP3) + op[2] = EXT(FC_OP2, FC_OP2, FC_OP3, 536, 0); // srw FC_OP2, FC_OP2, FC_OP3 + op[3] = EXT(FC_RETOP, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_RETOP, FC_OP2 + return; + case t_DSHRw: // technically not correct for FC_OP3 > 16 + *op++ = RLW(20, FC_OP2, FC_RETOP, 16, 0, 15, 0); // rlwimi FC_RETOP, FC_OP2, 16, 0, 15 + *op++ = EXT(FC_RETOP, FC_RETOP, FC_OP3, 536, 0); // srw FC_RETOP, FC_RETOP, FC_OP3 + break; + case t_DSHRd: + op[0] = EXT(FC_OP1, FC_RETOP, FC_OP3, 536, 0); // srw FC_RETOP, FC_OP1, FC_OP3 + op[1] = IMM(8, FC_OP3, FC_OP3, 32); // subfic FC_OP3, FC_OP3, 32 (FC_OP3 = 32 - FC_OP3) + op[2] = EXT(FC_OP2, FC_OP2, FC_OP3, 24, 0); // slw FC_OP2, FC_OP2, FC_OP3 + op[3] = EXT(FC_RETOP, FC_RETOP, FC_OP2, 444, 0); // or FC_RETOP, FC_RETOP, FC_OP2 + return; +#endif + default: + do_gen_call(fct_ptr, op, true); + return; + } + + *op = 0x48000000 + 4*(end-op); // b end +} + +// mov 16bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 2 must be zero) +// 16bit moves may 
destroy the upper 16bit of the destination register +static void gen_mov_seg16_to_reg(HostReg dest_reg,Bitu index) { + gen_mov_word_to_reg(dest_reg, (Bit8u*)&Segs + index, false); +} + +// mov 32bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 4 must be zero) +static void gen_mov_seg32_to_reg(HostReg dest_reg,Bitu index) { + gen_mov_word_to_reg(dest_reg, (Bit8u*)&Segs + index, true); +} + +// add a 32bit value from Segs[index] to a full register using FC_SEGS_ADDR (index modulo 4 must be zero) +static void gen_add_seg32_to_reg(HostReg reg,Bitu index) { + gen_add(reg, (Bit8u*)&Segs + index); +} + +// mov 16bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 2 must be zero) +// 16bit moves may destroy the upper 16bit of the destination register +static void gen_mov_regval16_to_reg(HostReg dest_reg,Bitu index) +{ + gen_mov_word_to_reg(dest_reg, (Bit8u*)&cpu_regs + index, false); +} + +// mov 32bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 4 must be zero) +static void gen_mov_regval32_to_reg(HostReg dest_reg,Bitu index) +{ + gen_mov_word_to_reg(dest_reg, (Bit8u*)&cpu_regs + index, true); +} + +// move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR +// the upper 24bit of the destination register can be destroyed +// this function does not use FC_OP1/FC_OP2 as dest_reg as these +// registers might not be directly byte-accessible on some architectures +static void gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index) +{ + gen_mov_byte_to_reg_low(dest_reg, (Bit8u*)&cpu_regs + index); +} + +// move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR +// the upper 24bit of the destination register can be destroyed +// this function can use FC_OP1/FC_OP2 as dest_reg which are +// not directly byte-accessible on some architectures +static void INLINE gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg,Bitu index) { + 
gen_mov_byte_to_reg_low_canuseword(dest_reg, (Bit8u*)&cpu_regs + index); +} + +// move 16bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 2 must be zero) +static void gen_mov_regval16_from_reg(HostReg src_reg,Bitu index) +{ + gen_mov_word_from_reg(src_reg, (Bit8u*)&cpu_regs + index, false); +} + +// move 32bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 4 must be zero) +static void gen_mov_regval32_from_reg(HostReg src_reg,Bitu index) +{ + gen_mov_word_from_reg(src_reg, (Bit8u*)&cpu_regs + index, true); +} + +// move the lowest 8bit of a register into cpu_regs[index] using FC_REGS_ADDR +static void gen_mov_regbyte_from_reg_low(HostReg src_reg,Bitu index) +{ + gen_mov_byte_from_reg_low(src_reg, (Bit8u*)&cpu_regs + index); +} + +// add a 32bit value from cpu_regs[index] to a full register using FC_REGS_ADDR (index modulo 4 must be zero) +static void gen_add_regval32_to_reg(HostReg reg,Bitu index) +{ + gen_add(reg, (Bit8u*)&cpu_regs + index); +} + +// move 32bit (dword==true) or 16bit (dword==false) of a register into cpu_regs[index] using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero) +static void gen_mov_regword_from_reg(HostReg src_reg,Bitu index,bool dword) { + if (dword) + gen_mov_regval32_from_reg(src_reg, index); + else + gen_mov_regval16_from_reg(src_reg, index); +} + +// move a 32bit (dword==true) or 16bit (dword==false) value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero) +// 16bit moves may destroy the upper 16bit of the destination register +static void gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword) { + if (dword) + gen_mov_regval32_to_reg(dest_reg, index); + else + gen_mov_regval16_to_reg(dest_reg, index); +} diff --git a/src/dos/drive_fat.cpp b/src/dos/drive_fat.cpp index e75b47c..d651703 100644 --- a/src/dos/drive_fat.cpp +++ 
b/src/dos/drive_fat.cpp @@ -223,20 +223,12 @@ bool fatFile::Write(Bit8u * data, Bit16u *size) { if(loadedSector) myDrive->writeSector(currentSector, sectorBuffer); currentSector = myDrive->getAbsoluteSectFromBytePos(firstCluster, seekpos); - if(currentSector == 0) { - /* EOC reached before EOF - try to increase file allocation */ - myDrive->appendCluster(firstCluster); - /* Try getting sector again */ - currentSector = myDrive->getAbsoluteSectFromBytePos(firstCluster, seekpos); - if(currentSector == 0) { - /* No can do. lets give up and go home. We must be out of room */ - loadedSector = false; - goto finalizeWrite; - } + if(currentSector == 0) loadedSector = false; + else { + curSectOff = 0; + myDrive->readSector(currentSector, sectorBuffer); + loadedSector = true; } - curSectOff = 0; - myDrive->readSector(currentSector, sectorBuffer); - loadedSector = true; } --sizedec; } @@ -333,7 +325,7 @@ Bit32u fatDrive::getClusterValue(Bit32u clustNum) { switch(fattype) { case FAT12: - clustValue = *((Bit16u *)&fatSectBuffer[fatentoff]); + clustValue = var_read((Bit16u *)&fatSectBuffer[fatentoff]); if(clustNum & 0x1) { clustValue >>= 4; } else { @@ -341,10 +333,10 @@ Bit32u fatDrive::getClusterValue(Bit32u clustNum) { } break; case FAT16: - clustValue = *((Bit16u *)&fatSectBuffer[fatentoff]); + clustValue = var_read((Bit16u *)&fatSectBuffer[fatentoff]); break; case FAT32: - clustValue = *((Bit32u *)&fatSectBuffer[fatentoff]); + clustValue = var_read((Bit32u *)&fatSectBuffer[fatentoff]); break; } @@ -380,7 +372,7 @@ void fatDrive::setClusterValue(Bit32u clustNum, Bit32u clustValue) { switch(fattype) { case FAT12: { - Bit16u tmpValue = *((Bit16u *)&fatSectBuffer[fatentoff]); + Bit16u tmpValue = var_read((Bit16u *)&fatSectBuffer[fatentoff]); if(clustNum & 0x1) { clustValue &= 0xfff; clustValue <<= 4; @@ -392,14 +384,14 @@ void fatDrive::setClusterValue(Bit32u clustNum, Bit32u clustValue) { tmpValue &= 0xf000; tmpValue |= (Bit16u)clustValue; } - *((Bit16u 
*)&fatSectBuffer[fatentoff]) = tmpValue; + var_write((Bit16u *)&fatSectBuffer[fatentoff], tmpValue); break; } case FAT16: - *((Bit16u *)&fatSectBuffer[fatentoff]) = (Bit16u)clustValue; + var_write((Bit16u *)&fatSectBuffer[fatentoff], (Bit16u)clustValue); break; case FAT32: - *((Bit32u *)&fatSectBuffer[fatentoff]) = clustValue; + var_write((Bit32u *)&fatSectBuffer[fatentoff], clustValue); break; } for(int fc=0;fcRead_AbsoluteSector(0+partSectOff,&bootbuffer); + bootbuffer.bytespersector = var_read(&bootbuffer.bytespersector); + bootbuffer.reservedsectors = var_read(&bootbuffer.reservedsectors); + bootbuffer.rootdirentries = var_read(&bootbuffer.rootdirentries); + bootbuffer.totalsectorcount = var_read(&bootbuffer.totalsectorcount); + bootbuffer.sectorsperfat = var_read(&bootbuffer.sectorsperfat); + bootbuffer.sectorspertrack = var_read(&bootbuffer.sectorspertrack); + bootbuffer.headcount = var_read(&bootbuffer.headcount); + bootbuffer.hiddensectorcount = var_read(&bootbuffer.hiddensectorcount); + bootbuffer.totalsecdword = var_read(&bootbuffer.totalsecdword); + if (!is_hdd) { /* Identify floppy format */ if ((bootbuffer.nearjmp[0] == 0x69 || bootbuffer.nearjmp[0] == 0xe9 || @@ -1039,6 +1043,18 @@ char* trimString(char* str) { return removeTrailingSpaces(removeLeadingSpaces(str)); } +static void copyDirEntry(const direntry *src, direntry *dst) { + memcpy(dst, src, 14); // single byte fields + var_write(&dst->crtTime, src->crtTime); + var_write(&dst->crtDate, src->crtDate); + var_write(&dst->accessDate, src->accessDate); + var_write(&dst->hiFirstClust, src->hiFirstClust); + var_write(&dst->modTime, src->modTime); + var_write(&dst->modDate, src->modDate); + var_write(&dst->loFirstClust, src->loFirstClust); + var_write(&dst->entrysize, src->entrysize); +} + bool fatDrive::FindNextInternal(Bit32u dirClustNumber, DOS_DTA &dta, direntry *foundEntry) { direntry sectbuf[16]; /* 16 directory entries per sector */ Bit32u logentsector; /* Logical entry sector */ @@ -1110,11 
+1126,11 @@ nextfile: /* Compare name to search pattern */ if(!WildFileCmp(find_name,srch_pattern)) goto nextfile; - //dta.SetResult(find_name, sectbuf[entryoffset].entrysize, sectbuf[entryoffset].crtDate, sectbuf[entryoffset].crtTime, sectbuf[entryoffset].attrib); + copyDirEntry(&sectbuf[entryoffset], foundEntry); - dta.SetResult(find_name, sectbuf[entryoffset].entrysize, sectbuf[entryoffset].modDate, sectbuf[entryoffset].modTime, sectbuf[entryoffset].attrib); + //dta.SetResult(find_name, foundEntry->entrysize, foundEntry->crtDate, foundEntry->crtTime, foundEntry->attrib); - memcpy(foundEntry, &sectbuf[entryoffset], sizeof(direntry)); + dta.SetResult(find_name, foundEntry->entrysize, foundEntry->modDate, foundEntry->modTime, foundEntry->attrib); return true; } @@ -1189,7 +1205,7 @@ bool fatDrive::directoryBrowse(Bit32u dirClustNumber, direntry *useEntry, Bit32s --entNum; } - memcpy(useEntry, &sectbuf[entryoffset],sizeof(direntry)); + copyDirEntry(&sectbuf[entryoffset], useEntry); return true; } @@ -1223,9 +1239,9 @@ bool fatDrive::directoryChange(Bit32u dirClustNumber, direntry *useEntry, Bit32s --entNum; } if(tmpsector != 0) { - memcpy(&sectbuf[entryoffset], useEntry, sizeof(direntry)); + copyDirEntry(useEntry, &sectbuf[entryoffset]); writeSector(tmpsector, sectbuf); - return true; + return true; } else { return false; } @@ -1264,7 +1280,7 @@ bool fatDrive::addDirectoryEntry(Bit32u dirClustNumber, direntry useEntry) { /* Deleted file entry or end of directory list */ if ((sectbuf[entryoffset].entryname[0] == 0xe5) || (sectbuf[entryoffset].entryname[0] == 0x00)) { - sectbuf[entryoffset] = useEntry; + copyDirEntry(&useEntry, &sectbuf[entryoffset]); writeSector(tmpsector,sectbuf); break; } diff --git a/src/platform/wii/config.h b/src/platform/wii/config.h index e221b39..73f9b68 100644 --- a/src/platform/wii/config.h +++ b/src/platform/wii/config.h @@ -28,7 +28,7 @@ #define C_DYNAMIC_X86 0 /* Define to 1 to use recompiling cpu core. 
Can not be used together with the dynamic-x86 core */ -#define C_DYNREC 0 +#define C_DYNREC 1 /* Enable memory function inlining in */ #define C_CORE_INLINE 1 @@ -54,6 +54,8 @@ /* Define to 1 if you want serial passthrough support (Win32 only). */ #define C_DIRECTSERIAL 0 +#define WORDS_BIGENDIAN 1 + #define GCC_ATTRIBUTE(x) __attribute__ ((x)) #define GCC_UNLIKELY(x) __builtin_expect((x), 0) #define GCC_LIKELY(x) __builtin_expect((x), 1)