/*
 *  Copyright (C) 2002-2019  The DOSBox Team
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */


/* ARMv4/ARMv7 (little endian) backend by M-HT (arm version) */


// temporary registers
#define temp1 HOST_ip
#define temp2 HOST_v3
#define temp3 HOST_v4

// register that holds function return values
#define FC_RETOP HOST_a1

// register used for address calculations,
#define FC_ADDR HOST_v1			// has to be saved across calls, see DRC_PROTECT_ADDR_REG

// register that holds the first parameter
#define FC_OP1 HOST_a1

// register that holds the second parameter
#define FC_OP2 HOST_a2

// special register that holds the third parameter for _R3 calls (byte accessible)
#define FC_OP3 HOST_v2

// register that holds byte-accessible temporary values
#define FC_TMP_BA1 HOST_a1

// register that holds byte-accessible temporary values
#define FC_TMP_BA2 HOST_a2

// temporary register for LEA
#define TEMP_REG_DRC HOST_v2

// used to hold the address of "cpu_regs" - preferably filled in function gen_run_code
#define FC_REGS_ADDR HOST_v7

// used to hold the address of "Segs" - preferably filled in function gen_run_code
#define FC_SEGS_ADDR HOST_v8

// used to hold the address of "core_dynrec.readdata" - filled in function gen_run_code
#define readdata_addr HOST_v5


// helper macro
#define ROTATE_SCALE(x) ( (x)?(32 - x):(0) )


// instruction encodings

// move
// mov dst, #(imm ror rimm)		@	0 <= imm <= 255	&	rimm mod 2 = 0
#define MOV_IMM(dst, imm, rimm) (0xe3a00000 + ((dst) << 12) + (imm) + ((rimm) << 7) )
// mov dst, src, lsl #imm
#define MOV_REG_LSL_IMM(dst, src, imm) (0xe1a00000 + ((dst) << 12) + (src) + ((imm) << 7) )
// movs dst, src, lsl #imm
#define MOVS_REG_LSL_IMM(dst, src, imm) (0xe1b00000 + ((dst) << 12) + (src) + ((imm) << 7) )
// mov dst, src, lsr #imm
#define MOV_REG_LSR_IMM(dst, src, imm) (0xe1a00020 + ((dst) << 12) + (src) + ((imm) << 7) )
// mov dst, src, asr #imm
#define MOV_REG_ASR_IMM(dst, src, imm) (0xe1a00040 + ((dst) << 12) + (src) + ((imm) << 7) )
// mov dst, src, lsl rreg
#define MOV_REG_LSL_REG(dst, src, rreg) (0xe1a00010 + ((dst) << 12) + (src) + ((rreg) << 8) )
// mov dst, src, lsr rreg
#define MOV_REG_LSR_REG(dst, src, rreg) (0xe1a00030 + ((dst) << 12) + (src) + ((rreg) << 8) )
// mov dst, src, asr rreg
#define MOV_REG_ASR_REG(dst, src, rreg) (0xe1a00050 + ((dst) << 12) + (src) + ((rreg) << 8) )
// mov dst, src, ror rreg
#define MOV_REG_ROR_REG(dst, src, rreg) (0xe1a00070 + ((dst) << 12) + (src) + ((rreg) << 8) )
// mvn dst, #(imm ror rimm)		@	0 <= imm <= 255	&	rimm mod 2 = 0
#define MVN_IMM(dst, imm, rimm) (0xe3e00000 + ((dst) << 12) + (imm) + ((rimm) << 7) )
#if C_TARGETCPU == ARMV7LE
// movw dst, #imm		@	0 <= imm <= 65535
#define MOVW(dst, imm) (0xe3000000 + ((dst) << 12) + (((imm) & 0xf000) << 4) + ((imm) & 0x0fff) )
// movt dst, #imm		@	0 <= imm <= 65535
#define MOVT(dst, imm) (0xe3400000 + ((dst) << 12) + (((imm) & 0xf000) << 4) + ((imm) & 0x0fff) )
#endif
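// Worked example (illustrative note, not part of the original backend): the immediate
// macros take an 8-bit value plus an even right-rotation, matching the A32 operand2
// immediate format. Assuming HOST_a1 encodes register number 0 (r0):
//   MOV_IMM(HOST_a1, 1, 0)                   -> 0xe3a00001,  mov r0, #1
//   MOV_IMM(HOST_a1, 0x12, ROTATE_SCALE(8))  -> 0xe3a00c12,  mov r0, #0x1200
// ROTATE_SCALE(8) = 24, and 0x12 rotated right by 24 bits equals 0x12 << 8 = 0x1200.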
// arithmetic
// add dst, src, #(imm ror rimm)		@	0 <= imm <= 255	&	rimm mod 2 = 0
#define ADD_IMM(dst, src, imm, rimm) (0xe2800000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
// add dst, src1, src2, lsl #imm
#define ADD_REG_LSL_IMM(dst, src1, src2, imm) (0xe0800000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
// sub dst, src, #(imm ror rimm)		@	0 <= imm <= 255	&	rimm mod 2 = 0
#define SUB_IMM(dst, src, imm, rimm) (0xe2400000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
// sub dst, src1, src2, lsl #imm
#define SUB_REG_LSL_IMM(dst, src1, src2, imm) (0xe0400000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
// rsb dst, src, #(imm ror rimm)		@	0 <= imm <= 255	&	rimm mod 2 = 0
#define RSB_IMM(dst, src, imm, rimm) (0xe2600000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
// cmp src, #(imm ror rimm)		@	0 <= imm <= 255	&	rimm mod 2 = 0
#define CMP_IMM(src, imm, rimm) (0xe3500000 + ((src) << 16) + (imm) + ((rimm) << 7) )
// nop
#if C_TARGETCPU == ARMV7LE
#define NOP (0xe320f000)
#else
#define NOP MOV_REG_LSL_IMM(HOST_r0, HOST_r0, 0)
#endif

// logical
// tst src, #(imm ror rimm)		@	0 <= imm <= 255	&	rimm mod 2 = 0
#define TST_IMM(src, imm, rimm) (0xe3100000 + ((src) << 16) + (imm) + ((rimm) << 7) )
// and dst, src, #(imm ror rimm)		@	0 <= imm <= 255	&	rimm mod 2 = 0
#define AND_IMM(dst, src, imm, rimm) (0xe2000000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
// and dst, src1, src2, lsl #imm
#define AND_REG_LSL_IMM(dst, src1, src2, imm) (0xe0000000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
// orr dst, src, #(imm ror rimm)		@	0 <= imm <= 255	&	rimm mod 2 = 0
#define ORR_IMM(dst, src, imm, rimm) (0xe3800000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
// orr dst, src1, src2, lsl #imm
#define ORR_REG_LSL_IMM(dst, src1, src2, imm) (0xe1800000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
// orr dst, src1, src2, lsr #imm
#define ORR_REG_LSR_IMM(dst, src1, src2, imm) (0xe1800020 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
// eor dst, src1, src2, lsl #imm
#define EOR_REG_LSL_IMM(dst, src1, src2, imm) (0xe0200000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )
// bic dst, src, #(imm ror rimm)		@	0 <= imm <= 255	&	rimm mod 2 = 0
#define BIC_IMM(dst, src, imm, rimm) (0xe3c00000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) )
// bic dst, src1, src2, lsl #imm		@	0 <= imm <= 31
#define BIC_REG_LSL_IMM(dst, src1, src2, imm) (0xe1c00000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) )

// load
// ldr reg, [addr, #imm]		@	0 <= imm < 4096
#define LDR_IMM(reg, addr, imm) (0xe5900000 + ((reg) << 12) + ((addr) << 16) + (imm) )
// ldr reg, [addr, #-(imm)]		@	0 <= imm < 4096
#define LDR_IMM_M(reg, addr, imm) (0xe5100000 + ((reg) << 12) + ((addr) << 16) + (imm) )
// ldrh reg, [addr, #imm]		@	0 <= imm < 256
#define LDRH_IMM(reg, addr, imm) (0xe1d000b0 + ((reg) << 12) + ((addr) << 16) + (((imm) & 0xf0) << 4) + ((imm) & 0x0f) )
// ldrh reg, [addr, #-(imm)]		@	0 <= imm < 256
#define LDRH_IMM_M(reg, addr, imm) (0xe15000b0 + ((reg) << 12) + ((addr) << 16) + (((imm) & 0xf0) << 4) + ((imm) & 0x0f) )
// ldrb reg, [addr, #imm]		@	0 <= imm < 4096
#define LDRB_IMM(reg, addr, imm) (0xe5d00000 + ((reg) << 12) + ((addr) << 16) + (imm) )
// ldrb reg, [addr, #-(imm)]		@	0 <= imm < 4096
#define LDRB_IMM_M(reg, addr, imm) (0xe5500000 + ((reg) << 12) + ((addr) << 16) + (imm) )
// ldr reg, [addr1, addr2, lsl #imm]		@	0 <= imm < 31
#define LDR_REG_LSL_IMM(reg, addr1, addr2, imm) (0xe7900000 + ((reg) << 12) + ((addr1) << 16) + (addr2) + ((imm) << 7) )
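// Encoding note (added for clarity): the halfword load/store forms only carry an 8-bit
// offset, split into two nibbles of the instruction word, which is why LDRH_IMM/STRH_IMM
// compute (((imm) & 0xf0) << 4) + ((imm) & 0x0f) and why their comments limit imm to
// 0..255, while the word and byte forms carry a full 12-bit offset (0..4095).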
// store
// str reg, [addr, #imm]		@	0 <= imm < 4096
#define STR_IMM(reg, addr, imm) (0xe5800000 + ((reg) << 12) + ((addr) << 16) + (imm) )
// str reg, [addr, #-(imm)]		@	0 <= imm < 4096
#define STR_IMM_M(reg, addr, imm) (0xe5000000 + ((reg) << 12) + ((addr) << 16) + (imm) )
// strh reg, [addr, #imm]		@	0 <= imm < 256
#define STRH_IMM(reg, addr, imm) (0xe1c000b0 + ((reg) << 12) + ((addr) << 16) + (((imm) & 0xf0) << 4) + ((imm) & 0x0f) )
// strh reg, [addr, #-(imm)]		@	0 <= imm < 256
#define STRH_IMM_M(reg, addr, imm) (0xe14000b0 + ((reg) << 12) + ((addr) << 16) + (((imm) & 0xf0) << 4) + ((imm) & 0x0f) )
// strb reg, [addr, #imm]		@	0 <= imm < 4096
#define STRB_IMM(reg, addr, imm) (0xe5c00000 + ((reg) << 12) + ((addr) << 16) + (imm) )
// strb reg, [addr, #-(imm)]		@	0 <= imm < 4096
#define STRB_IMM_M(reg, addr, imm) (0xe5400000 + ((reg) << 12) + ((addr) << 16) + (imm) )

// branch
// beq pc+imm		@	0 <= imm < 32M	&	imm mod 4 = 0
#define BEQ_FWD(imm) (0x0a000000 + ((imm) >> 2) )
// bne pc+imm		@	0 <= imm < 32M	&	imm mod 4 = 0
#define BNE_FWD(imm) (0x1a000000 + ((imm) >> 2) )
// ble pc+imm		@	0 <= imm < 32M	&	imm mod 4 = 0
#define BLE_FWD(imm) (0xda000000 + ((imm) >> 2) )
// b pc+imm		@	0 <= imm < 32M	&	imm mod 4 = 0
#define B_FWD(imm) (0xea000000 + ((imm) >> 2) )
// bx reg
#define BX(reg) (0xe12fff10 + (reg) )
#if C_TARGETCPU == ARMV7LE
// blx reg
#define BLX_REG(reg) (0xe12fff30 + (reg) )

// extend
// sxth dst, src, ror #rimm		@	rimm = 0 | 8 | 16 | 24
#define SXTH(dst, src, rimm) (0xe6bf0070 + ((dst) << 12) + (src) + (((rimm) & 24) << 7) )
// sxtb dst, src, ror #rimm		@	rimm = 0 | 8 | 16 | 24
#define SXTB(dst, src, rimm) (0xe6af0070 + ((dst) << 12) + (src) + (((rimm) & 24) << 7) )
// uxth dst, src, ror #rimm		@	rimm = 0 | 8 | 16 | 24
#define UXTH(dst, src, rimm) (0xe6ff0070 + ((dst) << 12) + (src) + (((rimm) & 24) << 7) )
// uxtb dst, src, ror #rimm		@	rimm = 0 | 8 | 16 | 24
#define UXTB(dst, src, rimm) (0xe6ef0070 + ((dst) << 12) + (src) + (((rimm) & 24) << 7) )

// bit field
// bfi dst, src, #lsb, #width		@	lsb >= 0, width >= 1, lsb+width <= 32
#define BFI(dst, src, lsb, width) (0xe7c00010 + ((dst) << 12) + (src) + ((lsb) << 7) + (((lsb) + (width) - 1) << 16) )
// bfc dst, #lsb, #width		@	lsb >= 0, width >= 1, lsb+width <= 32
#define BFC(dst, lsb, width) (0xe7c0001f + ((dst) << 12) + ((lsb) << 7) + (((lsb) + (width) - 1) << 16) )
#endif


// move a full register from reg_src to reg_dst
static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) {
	if(reg_src == reg_dst) return;
	cache_addd( MOV_REG_LSL_IMM(reg_dst, reg_src, 0) );      // mov reg_dst, reg_src
}

// helper function
static bool val_is_operand2(Bit32u value, Bit32u *val_shift) {
	Bit32u shift;

	if (GCC_UNLIKELY(value == 0)) {
		*val_shift = 0;
		return true;
	}

	shift = 0;
	while ((value & 3) == 0) {
		value>>=2;
		shift+=2;
	}

	if ((value >> 8) != 0) return false;

	*val_shift = shift;
	return true;
}

#if C_TARGETCPU != ARMV7LE
// helper function
static Bits get_imm_gen_len(Bit32u imm) {
	Bits ret;
	if (imm == 0) {
		return 1;
	} else {
		ret = 0;
		while (imm) {
			while ((imm & 3) == 0) {
				imm>>=2;
			}
			ret++;
			imm>>=8;
		}
		return ret;
	}
}

// helper function
static Bits get_min_imm_gen_len(Bit32u imm) {
	Bits num1, num2;

	num1 = get_imm_gen_len(imm);
	num2 = get_imm_gen_len(~imm);

	return (num1 <= num2)?num1:num2;
}
#endif
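// Note on val_is_operand2 (explanatory comment, added): it reports whether a value fits a
// single ARM operand2 immediate of the form "8-bit constant shifted left by an even amount",
// returning the shift through val_shift. For example 0xff000000 is accepted (0xff, shift 24),
// while 0xf000000f is rejected even though the hardware could encode it with a wrap-around
// rotation; the check is conservative, and callers simply fall back to longer sequences.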
// move a 32bit constant value into dest_reg
static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
#if C_TARGETCPU == ARMV7LE
	Bit32u scale;

	if ( val_is_operand2(imm, &scale) ) {
		cache_addd( MOV_IMM(dest_reg, imm >> scale, ROTATE_SCALE(scale)) );      // mov dest_reg, #imm
	} else if ( val_is_operand2(~imm, &scale) ) {
		cache_addd( MVN_IMM(dest_reg, (~imm) >> scale, ROTATE_SCALE(scale)) );      // mvn dest_reg, #~imm
	} else {
		cache_addd( MOVW(dest_reg, imm & 0xffff) );      // movw dest_reg, #(imm & 0xffff)
		if (imm >= 0x10000) {
			cache_addd( MOVT(dest_reg, imm >> 16) );      // movt dest_reg, #(imm >> 16)
		}
	}
#else
	Bit32u imm2, first, scale;

	scale = 0;
	first = 1;
	imm2 = ~imm;

	if (get_imm_gen_len(imm) <= get_imm_gen_len(imm2)) {
		if (imm == 0) {
			cache_addd( MOV_IMM(dest_reg, 0, 0) );      // mov dest_reg, #0
		} else {
			while (imm) {
				while ((imm & 3) == 0) {
					imm>>=2;
					scale+=2;
				}
				if (first) {
					cache_addd( MOV_IMM(dest_reg, imm & 0xff, ROTATE_SCALE(scale)) );      // mov dest_reg, #((imm & 0xff) << scale)
					first = 0;
				} else {
					cache_addd( ORR_IMM(dest_reg, dest_reg, imm & 0xff, ROTATE_SCALE(scale)) );      // orr dest_reg, dest_reg, #((imm & 0xff) << scale)
				}
				imm>>=8;
				scale+=8;
			}
		}
	} else {
		if (imm2 == 0) {
			cache_addd( MVN_IMM(dest_reg, 0, 0) );      // mvn dest_reg, #0
		} else {
			while (imm2) {
				while ((imm2 & 3) == 0) {
					imm2>>=2;
					scale+=2;
				}
				if (first) {
					cache_addd( MVN_IMM(dest_reg, imm2 & 0xff, ROTATE_SCALE(scale)) );      // mvn dest_reg, #((imm2 & 0xff) << scale)
					first = 0;
				} else {
					cache_addd( BIC_IMM(dest_reg, dest_reg, imm2 & 0xff, ROTATE_SCALE(scale)) );      // bic dest_reg, dest_reg, #((imm2 & 0xff) << scale)
				}
				imm2>>=8;
				scale+=8;
			}
		}
	}
#endif
}

// helper function
static bool gen_mov_memval_to_reg_helper(HostReg dest_reg, Bit32u data, Bitu size, HostReg addr_reg, Bit32u addr_data) {
	switch (size) {
		case 4:
#if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE))
			if ((data & 3) == 0)
#endif
			{
				if ((data >= addr_data) && (data < addr_data + 4096)) {
					cache_addd( LDR_IMM(dest_reg, addr_reg, data - addr_data) );      // ldr dest_reg, [addr_reg, #(data - addr_data)]
					return true;
				} else if ((data < addr_data) && (data > addr_data - 4096)) {
					cache_addd( LDR_IMM_M(dest_reg, addr_reg, addr_data - data) );      // ldr dest_reg, [addr_reg, #-(addr_data - data)]
					return true;
				}
			}
			break;
		case 2:
#if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE))
			if ((data & 1) == 0)
#endif
			{
				if ((data >= addr_data) && (data < addr_data + 256)) {
					cache_addd( LDRH_IMM(dest_reg, addr_reg, data - addr_data) );      // ldrh dest_reg, [addr_reg, #(data - addr_data)]
					return true;
				} else if ((data < addr_data) && (data > addr_data - 256)) {
					cache_addd( LDRH_IMM_M(dest_reg, addr_reg, addr_data - data) );      // ldrh dest_reg, [addr_reg, #-(addr_data - data)]
					return true;
				}
			}
			break;
		case 1:
			if ((data >= addr_data) && (data < addr_data + 4096)) {
				cache_addd( LDRB_IMM(dest_reg, addr_reg, data - addr_data) );      // ldrb dest_reg, [addr_reg, #(data - addr_data)]
				return true;
			} else if ((data < addr_data) && (data > addr_data - 4096)) {
				cache_addd( LDRB_IMM_M(dest_reg, addr_reg, addr_data - data) );      // ldrb dest_reg, [addr_reg, #-(addr_data - data)]
				return true;
			}
		default:
			break;
	}
	return false;
}

// helper function
static bool gen_mov_memval_to_reg(HostReg dest_reg, void *data, Bitu size) {
	if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, FC_REGS_ADDR, (Bit32u)&cpu_regs)) return true;
	if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, readdata_addr, (Bit32u)&core_dynrec.readdata)) return true;
	if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, FC_SEGS_ADDR, (Bit32u)&Segs)) return true;
	return false;
}
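// Usage sketch (added comment): gen_mov_memval_to_reg lets frequently accessed globals be
// read with a single base+offset load instead of first materializing a 32-bit address.
// For instance, a field that lies 16 bytes into cpu_regs becomes
// "ldr dest_reg, [FC_REGS_ADDR, #16]" as long as the displacement fits the windows checked
// above; anything else falls through to the slower helper below.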
// helper function for gen_mov_word_to_reg
static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,HostReg data_reg) {
	// alignment....
	if (dword) {
#if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE))
		if ((Bit32u)data & 3) {
			if ( ((Bit32u)data & 3) == 2 ) {
				cache_addd( LDRH_IMM(dest_reg, data_reg, 0) );      // ldrh dest_reg, [data_reg]
				cache_addd( LDRH_IMM(temp2, data_reg, 2) );      // ldrh temp2, [data_reg, #2]
				cache_addd( ORR_REG_LSL_IMM(dest_reg, dest_reg, temp2, 16) );      // orr dest_reg, dest_reg, temp2, lsl #16
			} else {
				cache_addd( LDRB_IMM(dest_reg, data_reg, 0) );      // ldrb dest_reg, [data_reg]
				cache_addd( LDRH_IMM(temp2, data_reg, 1) );      // ldrh temp2, [data_reg, #1]
				cache_addd( ORR_REG_LSL_IMM(dest_reg, dest_reg, temp2, 8) );      // orr dest_reg, dest_reg, temp2, lsl #8
				cache_addd( LDRB_IMM(temp2, data_reg, 3) );      // ldrb temp2, [data_reg, #3]
				cache_addd( ORR_REG_LSL_IMM(dest_reg, dest_reg, temp2, 24) );      // orr dest_reg, dest_reg, temp2, lsl #24
			}
		} else
#endif
		{
			cache_addd( LDR_IMM(dest_reg, data_reg, 0) );      // ldr dest_reg, [data_reg]
		}
	} else {
#if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE))
		if ((Bit32u)data & 1) {
			cache_addd( LDRB_IMM(dest_reg, data_reg, 0) );      // ldrb dest_reg, [data_reg]
			cache_addd( LDRB_IMM(temp2, data_reg, 1) );      // ldrb temp2, [data_reg, #1]
			cache_addd( ORR_REG_LSL_IMM(dest_reg, dest_reg, temp2, 8) );      // orr dest_reg, dest_reg, temp2, lsl #8
		} else
#endif
		{
			cache_addd( LDRH_IMM(dest_reg, data_reg, 0) );      // ldrh dest_reg, [data_reg]
		}
	}
}

// move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) {
	if (!gen_mov_memval_to_reg(dest_reg, data, (dword)?4:2)) {
		gen_mov_dword_to_reg_imm(temp1, (Bit32u)data);
		gen_mov_word_to_reg_helper(dest_reg, data, dword, temp1);
	}
}

// move a 16bit constant value into dest_reg
// the upper 16bit of the destination register may be destroyed
static void INLINE gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) {
	gen_mov_dword_to_reg_imm(dest_reg, (Bit32u)imm);
}

// helper function
static bool gen_mov_memval_from_reg_helper(HostReg src_reg, Bit32u data, Bitu size, HostReg addr_reg, Bit32u addr_data) {
	switch (size) {
		case 4:
#if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE))
			if ((data & 3) == 0)
#endif
			{
				if ((data >= addr_data) && (data < addr_data + 4096)) {
					cache_addd( STR_IMM(src_reg, addr_reg, data - addr_data) );      // str src_reg, [addr_reg, #(data - addr_data)]
					return true;
				} else if ((data < addr_data) && (data > addr_data - 4096)) {
					cache_addd( STR_IMM_M(src_reg, addr_reg, addr_data - data) );      // str src_reg, [addr_reg, #-(addr_data - data)]
					return true;
				}
			}
			break;
		case 2:
#if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE))
			if ((data & 1) == 0)
#endif
			{
				if ((data >= addr_data) && (data < addr_data + 256)) {
					cache_addd( STRH_IMM(src_reg, addr_reg, data - addr_data) );      // strh src_reg, [addr_reg, #(data - addr_data)]
					return true;
				} else if ((data < addr_data) && (data > addr_data - 256)) {
					cache_addd( STRH_IMM_M(src_reg, addr_reg, addr_data - data) );      // strh src_reg, [addr_reg, #-(addr_data - data)]
					return true;
				}
			}
			break;
		case 1:
			if ((data >= addr_data) && (data < addr_data + 4096)) {
				cache_addd( STRB_IMM(src_reg, addr_reg, data - addr_data) );      // strb src_reg, [addr_reg, #(data - addr_data)]
				return true;
			} else if ((data < addr_data) && (data > addr_data - 4096)) {
				cache_addd( STRB_IMM_M(src_reg, addr_reg, addr_data - data) );      // strb src_reg, [addr_reg, #-(addr_data - data)]
				return true;
			}
		default:
			break;
	}
	return false;
}
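// Added note: the function above is the store-side mirror of gen_mov_memval_to_reg_helper,
// with the same displacement windows (up to 4095 bytes for word/byte accesses, 255 bytes for
// halfword accesses) and the same alignment guard on pre-ARMv7 builds without C_UNALIGNED_MEMORY.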
// helper function
static bool gen_mov_memval_from_reg(HostReg src_reg, void *dest, Bitu size) {
	if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, FC_REGS_ADDR, (Bit32u)&cpu_regs)) return true;
	if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, readdata_addr, (Bit32u)&core_dynrec.readdata)) return true;
	if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, FC_SEGS_ADDR, (Bit32u)&Segs)) return true;
	return false;
}

// helper function for gen_mov_word_from_reg
static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, HostReg data_reg) {
	// alignment....
	if (dword) {
#if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE))
		if ((Bit32u)dest & 3) {
			if ( ((Bit32u)dest & 3) == 2 ) {
				cache_addd( STRH_IMM(src_reg, data_reg, 0) );      // strh src_reg, [data_reg]
				cache_addd( MOV_REG_LSR_IMM(temp2, src_reg, 16) );      // mov temp2, src_reg, lsr #16
				cache_addd( STRH_IMM(temp2, data_reg, 2) );      // strh temp2, [data_reg, #2]
			} else {
				cache_addd( STRB_IMM(src_reg, data_reg, 0) );      // strb src_reg, [data_reg]
				cache_addd( MOV_REG_LSR_IMM(temp2, src_reg, 8) );      // mov temp2, src_reg, lsr #8
				cache_addd( STRH_IMM(temp2, data_reg, 1) );      // strh temp2, [data_reg, #1]
				cache_addd( MOV_REG_LSR_IMM(temp2, temp2, 16) );      // mov temp2, temp2, lsr #16
				cache_addd( STRB_IMM(temp2, data_reg, 3) );      // strb temp2, [data_reg, #3]
			}
		} else
#endif
		{
			cache_addd( STR_IMM(src_reg, data_reg, 0) );      // str src_reg, [data_reg]
		}
	} else {
#if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE))
		if ((Bit32u)dest & 1) {
			cache_addd( STRB_IMM(src_reg, data_reg, 0) );      // strb src_reg, [data_reg]
			cache_addd( MOV_REG_LSR_IMM(temp2, src_reg, 8) );      // mov temp2, src_reg, lsr #8
			cache_addd( STRB_IMM(temp2, data_reg, 1) );      // strb temp2, [data_reg, #1]
		} else
#endif
		{
			cache_addd( STRH_IMM(src_reg, data_reg, 0) );      // strh src_reg, [data_reg]
		}
	}
}

// move 32bit (dword==true) or 16bit (dword==false) of a register into memory
static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) {
	if (!gen_mov_memval_from_reg(src_reg, dest, (dword)?4:2)) {
		gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
		gen_mov_word_from_reg_helper(src_reg, dest, dword, temp1);
	}
}

// move an 8bit value from memory into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) {
	if (!gen_mov_memval_to_reg(dest_reg, data, 1)) {
		gen_mov_dword_to_reg_imm(temp1, (Bit32u)data);
		cache_addd( LDRB_IMM(dest_reg, temp1, 0) );      // ldrb dest_reg, [temp1]
	}
}

// move an 8bit value from memory into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
static void INLINE gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) {
	gen_mov_byte_to_reg_low(dest_reg, data);
}

// move an 8bit constant value into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) {
	cache_addd( MOV_IMM(dest_reg, imm, 0) );      // mov dest_reg, #(imm)
}
// move an 8bit constant value into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
static void INLINE gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) {
	gen_mov_byte_to_reg_low_imm(dest_reg, imm);
}

// move the lowest 8bit of a register into memory
static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) {
	if (!gen_mov_memval_from_reg(src_reg, dest, 1)) {
		gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
		cache_addd( STRB_IMM(src_reg, temp1, 0) );      // strb src_reg, [temp1]
	}
}

// convert an 8bit word to a 32bit dword
// the register is zero-extended (sign==false) or sign-extended (sign==true)
static void gen_extend_byte(bool sign,HostReg reg) {
	if (sign) {
#if C_TARGETCPU == ARMV7LE
		cache_addd( SXTB(reg, reg, 0) );      // sxtb reg, reg
#else
		cache_addd( MOV_REG_LSL_IMM(reg, reg, 24) );      // mov reg, reg, lsl #24
		cache_addd( MOV_REG_ASR_IMM(reg, reg, 24) );      // mov reg, reg, asr #24
#endif
	} else {
#if C_TARGETCPU == ARMV7LE
		cache_addd( UXTB(reg, reg, 0) );      // uxtb reg, reg
#else
		cache_addd( AND_IMM(reg, reg, 0xff, 0) );      // and reg, reg, #0xff
#endif
	}
}

// convert a 16bit word to a 32bit dword
// the register is zero-extended (sign==false) or sign-extended (sign==true)
static void gen_extend_word(bool sign,HostReg reg) {
	if (sign) {
#if C_TARGETCPU == ARMV7LE
		cache_addd( SXTH(reg, reg, 0) );      // sxth reg, reg
#else
		cache_addd( MOV_REG_LSL_IMM(reg, reg, 16) );      // mov reg, reg, lsl #16
		cache_addd( MOV_REG_ASR_IMM(reg, reg, 16) );      // mov reg, reg, asr #16
#endif
	} else {
#if C_TARGETCPU == ARMV7LE
		cache_addd( UXTH(reg, reg, 0) );      // uxth reg, reg
#else
		cache_addd( MOV_REG_LSL_IMM(reg, reg, 16) );      // mov reg, reg, lsl #16
		cache_addd( MOV_REG_LSR_IMM(reg, reg, 16) );      // mov reg, reg, lsr #16
#endif
	}
}

// add a 32bit value from memory to a full register
static void gen_add(HostReg reg,void* op) {
	gen_mov_word_to_reg(temp3, op, 1);
	cache_addd( ADD_REG_LSL_IMM(reg, reg, temp3, 0) );      // add reg, reg, temp3
}

// add a 32bit constant value to a full register
static void gen_add_imm(HostReg reg,Bit32u imm) {
	Bit32u imm2, scale;

	if(!imm) return;

	imm2 = (Bit32u) (-((Bit32s)imm));

	if ( val_is_operand2(imm, &scale) ) {
		cache_addd( ADD_IMM(reg, reg, imm >> scale, ROTATE_SCALE(scale)) );      // add reg, reg, #imm
	} else if ( val_is_operand2(imm2, &scale) ) {
		cache_addd( SUB_IMM(reg, reg, imm2 >> scale, ROTATE_SCALE(scale)) );      // sub reg, reg, #(-imm)
#if C_TARGETCPU == ARMV7LE
	} else if (imm2 < 0x10000) {
		cache_addd( MOVW(temp2, imm2) );      // movw temp2, #(-imm)
		cache_addd( SUB_REG_LSL_IMM(reg, reg, temp2, 0) );      // sub reg, reg, temp2
#endif
	} else {
#if C_TARGETCPU != ARMV7LE
		if (get_min_imm_gen_len(imm) <= get_min_imm_gen_len(imm2)) {
#endif
			gen_mov_dword_to_reg_imm(temp2, imm);
			cache_addd( ADD_REG_LSL_IMM(reg, reg, temp2, 0) );      // add reg, reg, temp2
#if C_TARGETCPU != ARMV7LE
		} else {
			gen_mov_dword_to_reg_imm(temp2, imm2);
			cache_addd( SUB_REG_LSL_IMM(reg, reg, temp2, 0) );      // sub reg, reg, temp2
		}
#endif
	}
}
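// Example of the selection logic above (illustrative): gen_add_imm(reg, 0xffffffff) cannot
// encode 0xffffffff as an operand2 immediate, but its negation is 1, so a single
// "sub reg, reg, #1" is emitted; only when neither the value nor its negation (nor, on
// ARMv7, a 16-bit movw) fits does the code fall back to building the constant in temp2.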
// and a 32bit constant value with a full register
static void gen_and_imm(HostReg reg,Bit32u imm) {
	Bit32u imm2, scale;

	imm2 = ~imm;
	if(!imm2) return;

	if (!imm) {
		cache_addd( MOV_IMM(reg, 0, 0) );      // mov reg, #0
	} else if ( val_is_operand2(imm, &scale) ) {
		cache_addd( AND_IMM(reg, reg, imm >> scale, ROTATE_SCALE(scale)) );      // and reg, reg, #imm
	} else if ( val_is_operand2(imm2, &scale) ) {
		cache_addd( BIC_IMM(reg, reg, imm2 >> scale, ROTATE_SCALE(scale)) );      // bic reg, reg, #(~imm)
#if C_TARGETCPU == ARMV7LE
	} else if (imm2 < 0x10000) {
		cache_addd( MOVW(temp2, imm2) );      // movw temp2, #(~imm)
		cache_addd( BIC_REG_LSL_IMM(reg, reg, temp2, 0) );      // bic reg, reg, temp2
#endif
	} else {
		gen_mov_dword_to_reg_imm(temp2, imm);
		cache_addd( AND_REG_LSL_IMM(reg, reg, temp2, 0) );      // and reg, reg, temp2
	}
}


// move a 32bit constant value into memory
static void gen_mov_direct_dword(void* dest,Bit32u imm) {
	gen_mov_dword_to_reg_imm(temp3, imm);
	gen_mov_word_from_reg(temp3, dest, 1);
}

// move an address into memory
static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm) {
	gen_mov_direct_dword(dest,(Bit32u)imm);
}

// add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value
static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) {
	if (!dword) imm &= 0xffff;
	if(!imm) return;

	if (!gen_mov_memval_to_reg(temp3, dest, (dword)?4:2)) {
		gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
		gen_mov_word_to_reg_helper(temp3, dest, dword, temp1);
	}
	gen_add_imm(temp3, imm);
	if (!gen_mov_memval_from_reg(temp3, dest, (dword)?4:2)) {
		gen_mov_word_from_reg_helper(temp3, dest, dword, temp1);
	}
}

// add an 8bit constant value to a dword memory value
static void gen_add_direct_byte(void* dest,Bit8s imm) {
	gen_add_direct_word(dest, (Bit32s)imm, 1);
}

// subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value
static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
	Bit32u imm2, scale;

	if (!dword) imm &= 0xffff;
	if(!imm) return;

	if (!gen_mov_memval_to_reg(temp3, dest, (dword)?4:2)) {
		gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
		gen_mov_word_to_reg_helper(temp3, dest, dword, temp1);
	}

	imm2 = (Bit32u) (-((Bit32s)imm));

	if ( val_is_operand2(imm, &scale) ) {
		cache_addd( SUB_IMM(temp3, temp3, imm >> scale, ROTATE_SCALE(scale)) );      // sub temp3, temp3, #imm
	} else if ( val_is_operand2(imm2, &scale) ) {
		cache_addd( ADD_IMM(temp3, temp3, imm2 >> scale, ROTATE_SCALE(scale)) );      // add temp3, temp3, #(-imm)
#if C_TARGETCPU == ARMV7LE
	} else if (imm2 < 0x10000) {
		cache_addd( MOVW(temp2, imm2) );      // movw temp2, #(-imm)
		cache_addd( ADD_REG_LSL_IMM(temp3, temp3, temp2, 0) );      // add temp3, temp3, temp2
#endif
	} else {
#if C_TARGETCPU != ARMV7LE
		if (get_min_imm_gen_len(imm) <= get_min_imm_gen_len(imm2)) {
#endif
			gen_mov_dword_to_reg_imm(temp2, imm);
			cache_addd( SUB_REG_LSL_IMM(temp3, temp3, temp2, 0) );      // sub temp3, temp3, temp2
#if C_TARGETCPU != ARMV7LE
		} else {
			gen_mov_dword_to_reg_imm(temp2, imm2);
			cache_addd( ADD_REG_LSL_IMM(temp3, temp3, temp2, 0) );      // add temp3, temp3, temp2
		}
#endif
	}

	if (!gen_mov_memval_from_reg(temp3, dest, (dword)?4:2)) {
		gen_mov_word_from_reg_helper(temp3, dest, dword, temp1);
	}
}

// subtract an 8bit constant value from a dword memory value
static void gen_sub_direct_byte(void* dest,Bit8s imm) {
	gen_sub_direct_word(dest, (Bit32s)imm, 1);
}

// effective address calculation, destination is dest_reg
// scale_reg is scaled by scale (scale_reg*(2^scale)) and
// added to dest_reg, then the immediate value is added
static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm) {
	cache_addd( ADD_REG_LSL_IMM(dest_reg, dest_reg, scale_reg, scale) );      // add dest_reg, dest_reg, scale_reg, lsl #(scale)
	gen_add_imm(dest_reg, imm);
}

// effective address calculation, destination is dest_reg
// dest_reg is scaled by scale (dest_reg*(2^scale)),
// then the immediate value is added
static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) {
	if (scale) {
		cache_addd( MOV_REG_LSL_IMM(dest_reg, dest_reg, scale) );      // mov dest_reg, dest_reg, lsl #(scale)
	}
	gen_add_imm(dest_reg, imm);
}
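// Added explanation for the ARMv4 path of gen_call_function_raw below: reading pc in A32
// yields the address of the current instruction plus 8, so "ldr temp1, [pc, #4]" fetches
// the ".int func" literal stored right after the bx, and "add lr, pc, #4" sets the return
// address to the instruction following that literal. On ARMv7 the address is instead built
// with movw/movt and the call is made with blx.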
// generate a call to a parameterless function
static void INLINE gen_call_function_raw(void * func) {
#if C_TARGETCPU == ARMV7LE
	cache_addd( MOVW(temp1, ((Bit32u)func) & 0xffff) );      // movw temp1, #(func & 0xffff)
	cache_addd( MOVT(temp1, ((Bit32u)func) >> 16) );      // movt temp1, #(func >> 16)
	cache_addd( BLX_REG(temp1) );      // blx temp1
#else
	cache_addd( LDR_IMM(temp1, HOST_pc, 4) );      // ldr temp1, [pc, #4]
	cache_addd( ADD_IMM(HOST_lr, HOST_pc, 4, 0) );      // add lr, pc, #4
	cache_addd( BX(temp1) );      // bx temp1
	cache_addd((Bit32u)func);      // .int func
#endif
}

// generate a call to a function with paramcount parameters
// note: the parameters are loaded in the architecture specific way
// using the gen_load_param_ functions below
static Bit32u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false) {
	Bit32u proc_addr = (Bit32u)cache.pos;
	gen_call_function_raw(func);
	return proc_addr;
}

#if (1)
// max of 4 parameters in a1-a4

// load an immediate value as param'th function parameter
static void INLINE gen_load_param_imm(Bitu imm,Bitu param) {
	gen_mov_dword_to_reg_imm(param, imm);
}

// load an address as param'th function parameter
static void INLINE gen_load_param_addr(Bitu addr,Bitu param) {
	gen_mov_dword_to_reg_imm(param, addr);
}

// load a host-register as param'th function parameter
static void INLINE gen_load_param_reg(Bitu reg,Bitu param) {
	gen_mov_regs(param, reg);
}

// load a value from memory as param'th function parameter
static void INLINE gen_load_param_mem(Bitu mem,Bitu param) {
	gen_mov_word_to_reg(param, (void *)mem, 1);
}
#else
	other arm abis
#endif

// jump to an address pointed at by ptr, offset is in imm
static void gen_jmp_ptr(void * ptr,Bits imm=0) {
	Bit32u scale;

	gen_mov_word_to_reg(temp3, ptr, 1);

#if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE))
	// (*ptr) should be word aligned
	if ((imm & 0x03) == 0) {
#endif
		if ((imm >= 0) && (imm < 4096)) {
			cache_addd( LDR_IMM(temp1, temp3, imm) );      // ldr temp1, [temp3, #imm]
		} else {
			gen_mov_dword_to_reg_imm(temp2, imm);
			cache_addd( LDR_REG_LSL_IMM(temp1, temp3, temp2, 0) );      // ldr temp1, [temp3, temp2]
		}
#if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE))
	} else {
		gen_add_imm(temp3, imm);

		cache_addd( LDRB_IMM(temp1, temp3, 0) );      // ldrb temp1, [temp3]
		cache_addd( LDRB_IMM(temp2, temp3, 1) );      // ldrb temp2, [temp3, #1]
		cache_addd( ORR_REG_LSL_IMM(temp1, temp1, temp2, 8) );      // orr temp1, temp1, temp2, lsl #8
		cache_addd( LDRB_IMM(temp2, temp3, 2) );      // ldrb temp2, [temp3, #2]
		cache_addd( ORR_REG_LSL_IMM(temp1, temp1, temp2, 16) );      // orr temp1, temp1, temp2, lsl #16
		cache_addd( LDRB_IMM(temp2, temp3, 3) );      // ldrb temp2, [temp3, #3]
		cache_addd( ORR_REG_LSL_IMM(temp1, temp1, temp2, 24) );      // orr temp1, temp1, temp2, lsl #24
	}
#endif

	cache_addd( BX(temp1) );      // bx temp1
}

// short conditional jump (+-127 bytes) if register is zero
// the destination is set by gen_fill_branch() later
static Bit32u gen_create_branch_on_zero(HostReg reg,bool dword) {
	if (dword) {
		cache_addd( CMP_IMM(reg, 0, 0) );      // cmp reg, #0
	} else {
		cache_addd( MOVS_REG_LSL_IMM(temp1, reg, 16) );      // movs temp1, reg, lsl #16
	}
	cache_addd( BEQ_FWD(0) );      // beq j
	return ((Bit32u)cache.pos-4);
}
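// Added note: in the dword==false case the value is tested with "movs temp1, reg, lsl #16"
// so that only the low 16 bits of reg influence the Z flag; the beq/bne placeholder emitted
// here has a zero offset and is patched later by gen_fill_branch with the real target.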
// short conditional jump (+-127 bytes) if register is nonzero
// the destination is set by gen_fill_branch() later
static Bit32u gen_create_branch_on_nonzero(HostReg reg,bool dword) {
	if (dword) {
		cache_addd( CMP_IMM(reg, 0, 0) );      // cmp reg, #0
	} else {
		cache_addd( MOVS_REG_LSL_IMM(temp1, reg, 16) );      // movs temp1, reg, lsl #16
	}
	cache_addd( BNE_FWD(0) );      // bne j
	return ((Bit32u)cache.pos-4);
}

// calculate relative offset and fill it into the location pointed to by data
static void INLINE gen_fill_branch(DRC_PTR_SIZE_IM data) {
#if C_DEBUG
	Bits len=(Bit32u)cache.pos-(data+8);
	if (len<0) len=-len;
	if (len>0x02000000) LOG_MSG("Big jump %d",len);
#endif
	*(Bit32u*)data=( (*(Bit32u*)data) & 0xff000000 ) | ( ( ((Bit32u)cache.pos - (data+8)) >> 2 ) & 0x00ffffff );
}

// conditional jump if register is nonzero
// for isdword==true the 32bit of the register are tested
// for isdword==false the lowest 8bit of the register are tested
static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool isdword) {
	if (isdword) {
		cache_addd( CMP_IMM(reg, 0, 0) );      // cmp reg, #0
	} else {
		cache_addd( TST_IMM(reg, 0xff, 0) );      // tst reg, #0xff
	}
	cache_addd( BNE_FWD(0) );      // bne j
	return ((Bit32u)cache.pos-4);
}

// compare 32bit-register against zero and jump if value less/equal than zero
static Bit32u gen_create_branch_long_leqzero(HostReg reg) {
	cache_addd( CMP_IMM(reg, 0, 0) );      // cmp reg, #0
	cache_addd( BLE_FWD(0) );      // ble j
	return ((Bit32u)cache.pos-4);
}

// calculate long relative offset and fill it into the location pointed to by data
static void INLINE gen_fill_branch_long(Bit32u data) {
	*(Bit32u*)data=( (*(Bit32u*)data) & 0xff000000 ) | ( ( ((Bit32u)cache.pos - (data+8)) >> 2 ) & 0x00ffffff );
}

static void gen_run_code(void) {
#if C_TARGETCPU == ARMV7LE
	cache_addd(0xe92d4df0);			// stmfd sp!, {v1-v5,v7,v8,lr}

	cache_addd( MOVW(FC_SEGS_ADDR, ((Bit32u)&Segs) & 0xffff) );      // movw FC_SEGS_ADDR, #(&Segs & 0xffff)
	cache_addd( MOVT(FC_SEGS_ADDR, ((Bit32u)&Segs) >> 16) );      // movt FC_SEGS_ADDR, #(&Segs >> 16)

	cache_addd( MOVW(FC_REGS_ADDR, ((Bit32u)&cpu_regs) & 0xffff) );      // movw FC_REGS_ADDR, #(&cpu_regs & 0xffff)
	cache_addd( MOVT(FC_REGS_ADDR, ((Bit32u)&cpu_regs) >> 16) );      // movt FC_REGS_ADDR, #(&cpu_regs >> 16)

	cache_addd( MOVW(readdata_addr, ((Bitu)&core_dynrec.readdata) & 0xffff) );      // movw readdata_addr, #(&core_dynrec.readdata & 0xffff)
	cache_addd( MOVT(readdata_addr, ((Bitu)&core_dynrec.readdata) >> 16) );      // movt readdata_addr, #(&core_dynrec.readdata >> 16)

	cache_addd( BX(HOST_r0) );			// bx r0
#else
	Bit8u *pos1, *pos2, *pos3;

	cache_addd(0xe92d4df0);			// stmfd sp!, {v1-v5,v7,v8,lr}

	pos1 = cache.pos;
	cache_addd( 0 );
	pos2 = cache.pos;
	cache_addd( 0 );
	pos3 = cache.pos;
	cache_addd( 0 );

	cache_addd( BX(HOST_r0) );			// bx r0

	// align cache.pos to 32 bytes
	if ((((Bitu)cache.pos) & 0x1f) != 0) {
		cache.pos = cache.pos + (32 - (((Bitu)cache.pos) & 0x1f));
	}

	*(Bit32u*)pos1 = LDR_IMM(FC_SEGS_ADDR, HOST_pc, cache.pos - (pos1 + 8));      // ldr FC_SEGS_ADDR, [pc, #(&Segs)]
	cache_addd((Bit32u)&Segs);      // address of "Segs"

	*(Bit32u*)pos2 = LDR_IMM(FC_REGS_ADDR, HOST_pc, cache.pos - (pos2 + 8));      // ldr FC_REGS_ADDR, [pc, #(&cpu_regs)]
	cache_addd((Bit32u)&cpu_regs);      // address of "cpu_regs"

	*(Bit32u*)pos3 = LDR_IMM(readdata_addr, HOST_pc, cache.pos - (pos3 + 8));      // ldr readdata_addr, [pc, #(&core_dynrec.readdata)]
	cache_addd((Bit32u)&core_dynrec.readdata);      // address of "core_dynrec.readdata"

	// align cache.pos to 32 bytes
	if ((((Bitu)cache.pos) & 0x1f) != 0) {
		cache.pos = cache.pos + (32 - (((Bitu)cache.pos) & 0x1f));
	}
#endif
}

// return from a function
static void gen_return_function(void) {
	cache_addd(0xe8bd8df0);			// ldmfd sp!, {v1-v5,v7,v8,pc}
}
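// Added explanation for the block below: when flag results turn out to be unneeded, the
// dynrec core lets the backend overwrite an already emitted call site with simpler inline
// code. The call site is 3 instruction words on ARMv7 (movw/movt/blx) and 4 on ARMv4
// (ldr/add/bx plus the address literal), which is why the non-ARMv7 cases also patch an
// extra word at pos+12.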
#ifdef DRC_FLAGS_INVALIDATION

// called when a call to a function can be replaced by a
// call to a simpler function
static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
#ifdef DRC_FLAGS_INVALIDATION_DCODE
	// try to avoid function calls but rather directly fill in code
	switch (flags_type) {
		case t_ADDb:
		case t_ADDw:
		case t_ADDd:
			*(Bit32u*)pos=NOP;      // nop
			*(Bit32u*)(pos+4)=NOP;      // nop
			*(Bit32u*)(pos+8)=ADD_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0);      // add FC_RETOP, a1, a2
#if C_TARGETCPU != ARMV7LE
			*(Bit32u*)(pos+12)=NOP;      // nop
#endif
			break;
		case t_ORb:
		case t_ORw:
		case t_ORd:
			*(Bit32u*)pos=NOP;      // nop
			*(Bit32u*)(pos+4)=NOP;      // nop
			*(Bit32u*)(pos+8)=ORR_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0);      // orr FC_RETOP, a1, a2
#if C_TARGETCPU != ARMV7LE
			*(Bit32u*)(pos+12)=NOP;      // nop
#endif
			break;
		case t_ANDb:
		case t_ANDw:
		case t_ANDd:
			*(Bit32u*)pos=NOP;      // nop
			*(Bit32u*)(pos+4)=NOP;      // nop
			*(Bit32u*)(pos+8)=AND_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0);      // and FC_RETOP, a1, a2
#if C_TARGETCPU != ARMV7LE
			*(Bit32u*)(pos+12)=NOP;      // nop
#endif
			break;
		case t_SUBb:
		case t_SUBw:
		case t_SUBd:
			*(Bit32u*)pos=NOP;      // nop
			*(Bit32u*)(pos+4)=NOP;      // nop
			*(Bit32u*)(pos+8)=SUB_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0);      // sub FC_RETOP, a1, a2
#if C_TARGETCPU != ARMV7LE
			*(Bit32u*)(pos+12)=NOP;      // nop
#endif
			break;
		case t_XORb:
		case t_XORw:
		case t_XORd:
			*(Bit32u*)pos=NOP;      // nop
			*(Bit32u*)(pos+4)=NOP;      // nop
			*(Bit32u*)(pos+8)=EOR_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0);      // eor FC_RETOP, a1, a2
#if C_TARGETCPU != ARMV7LE
			*(Bit32u*)(pos+12)=NOP;      // nop
#endif
			break;
		case t_CMPb:
		case t_CMPw:
		case t_CMPd:
		case t_TESTb:
		case t_TESTw:
		case t_TESTd:
			*(Bit32u*)pos=NOP;      // nop
			*(Bit32u*)(pos+4)=NOP;      // nop
			*(Bit32u*)(pos+8)=NOP;      // nop
#if C_TARGETCPU != ARMV7LE
			*(Bit32u*)(pos+12)=NOP;      // nop
#endif
			break;
		case t_INCb:
		case t_INCw:
		case t_INCd:
			*(Bit32u*)pos=NOP;      // nop
			*(Bit32u*)(pos+4)=NOP;      // nop
			*(Bit32u*)(pos+8)=ADD_IMM(FC_RETOP, HOST_a1, 1, 0);      // add FC_RETOP, a1, #1
#if C_TARGETCPU != ARMV7LE
			*(Bit32u*)(pos+12)=NOP;      // nop
#endif
			break;
		case t_DECb:
		case t_DECw:
		case t_DECd:
			*(Bit32u*)pos=NOP;      // nop
			*(Bit32u*)(pos+4)=NOP;      // nop
			*(Bit32u*)(pos+8)=SUB_IMM(FC_RETOP, HOST_a1, 1, 0);      // sub FC_RETOP, a1, #1
#if C_TARGETCPU != ARMV7LE
			*(Bit32u*)(pos+12)=NOP;      // nop
#endif
			break;
		case t_SHLb:
		case t_SHLw:
		case t_SHLd:
			*(Bit32u*)pos=NOP;      // nop
			*(Bit32u*)(pos+4)=NOP;      // nop
			*(Bit32u*)(pos+8)=MOV_REG_LSL_REG(FC_RETOP, HOST_a1, HOST_a2);      // mov FC_RETOP, a1, lsl a2
#if C_TARGETCPU != ARMV7LE
			*(Bit32u*)(pos+12)=NOP;      // nop
#endif
			break;
		case t_SHRb:
			*(Bit32u*)pos=NOP;      // nop
#if C_TARGETCPU == ARMV7LE
			*(Bit32u*)(pos+4)=BFC(HOST_a1, 8, 24);      // bfc a1, 8, 24
			*(Bit32u*)(pos+8)=MOV_REG_LSR_REG(FC_RETOP, HOST_a1, HOST_a2);      // mov FC_RETOP, a1, lsr a2
#else
			*(Bit32u*)(pos+4)=NOP;      // nop
			*(Bit32u*)(pos+8)=AND_IMM(FC_RETOP, HOST_a1, 0xff, 0);      // and FC_RETOP, a1, #0xff
			*(Bit32u*)(pos+12)=MOV_REG_LSR_REG(FC_RETOP, FC_RETOP, HOST_a2);      // mov FC_RETOP, FC_RETOP, lsr a2
#endif
			break;
		case t_SHRw:
			*(Bit32u*)pos=NOP;      // nop
#if C_TARGETCPU == ARMV7LE
			*(Bit32u*)(pos+4)=BFC(HOST_a1, 16, 16);      // bfc a1, 16, 16
			*(Bit32u*)(pos+8)=MOV_REG_LSR_REG(FC_RETOP, HOST_a1, HOST_a2);      // mov FC_RETOP, a1, lsr a2
#else
			*(Bit32u*)(pos+4)=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16);      // mov FC_RETOP, a1, lsl #16
			*(Bit32u*)(pos+8)=MOV_REG_LSR_IMM(FC_RETOP, FC_RETOP, 16);      // mov FC_RETOP, FC_RETOP, lsr #16
			*(Bit32u*)(pos+12)=MOV_REG_LSR_REG(FC_RETOP, FC_RETOP, HOST_a2);      // mov FC_RETOP, FC_RETOP, lsr a2
#endif
			break;
		case t_SHRd:
			*(Bit32u*)pos=NOP;      // nop
			*(Bit32u*)(pos+4)=NOP;      // nop
			*(Bit32u*)(pos+8)=MOV_REG_LSR_REG(FC_RETOP, HOST_a1, HOST_a2);      // mov FC_RETOP, a1, lsr a2
#if C_TARGETCPU != ARMV7LE
			*(Bit32u*)(pos+12)=NOP;      // nop
#endif
			break;
		case t_SARb:
			*(Bit32u*)pos=NOP;      // nop
#if C_TARGETCPU == ARMV7LE
			*(Bit32u*)(pos+4)=SXTB(FC_RETOP, HOST_a1, 0);      // sxtb FC_RETOP, a1
			*(Bit32u*)(pos+8)=MOV_REG_ASR_REG(FC_RETOP, FC_RETOP, HOST_a2);      // mov FC_RETOP, FC_RETOP, asr a2
#else
			*(Bit32u*)(pos+4)=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 24);      // mov FC_RETOP, a1, lsl #24
			*(Bit32u*)(pos+8)=MOV_REG_ASR_IMM(FC_RETOP, FC_RETOP, 24);      // mov FC_RETOP, FC_RETOP, asr #24
			*(Bit32u*)(pos+12)=MOV_REG_ASR_REG(FC_RETOP, FC_RETOP, HOST_a2);      // mov FC_RETOP, FC_RETOP, asr a2
#endif
			break;
		case t_SARw:
			*(Bit32u*)pos=NOP;      // nop
#if C_TARGETCPU == ARMV7LE
			*(Bit32u*)(pos+4)=SXTH(FC_RETOP, HOST_a1, 0);      // sxth FC_RETOP, a1
			*(Bit32u*)(pos+8)=MOV_REG_ASR_REG(FC_RETOP, FC_RETOP, HOST_a2);      // mov FC_RETOP, FC_RETOP, asr a2
#else
			*(Bit32u*)(pos+4)=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16);      // mov FC_RETOP, a1, lsl #16
			*(Bit32u*)(pos+8)=MOV_REG_ASR_IMM(FC_RETOP, FC_RETOP, 16);      // mov FC_RETOP, FC_RETOP, asr #16
			*(Bit32u*)(pos+12)=MOV_REG_ASR_REG(FC_RETOP, FC_RETOP, HOST_a2);      // mov FC_RETOP, FC_RETOP, asr a2
#endif
			break;
		case t_SARd:
			*(Bit32u*)pos=NOP;      // nop
			*(Bit32u*)(pos+4)=NOP;      // nop
			*(Bit32u*)(pos+8)=MOV_REG_ASR_REG(FC_RETOP, HOST_a1, HOST_a2);      // mov FC_RETOP, a1, asr a2
#if C_TARGETCPU != ARMV7LE
			*(Bit32u*)(pos+12)=NOP;      // nop
#endif
			break;
		case t_RORb:
#if C_TARGETCPU == ARMV7LE
			*(Bit32u*)pos=BFI(HOST_a1, HOST_a1, 8, 8);      // bfi a1, a1, 8, 8
			*(Bit32u*)(pos+4)=BFI(HOST_a1, HOST_a1, 16, 16);      // bfi a1, a1, 16, 16
			*(Bit32u*)(pos+8)=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2);      // mov FC_RETOP, a1, ror a2
#else
			*(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 24);      // mov FC_RETOP, a1, lsl #24
			*(Bit32u*)(pos+4)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 8);      // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8
			*(Bit32u*)(pos+8)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16);      // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
			*(Bit32u*)(pos+12)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2);      // mov FC_RETOP, FC_RETOP, ror a2
#endif
			break;
		case t_RORw:
			*(Bit32u*)pos=NOP;      // nop
#if C_TARGETCPU == ARMV7LE
			*(Bit32u*)(pos+4)=BFI(HOST_a1, HOST_a1, 16, 16);      // bfi a1, a1, 16, 16
			*(Bit32u*)(pos+8)=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2);      // mov FC_RETOP, a1, ror a2
#else
			*(Bit32u*)(pos+4)=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16);      // mov FC_RETOP, a1, lsl #16
			*(Bit32u*)(pos+8)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16);      // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
			*(Bit32u*)(pos+12)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2);      // mov FC_RETOP, FC_RETOP, ror a2
#endif
			break;
		case t_RORd:
			*(Bit32u*)pos=NOP;      // nop
			*(Bit32u*)(pos+4)=NOP;      // nop
			*(Bit32u*)(pos+8)=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2);      // mov FC_RETOP, a1, ror a2
#if C_TARGETCPU != ARMV7LE
			*(Bit32u*)(pos+12)=NOP;      // nop
#endif
			break;
		case t_ROLw:
#if C_TARGETCPU == ARMV7LE
			*(Bit32u*)pos=BFI(HOST_a1, HOST_a1, 16, 16);      // bfi a1, a1, 16, 16
			*(Bit32u*)(pos+4)=RSB_IMM(HOST_a2, HOST_a2, 32, 0);      // rsb a2, a2, #32
			*(Bit32u*)(pos+8)=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2);      // mov FC_RETOP, a1, ror a2
#else
			*(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16);      // mov FC_RETOP, a1, lsl #16
			*(Bit32u*)(pos+4)=RSB_IMM(HOST_a2, HOST_a2, 32, 0);      // rsb a2, a2, #32
			*(Bit32u*)(pos+8)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16);      // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
			*(Bit32u*)(pos+12)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2);      // mov FC_RETOP, FC_RETOP, ror a2
#endif
			break;
		case t_ROLd:
			*(Bit32u*)pos=NOP;      // nop
#if C_TARGETCPU == ARMV7LE
			*(Bit32u*)(pos+4)=RSB_IMM(HOST_a2, HOST_a2, 32, 0);      // rsb a2, a2, #32
			*(Bit32u*)(pos+8)=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2);      // mov FC_RETOP, a1, ror a2
#else
			*(Bit32u*)(pos+4)=NOP;      // nop
			*(Bit32u*)(pos+8)=RSB_IMM(HOST_a2, HOST_a2, 32, 0);      // rsb a2, a2, #32
			*(Bit32u*)(pos+12)=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2);      // mov FC_RETOP, a1, ror a2
#endif
			break;
		case t_NEGb:
		case t_NEGw:
		case t_NEGd:
			*(Bit32u*)pos=NOP;      // nop
			*(Bit32u*)(pos+4)=NOP;      // nop
			*(Bit32u*)(pos+8)=RSB_IMM(FC_RETOP, HOST_a1, 0, 0);      // rsb FC_RETOP, a1, #0
#if C_TARGETCPU != ARMV7LE
			*(Bit32u*)(pos+12)=NOP;      // nop
#endif
			break;
		default:
#if C_TARGETCPU == ARMV7LE
			*(Bit32u*)pos=MOVW(temp1, ((Bit32u)fct_ptr) & 0xffff);      // movw temp1, #(fct_ptr & 0xffff)
			*(Bit32u*)(pos+4)=MOVT(temp1, ((Bit32u)fct_ptr) >> 16);      // movt temp1, #(fct_ptr >> 16)
#else
			*(Bit32u*)(pos+12)=(Bit32u)fct_ptr;      // simple_func
#endif
			break;
	}
#else
#if C_TARGETCPU == ARMV7LE
	*(Bit32u*)pos=MOVW(temp1, ((Bit32u)fct_ptr) & 0xffff);      // movw temp1, #(fct_ptr & 0xffff)
	*(Bit32u*)(pos+4)=MOVT(temp1, ((Bit32u)fct_ptr) >> 16);      // movt temp1, #(fct_ptr >> 16)
#else
	*(Bit32u*)(pos+12)=(Bit32u)fct_ptr;      // simple_func
#endif
#endif
}
#endif

static void cache_block_before_close(void) { }

#ifdef DRC_USE_SEGS_ADDR

// mov 16bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 2 must be zero)
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_seg16_to_reg(HostReg dest_reg,Bitu index) {
	cache_addd( LDRH_IMM(dest_reg, FC_SEGS_ADDR, index) );      // ldrh dest_reg, [FC_SEGS_ADDR, #index]
}

// mov 32bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 4 must be zero)
static void gen_mov_seg32_to_reg(HostReg dest_reg,Bitu index) {
	cache_addd( LDR_IMM(dest_reg, FC_SEGS_ADDR, index) );      // ldr dest_reg, [FC_SEGS_ADDR, #index]
}

// add a 32bit value from Segs[index] to a full register using FC_SEGS_ADDR (index modulo 4 must be zero)
static void gen_add_seg32_to_reg(HostReg reg,Bitu index) {
	cache_addd( LDR_IMM(temp1, FC_SEGS_ADDR, index) );      // ldr temp1, [FC_SEGS_ADDR, #index]
	cache_addd( ADD_REG_LSL_IMM(reg, reg, temp1, 0) );      // add reg, reg, temp1
}

#endif

#ifdef DRC_USE_REGS_ADDR

// mov 16bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 2 must be zero)
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_regval16_to_reg(HostReg dest_reg,Bitu index) {
	cache_addd( LDRH_IMM(dest_reg, FC_REGS_ADDR, index) );      // ldrh dest_reg, [FC_REGS_ADDR, #index]
}

// mov 32bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 4 must be zero)
static void gen_mov_regval32_to_reg(HostReg dest_reg,Bitu index) {
	cache_addd( LDR_IMM(dest_reg, FC_REGS_ADDR, index) );      // ldr dest_reg, [FC_REGS_ADDR, #index]
}

// move a 32bit (dword==true) or 16bit (dword==false) value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword) {
	if (dword) {
		cache_addd( LDR_IMM(dest_reg, FC_REGS_ADDR, index) );      // ldr dest_reg, [FC_REGS_ADDR, #index]
	} else {
		cache_addd( LDRH_IMM(dest_reg, FC_REGS_ADDR, index) );      // ldrh dest_reg, [FC_REGS_ADDR, #index]
	}
}
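// Added note: the index passed to these accessors is a byte offset into cpu_regs and must
// stay within the immediate range of the instruction used (0..4095 for ldr/ldrb/str/strb,
// 0..255 for ldrh/strh), since it is placed directly into the encoding without a range check.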
// move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
// the upper 24bit of the destination register can be destroyed
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index) {
	cache_addd( LDRB_IMM(dest_reg, FC_REGS_ADDR, index) );      // ldrb dest_reg, [FC_REGS_ADDR, #index]
}

// move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
static void gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg,Bitu index) {
	cache_addd( LDRB_IMM(dest_reg, FC_REGS_ADDR, index) );      // ldrb dest_reg, [FC_REGS_ADDR, #index]
}

// add a 32bit value from cpu_regs[index] to a full register using FC_REGS_ADDR (index modulo 4 must be zero)
static void gen_add_regval32_to_reg(HostReg reg,Bitu index) {
	cache_addd( LDR_IMM(temp2, FC_REGS_ADDR, index) );      // ldr temp2, [FC_REGS_ADDR, #index]
	cache_addd( ADD_REG_LSL_IMM(reg, reg, temp2, 0) );      // add reg, reg, temp2
}

// move 16bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 2 must be zero)
static void gen_mov_regval16_from_reg(HostReg src_reg,Bitu index) {
	cache_addd( STRH_IMM(src_reg, FC_REGS_ADDR, index) );      // strh src_reg, [FC_REGS_ADDR, #index]
}

// move 32bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 4 must be zero)
static void gen_mov_regval32_from_reg(HostReg src_reg,Bitu index) {
	cache_addd( STR_IMM(src_reg, FC_REGS_ADDR, index) );      // str src_reg, [FC_REGS_ADDR, #index]
}

// move 32bit (dword==true) or 16bit (dword==false) of a register into cpu_regs[index] using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero)
static void gen_mov_regword_from_reg(HostReg src_reg,Bitu index,bool dword) {
	if (dword) {
		cache_addd( STR_IMM(src_reg, FC_REGS_ADDR, index) );      // str src_reg, [FC_REGS_ADDR, #index]
	} else {
		cache_addd( STRH_IMM(src_reg, FC_REGS_ADDR, index) );      // strh src_reg, [FC_REGS_ADDR, #index]
	}
}

// move the lowest 8bit of a register into cpu_regs[index] using FC_REGS_ADDR
static void gen_mov_regbyte_from_reg_low(HostReg src_reg,Bitu index) {
	cache_addd( STRB_IMM(src_reg, FC_REGS_ADDR, index) );      // strb src_reg, [FC_REGS_ADDR, #index]
}

#endif